diff --git a/llvm/lib/CodeGen/ExpandIRInsts.cpp b/llvm/lib/CodeGen/ExpandIRInsts.cpp index be7400fac1daf..c7946417cf1b8 100644 --- a/llvm/lib/CodeGen/ExpandIRInsts.cpp +++ b/llvm/lib/CodeGen/ExpandIRInsts.cpp @@ -1133,7 +1133,12 @@ static void scalarize(Instruction *I, else if (auto *CastI = dyn_cast(I)) NewOp = Builder.CreateCast(CastI->getOpcode(), Ext, I->getType()->getScalarType()); - else + else if (auto *II = dyn_cast(I)) { + assert(II->getIntrinsicID() == Intrinsic::fptoui_sat || + II->getIntrinsicID() == Intrinsic::fptosi_sat); + NewOp = Builder.CreateIntrinsic(VTy->getElementType(), + II->getIntrinsicID(), Ext); + } else llvm_unreachable("Unsupported instruction type"); Result = Builder.CreateInsertElement(Result, NewOp, Idx); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index e50e1832e37a6..df43c3fb8dad7 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1048,7 +1048,8 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, // division. MaxDivRemBitWidthSupported = 128; - MaxLargeFPConvertBitWidthSupported = 128; + MaxLargeFPConvertBitWidthSupported = + TM.getTargetTriple().isArch64Bit() ? 128 : 64; MinCmpXchgSizeInBits = 0; SupportsUnalignedAtomics = false; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 93e820b4713ec..025540caeb2c3 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1898,8 +1898,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64); - setMaxLargeFPConvertBitWidthSupported(Subtarget.is64Bit() ? 128 : 64); - // Disable strict node mutation. IsStrictFPEnabled = true; EnableExtLdPromotion = true; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index dda91ac19b44a..fcbacd6313442 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -430,6 +430,9 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setMaxAtomicSizeInBitsSupported(64); + // Unlike most other targets, __int128 is supported even on wasm32. + setMaxLargeFPConvertBitWidthSupported(128); + // Always convert switches to br_tables unless there is only one case, which // is equivalent to a simple branch. This reduces code size for wasm, and we // defer possible jump table optimizations to the VM. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3be7d35a08b6d..9c4550935720b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -174,8 +174,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64); - setMaxLargeFPConvertBitWidthSupported(128); - // Set up the register classes. addRegisterClass(MVT::i8, &X86::GR8RegClass); addRegisterClass(MVT::i16, &X86::GR16RegClass); diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll index a6f0a03fc7e5b..3cb1c30f63a7b 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll @@ -943,126 +943,536 @@ entry: define i64 @stest_f64i64(double %x) { ; SOFT-LABEL: stest_f64i64: -; SOFT: @ %bb.0: @ %entry +; SOFT: @ %bb.0: @ %entryfp-to-i-entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #4 -; SOFT-NEXT: sub sp, #4 -; SOFT-NEXT: bl __fixdfti +; SOFT-NEXT: .pad #68 +; SOFT-NEXT: sub sp, #68 +; SOFT-NEXT: movs r7, #1 ; SOFT-NEXT: movs r4, #0 -; SOFT-NEXT: mvns r5, r4 -; SOFT-NEXT: ldr r6, .LCPI18_0 -; SOFT-NEXT: adds r7, r0, #1 +; SOFT-NEXT: ldr r3, .LCPI18_1 +; SOFT-NEXT: mov r5, r3 +; SOFT-NEXT: subs r5, #52 +; SOFT-NEXT: mov r6, r4 +; SOFT-NEXT: sbcs r6, r4 +; SOFT-NEXT: lsls r2, r1, #1 +; SOFT-NEXT: lsrs r2, r2, #21 +; SOFT-NEXT: subs r5, r2, r5 +; SOFT-NEXT: mov r5, r4 +; SOFT-NEXT: sbcs r5, r6 +; SOFT-NEXT: str r7, [sp, #28] @ 4-byte Spill +; SOFT-NEXT: bhs .LBB18_2 +; SOFT-NEXT: @ %bb.1: +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r6, r4 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: b .LBB18_5 +; SOFT-NEXT: .LBB18_2: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: asrs r5, r1, #31 +; SOFT-NEXT: str r5, [sp, #24] @ 4-byte Spill +; SOFT-NEXT: orrs r5, r7 +; SOFT-NEXT: str r5, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: ldr r5, .LCPI18_0 +; SOFT-NEXT: mvns r6, r5 +; SOFT-NEXT: mvns r1, r1 +; SOFT-NEXT: orrs r1, r6 +; SOFT-NEXT: subs r1, r5, r1 +; SOFT-NEXT: mvns r0, r0 +; SOFT-NEXT: mvns r0, r0 +; SOFT-NEXT: cmp r3, r2 +; SOFT-NEXT: bls .LBB18_4 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; SOFT-NEXT: subs r2, r3, r2 +; SOFT-NEXT: bl __aeabi_llsr ; SOFT-NEXT: mov r7, r1 -; SOFT-NEXT: sbcs r7, r6 +; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r6, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: adds r6, r0, r1 +; SOFT-NEXT: adcs r7, r4 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r5, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: adds r6, r0, r6 +; SOFT-NEXT: adcs r1, r4 +; SOFT-NEXT: adds r0, r7, r1 +; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: mov r7, r4 +; SOFT-NEXT: adcs r7, r4 +; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: adcs r5, r7 +; SOFT-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: adds r2, r3, r2 +; SOFT-NEXT: adcs r1, r5 +; SOFT-NEXT: b .LBB18_5 +; SOFT-NEXT: .LBB18_4: @ %fp-to-i-if-exp.large +; SOFT-NEXT: str r4, [sp, #60] +; SOFT-NEXT: str r4, [sp, #44] +; SOFT-NEXT: str r4, [sp, #40] +; SOFT-NEXT: str r4, [sp, #36] +; SOFT-NEXT: str r4, [sp, #32] +; SOFT-NEXT: add r3, sp, #48 +; SOFT-NEXT: stm r3!, {r0, r1, r4} +; SOFT-NEXT: ldr r0, .LCPI18_2 +; SOFT-NEXT: adds r6, r2, r0 +; SOFT-NEXT: movs r7, #31 +; SOFT-NEXT: lsrs r0, r6, #3 +; SOFT-NEXT: ands r6, r7 +; SOFT-NEXT: movs r1, #12 +; SOFT-NEXT: ands r1, r0 +; SOFT-NEXT: add r0, sp, #32 +; SOFT-NEXT: adds r0, #16 +; SOFT-NEXT: subs r5, r0, r1 +; SOFT-NEXT: ldr r0, [r5, #4] +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [r5, #8] +; SOFT-NEXT: ldr r1, [r5, #12] +; SOFT-NEXT: lsrs r3, r0, #1 +; SOFT-NEXT: lsls r0, r6 +; SOFT-NEXT: eors r7, r6 +; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: lsrs r2, r2, #1 +; SOFT-NEXT: lsrs r2, r7 +; SOFT-NEXT: orrs r2, r0 +; SOFT-NEXT: lsls r1, r6 +; SOFT-NEXT: lsrs r3, r7 +; SOFT-NEXT: orrs r3, r1 +; SOFT-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: lsls r1, r6 +; SOFT-NEXT: ldr r5, [r5] +; SOFT-NEXT: lsrs r0, r5, #1 +; SOFT-NEXT: lsrs r0, r7 +; SOFT-NEXT: orrs r0, r1 +; SOFT-NEXT: mov r3, r0 +; SOFT-NEXT: lsls r5, r6 +; SOFT-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r6, r3 +; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r0 +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r6, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: adds r6, r0, r1 +; SOFT-NEXT: adcs r7, r4 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r2, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: adds r6, r0, r6 +; SOFT-NEXT: adcs r1, r4 +; SOFT-NEXT: adds r5, r7, r1 +; SOFT-NEXT: mov r7, r4 +; SOFT-NEXT: adcs r7, r4 +; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r2, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: adds r2, r2, r5 +; SOFT-NEXT: adcs r1, r7 +; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r2, r2, r3 +; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r3 +; SOFT-NEXT: .LBB18_5: @ %fp-to-i-cleanup +; SOFT-NEXT: mvns r3, r4 +; SOFT-NEXT: ldr r5, .LCPI18_3 +; SOFT-NEXT: adds r7, r0, #1 +; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: sbcs r7, r5 ; SOFT-NEXT: mov r7, r2 ; SOFT-NEXT: sbcs r7, r4 -; SOFT-NEXT: mov r7, r3 +; SOFT-NEXT: mov r7, r1 ; SOFT-NEXT: sbcs r7, r4 -; SOFT-NEXT: bge .LBB18_8 -; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: bge .LBB18_9 -; SOFT-NEXT: .LBB18_2: @ %entry -; SOFT-NEXT: bge .LBB18_10 -; SOFT-NEXT: .LBB18_3: @ %entry -; SOFT-NEXT: blt .LBB18_5 -; SOFT-NEXT: .LBB18_4: @ %entry -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: .LBB18_5: @ %entry -; SOFT-NEXT: movs r6, #1 -; SOFT-NEXT: lsls r6, r6, #31 +; SOFT-NEXT: bge .LBB18_15 +; SOFT-NEXT: @ %bb.6: @ %fp-to-i-cleanup +; SOFT-NEXT: ldr r7, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: bge .LBB18_16 +; SOFT-NEXT: .LBB18_7: @ %fp-to-i-cleanup +; SOFT-NEXT: bge .LBB18_17 +; SOFT-NEXT: .LBB18_8: @ %fp-to-i-cleanup +; SOFT-NEXT: blt .LBB18_10 +; SOFT-NEXT: .LBB18_9: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: .LBB18_10: @ %fp-to-i-cleanup +; SOFT-NEXT: lsls r5, r7, #31 ; SOFT-NEXT: rsbs r7, r0, #0 -; SOFT-NEXT: mov r7, r6 -; SOFT-NEXT: sbcs r7, r1 ; SOFT-NEXT: mov r7, r5 +; SOFT-NEXT: sbcs r7, r6 +; SOFT-NEXT: mov r7, r3 ; SOFT-NEXT: sbcs r7, r2 -; SOFT-NEXT: sbcs r5, r3 -; SOFT-NEXT: bge .LBB18_11 -; SOFT-NEXT: @ %bb.6: @ %entry -; SOFT-NEXT: bge .LBB18_12 -; SOFT-NEXT: .LBB18_7: @ %entry -; SOFT-NEXT: add sp, #4 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB18_8: @ %entry -; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: blt .LBB18_2 -; SOFT-NEXT: .LBB18_9: @ %entry -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: blt .LBB18_3 -; SOFT-NEXT: .LBB18_10: @ %entry -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: bge .LBB18_4 -; SOFT-NEXT: b .LBB18_5 -; SOFT-NEXT: .LBB18_11: @ %entry +; SOFT-NEXT: sbcs r3, r1 +; SOFT-NEXT: blt .LBB18_12 +; SOFT-NEXT: @ %bb.11: @ %fp-to-i-cleanup ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: blt .LBB18_7 -; SOFT-NEXT: .LBB18_12: @ %entry +; SOFT-NEXT: .LBB18_12: @ %fp-to-i-cleanup +; SOFT-NEXT: blt .LBB18_14 +; SOFT-NEXT: @ %bb.13: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r6, r5 +; SOFT-NEXT: .LBB18_14: @ %fp-to-i-cleanup ; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: add sp, #4 +; SOFT-NEXT: add sp, #68 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB18_15: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r7, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: blt .LBB18_7 +; SOFT-NEXT: .LBB18_16: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: blt .LBB18_8 +; SOFT-NEXT: .LBB18_17: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r6, r5 +; SOFT-NEXT: bge .LBB18_9 +; SOFT-NEXT: b .LBB18_10 ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.13: +; SOFT-NEXT: @ %bb.18: ; SOFT-NEXT: .LCPI18_0: +; SOFT-NEXT: .long 1048575 @ 0xfffff +; SOFT-NEXT: .LCPI18_1: +; SOFT-NEXT: .long 1075 @ 0x433 +; SOFT-NEXT: .LCPI18_2: +; SOFT-NEXT: .long 4294966221 @ 0xfffffbcd +; SOFT-NEXT: .LCPI18_3: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; ; VFP2-LABEL: stest_f64i64: -; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r4, r5, r7, lr} -; VFP2-NEXT: push {r4, r5, r7, lr} -; VFP2-NEXT: bl __fixdfti -; VFP2-NEXT: subs.w r4, r0, #-1 -; VFP2-NEXT: mvn lr, #-2147483648 -; VFP2-NEXT: sbcs.w r4, r1, lr +; VFP2: @ %bb.0: @ %entryfp-to-i-entry +; VFP2-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: .pad #52 +; VFP2-NEXT: sub sp, #52 +; VFP2-NEXT: vmov r3, r1, d0 ; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs r4, r2, #0 -; VFP2-NEXT: sbcs r4, r3, #0 +; VFP2-NEXT: movw r2, #1023 +; VFP2-NEXT: ubfx r0, r1, #20, #11 +; VFP2-NEXT: cmp r0, r2 +; VFP2-NEXT: bhs .LBB18_2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: b .LBB18_5 +; VFP2-NEXT: .LBB18_2: @ %fp-to-i-if-check.exp.size +; VFP2-NEXT: movs r2, #1 +; VFP2-NEXT: mov r4, r1 +; VFP2-NEXT: orr.w r10, r2, r1, asr #31 +; VFP2-NEXT: bfi r4, r2, #20, #12 +; VFP2-NEXT: asrs r1, r1, #31 +; VFP2-NEXT: movw r2, #1074 +; VFP2-NEXT: cmp r0, r2 +; VFP2-NEXT: bhi .LBB18_4 +; VFP2-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; VFP2-NEXT: movw r2, #1075 +; VFP2-NEXT: subs r2, r2, r0 +; VFP2-NEXT: lsr.w lr, r3, r2 +; VFP2-NEXT: rsb.w r3, r2, #32 +; VFP2-NEXT: lsr.w r2, r4, r2 +; VFP2-NEXT: lsl.w r3, r4, r3 +; VFP2-NEXT: orr.w r5, lr, r3 +; VFP2-NEXT: movw r3, #1043 +; VFP2-NEXT: subs r0, r3, r0 +; VFP2-NEXT: it pl +; VFP2-NEXT: lsrpl.w r5, r4, r0 +; VFP2-NEXT: umull r0, r3, r5, r10 ; VFP2-NEXT: mov.w r4, #0 +; VFP2-NEXT: it pl +; VFP2-NEXT: movpl r2, #0 +; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: umlal r3, r4, r2, r10 +; VFP2-NEXT: umull r8, r7, r5, r1 +; VFP2-NEXT: adds.w r6, r8, r3 +; VFP2-NEXT: adcs.w r6, r4, r7 +; VFP2-NEXT: umlal r3, r4, r5, r1 +; VFP2-NEXT: adc r6, lr, #0 +; VFP2-NEXT: mla r7, r1, r2, r7 +; VFP2-NEXT: umlal r4, r6, r2, r1 +; VFP2-NEXT: mla r1, r1, r5, r7 +; VFP2-NEXT: adds.w lr, r4, r8 +; VFP2-NEXT: adc.w r2, r6, r1 +; VFP2-NEXT: b .LBB18_5 +; VFP2-NEXT: .LBB18_4: @ %fp-to-i-if-exp.large +; VFP2-NEXT: add r2, sp, #32 +; VFP2-NEXT: subw r0, r0, #1075 +; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: stm.w r2, {r3, r4, lr} +; VFP2-NEXT: movs r2, #12 +; VFP2-NEXT: add r3, sp, #16 +; VFP2-NEXT: and.w r2, r2, r0, lsr #3 +; VFP2-NEXT: adds r3, #16 +; VFP2-NEXT: str.w lr, [sp, #44] +; VFP2-NEXT: strd lr, lr, [sp, #24] +; VFP2-NEXT: subs r2, r3, r2 +; VFP2-NEXT: strd lr, lr, [sp, #16] +; VFP2-NEXT: ldrd r3, r4, [r2] +; VFP2-NEXT: str r4, [sp, #4] @ 4-byte Spill +; VFP2-NEXT: ldr r7, [r2, #8] +; VFP2-NEXT: str r7, [sp, #12] @ 4-byte Spill +; VFP2-NEXT: lsrs r7, r3, #1 +; VFP2-NEXT: ldr r2, [r2, #12] +; VFP2-NEXT: str r2, [sp, #8] @ 4-byte Spill +; VFP2-NEXT: and r2, r0, #31 +; VFP2-NEXT: eor r12, r2, #31 +; VFP2-NEXT: lsl.w r5, r3, r2 +; VFP2-NEXT: lsr.w r0, r7, r12 +; VFP2-NEXT: lsl.w r7, r4, r2 +; VFP2-NEXT: orr.w r11, r7, r0 +; VFP2-NEXT: umull r0, r3, r5, r10 +; VFP2-NEXT: movs r7, #0 +; VFP2-NEXT: umull r4, r8, r5, r1 +; VFP2-NEXT: umlal r3, r7, r11, r10 +; VFP2-NEXT: str r4, [sp] @ 4-byte Spill +; VFP2-NEXT: adds.w r9, r4, r3 +; VFP2-NEXT: adcs.w r6, r7, r8 +; VFP2-NEXT: umlal r3, r7, r5, r1 +; VFP2-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; VFP2-NEXT: adc r9, lr, #0 +; VFP2-NEXT: umlal r7, r9, r11, r1 +; VFP2-NEXT: lsl.w lr, r6, r2 +; VFP2-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; VFP2-NEXT: lsrs r4, r6, #1 +; VFP2-NEXT: lsl.w r2, r6, r2 +; VFP2-NEXT: lsr.w r4, r4, r12 +; VFP2-NEXT: orr.w lr, lr, r4 +; VFP2-NEXT: ldr r4, [sp, #4] @ 4-byte Reload +; VFP2-NEXT: ldr r6, [sp] @ 4-byte Reload +; VFP2-NEXT: lsrs.w r4, r4, #1 +; VFP2-NEXT: lsr.w r4, r4, r12 +; VFP2-NEXT: orrs r2, r4 +; VFP2-NEXT: umull r12, r4, r10, r2 +; VFP2-NEXT: mla r4, r10, lr, r4 +; VFP2-NEXT: mla r2, r1, r2, r4 +; VFP2-NEXT: mla r4, r1, r11, r8 +; VFP2-NEXT: mla r1, r1, r5, r4 +; VFP2-NEXT: adds.w r5, r6, r12 +; VFP2-NEXT: mov.w r12, #0 +; VFP2-NEXT: adcs r1, r2 +; VFP2-NEXT: adds.w lr, r7, r5 +; VFP2-NEXT: adc.w r2, r9, r1 +; VFP2-NEXT: .LBB18_5: @ %fp-to-i-cleanup +; VFP2-NEXT: subs.w r7, r0, #-1 +; VFP2-NEXT: mvn r1, #-2147483648 +; VFP2-NEXT: sbcs.w r7, r3, r1 +; VFP2-NEXT: mov.w r6, #-2147483648 +; VFP2-NEXT: sbcs r7, lr, #0 +; VFP2-NEXT: sbcs r7, r2, #0 ; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r4, #1 -; VFP2-NEXT: cmp r4, #0 -; VFP2-NEXT: itet eq -; VFP2-NEXT: moveq r3, r4 -; VFP2-NEXT: movne r4, r2 -; VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: mov.w r2, #-1 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: movlt r1, r3 +; VFP2-NEXT: mov.w r7, #0 +; VFP2-NEXT: mov.w r3, #-1 +; VFP2-NEXT: it ge +; VFP2-NEXT: movge r0, r3 +; VFP2-NEXT: it lt +; VFP2-NEXT: movlt r7, #1 +; VFP2-NEXT: cmp r7, #0 +; VFP2-NEXT: ite eq +; VFP2-NEXT: moveq r2, r7 +; VFP2-NEXT: movne r7, lr ; VFP2-NEXT: rsbs r5, r0, #0 -; VFP2-NEXT: mov.w lr, #-2147483648 -; VFP2-NEXT: sbcs.w r5, lr, r1 -; VFP2-NEXT: sbcs.w r4, r2, r4 -; VFP2-NEXT: sbcs r2, r3 +; VFP2-NEXT: sbcs.w r5, r6, r1 +; VFP2-NEXT: sbcs.w r7, r3, r7 +; VFP2-NEXT: sbcs.w r2, r3, r2 ; VFP2-NEXT: itt ge ; VFP2-NEXT: movge r0, r12 -; VFP2-NEXT: movge r1, lr -; VFP2-NEXT: pop {r4, r5, r7, pc} +; VFP2-NEXT: movge r1, r6 +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; FULL-LABEL: stest_f64i64: -; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r7, lr} -; FULL-NEXT: push {r4, r5, r7, lr} -; FULL-NEXT: bl __fixdfti -; FULL-NEXT: subs.w lr, r0, #-1 -; FULL-NEXT: mvn r12, #-2147483648 -; FULL-NEXT: sbcs.w lr, r1, r12 -; FULL-NEXT: sbcs lr, r2, #0 -; FULL-NEXT: sbcs lr, r3, #0 -; FULL-NEXT: cset lr, lt -; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r5, r3, lr, ne -; FULL-NEXT: mov.w r3, #-1 -; FULL-NEXT: csel r0, r0, r3, ne -; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: csel r2, r2, lr, ne -; FULL-NEXT: rsbs r4, r0, #0 -; FULL-NEXT: mov.w r12, #-2147483648 -; FULL-NEXT: sbcs.w r4, r12, r1 -; FULL-NEXT: sbcs.w r2, r3, r2 -; FULL-NEXT: sbcs.w r2, r3, r5 -; FULL-NEXT: it ge -; FULL-NEXT: movge r0, #0 -; FULL-NEXT: csel r1, r1, r12, lt -; FULL-NEXT: pop {r4, r5, r7, pc} +; FULL: @ %bb.0: @ %entryfp-to-i-entry +; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FULL-NEXT: .pad #52 +; FULL-NEXT: sub sp, #52 +; FULL-NEXT: vmov r3, r1, d0 +; FULL-NEXT: mov.w r12, #0 +; FULL-NEXT: movw r0, #1023 +; FULL-NEXT: ubfx r2, r1, #20, #11 +; FULL-NEXT: cmp r2, r0 +; FULL-NEXT: bhs .LBB18_2 +; FULL-NEXT: @ %bb.1: +; FULL-NEXT: movs r2, #0 +; FULL-NEXT: movs r3, #0 +; FULL-NEXT: movs r0, #0 +; FULL-NEXT: movs r1, #0 +; FULL-NEXT: b .LBB18_5 +; FULL-NEXT: .LBB18_2: @ %fp-to-i-if-check.exp.size +; FULL-NEXT: movs r0, #1 +; FULL-NEXT: mov r4, r1 +; FULL-NEXT: orr.w r10, r0, r1, asr #31 +; FULL-NEXT: bfi r4, r0, #20, #12 +; FULL-NEXT: asrs r1, r1, #31 +; FULL-NEXT: movw r0, #1074 +; FULL-NEXT: cmp r2, r0 +; FULL-NEXT: bhi .LBB18_4 +; FULL-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; FULL-NEXT: movw r0, #1075 +; FULL-NEXT: subs r0, r0, r2 +; FULL-NEXT: lsr.w lr, r3, r0 +; FULL-NEXT: rsb.w r3, r0, #32 +; FULL-NEXT: lsr.w r0, r4, r0 +; FULL-NEXT: lsl.w r3, r4, r3 +; FULL-NEXT: orr.w r5, lr, r3 +; FULL-NEXT: movw r3, #1043 +; FULL-NEXT: subs r2, r3, r2 +; FULL-NEXT: it pl +; FULL-NEXT: lsrpl.w r5, r4, r2 +; FULL-NEXT: umull r2, r3, r5, r10 +; FULL-NEXT: mov.w r4, #0 +; FULL-NEXT: it pl +; FULL-NEXT: movpl r0, #0 +; FULL-NEXT: mov.w lr, #0 +; FULL-NEXT: umlal r3, r4, r0, r10 +; FULL-NEXT: umull r8, r7, r5, r1 +; FULL-NEXT: adds.w r6, r8, r3 +; FULL-NEXT: adcs.w r6, r4, r7 +; FULL-NEXT: umlal r3, r4, r5, r1 +; FULL-NEXT: adc r6, lr, #0 +; FULL-NEXT: mla r7, r1, r0, r7 +; FULL-NEXT: umlal r4, r6, r0, r1 +; FULL-NEXT: mla r1, r1, r5, r7 +; FULL-NEXT: adds.w r0, r4, r8 +; FULL-NEXT: adcs r1, r6 +; FULL-NEXT: b .LBB18_5 +; FULL-NEXT: .LBB18_4: @ %fp-to-i-if-exp.large +; FULL-NEXT: add r0, sp, #32 +; FULL-NEXT: mov.w lr, #0 +; FULL-NEXT: stm.w r0, {r3, r4, lr} +; FULL-NEXT: subw r0, r2, #1075 +; FULL-NEXT: movs r2, #12 +; FULL-NEXT: add r3, sp, #16 +; FULL-NEXT: and.w r2, r2, r0, lsr #3 +; FULL-NEXT: adds r3, #16 +; FULL-NEXT: str.w lr, [sp, #44] +; FULL-NEXT: strd lr, lr, [sp, #24] +; FULL-NEXT: subs r2, r3, r2 +; FULL-NEXT: strd lr, lr, [sp, #16] +; FULL-NEXT: and r0, r0, #31 +; FULL-NEXT: ldrd r3, r4, [r2] +; FULL-NEXT: eor r12, r0, #31 +; FULL-NEXT: str r4, [sp, #8] @ 4-byte Spill +; FULL-NEXT: lsls r4, r0 +; FULL-NEXT: ldrd r7, r2, [r2, #8] +; FULL-NEXT: lsl.w r5, r3, r0 +; FULL-NEXT: umull r6, r8, r5, r1 +; FULL-NEXT: str r2, [sp, #12] @ 4-byte Spill +; FULL-NEXT: lsrs r2, r3, #1 +; FULL-NEXT: lsr.w r2, r2, r12 +; FULL-NEXT: orr.w r11, r4, r2 +; FULL-NEXT: umull r2, r3, r5, r10 +; FULL-NEXT: movs r4, #0 +; FULL-NEXT: str r6, [sp, #4] @ 4-byte Spill +; FULL-NEXT: umlal r3, r4, r11, r10 +; FULL-NEXT: adds.w r9, r6, r3 +; FULL-NEXT: adcs.w r6, r4, r8 +; FULL-NEXT: umlal r3, r4, r5, r1 +; FULL-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; FULL-NEXT: adc r9, lr, #0 +; FULL-NEXT: umlal r4, r9, r11, r1 +; FULL-NEXT: lsl.w lr, r6, r0 +; FULL-NEXT: mov r6, r7 +; FULL-NEXT: lsl.w r0, r6, r0 +; FULL-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; FULL-NEXT: lsrs r7, r7, #1 +; FULL-NEXT: lsr.w r7, r7, r12 +; FULL-NEXT: orr.w lr, lr, r7 +; FULL-NEXT: lsrs.w r7, r6, #1 +; FULL-NEXT: lsr.w r7, r7, r12 +; FULL-NEXT: orrs r0, r7 +; FULL-NEXT: umull r12, r7, r10, r0 +; FULL-NEXT: mla r7, r10, lr, r7 +; FULL-NEXT: mla r0, r1, r0, r7 +; FULL-NEXT: mla r7, r1, r11, r8 +; FULL-NEXT: mla r1, r1, r5, r7 +; FULL-NEXT: ldr r7, [sp, #4] @ 4-byte Reload +; FULL-NEXT: adds.w r7, r7, r12 +; FULL-NEXT: mov.w r12, #0 +; FULL-NEXT: adcs r1, r0 +; FULL-NEXT: adds r0, r4, r7 +; FULL-NEXT: adc.w r1, r1, r9 +; FULL-NEXT: .LBB18_5: @ %fp-to-i-cleanup +; FULL-NEXT: subs.w r6, r2, #-1 +; FULL-NEXT: mvn r7, #-2147483648 +; FULL-NEXT: sbcs.w r6, r3, r7 +; FULL-NEXT: sbcs r6, r0, #0 +; FULL-NEXT: sbcs r6, r1, #0 +; FULL-NEXT: csel r3, r3, r7, lt +; FULL-NEXT: mov.w r7, #-1 +; FULL-NEXT: cset r6, lt +; FULL-NEXT: csel r2, r2, r7, lt +; FULL-NEXT: cmp r6, #0 +; FULL-NEXT: csel r1, r1, r6, ne +; FULL-NEXT: csel r0, r0, r6, ne +; FULL-NEXT: rsbs r5, r2, #0 +; FULL-NEXT: mov.w r6, #-2147483648 +; FULL-NEXT: sbcs.w r5, r6, r3 +; FULL-NEXT: sbcs.w r0, r7, r0 +; FULL-NEXT: sbcs.w r0, r7, r1 +; FULL-NEXT: csel r0, r2, r12, lt +; FULL-NEXT: csel r1, r3, r6, lt +; FULL-NEXT: add sp, #52 +; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi double %x to i128 %0 = icmp slt i128 %conv, 9223372036854775807 @@ -1075,49 +1485,265 @@ entry: define i64 @utest_f64i64(double %x) { ; SOFT-LABEL: utest_f64i64: -; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} -; SOFT-NEXT: bl __fixunsdfti +; SOFT: @ %bb.0: @ %entryfp-to-i-entry +; SOFT-NEXT: .save {r4, r5, r6, r7, lr} +; SOFT-NEXT: push {r4, r5, r6, r7, lr} +; SOFT-NEXT: .pad #36 +; SOFT-NEXT: sub sp, #36 ; SOFT-NEXT: movs r4, #0 -; SOFT-NEXT: subs r2, r2, #1 +; SOFT-NEXT: ldr r3, .LCPI19_1 +; SOFT-NEXT: mov r5, r3 +; SOFT-NEXT: subs r5, #52 +; SOFT-NEXT: mov r6, r4 +; SOFT-NEXT: sbcs r6, r4 +; SOFT-NEXT: lsls r2, r1, #1 +; SOFT-NEXT: lsrs r2, r2, #21 +; SOFT-NEXT: subs r5, r2, r5 +; SOFT-NEXT: mov r5, r4 +; SOFT-NEXT: sbcs r5, r6 +; SOFT-NEXT: bhs .LBB19_2 +; SOFT-NEXT: @ %bb.1: +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: b .LBB19_4 +; SOFT-NEXT: .LBB19_2: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: ldr r5, .LCPI19_0 +; SOFT-NEXT: mvns r6, r5 +; SOFT-NEXT: mvns r1, r1 +; SOFT-NEXT: orrs r1, r6 +; SOFT-NEXT: subs r1, r5, r1 +; SOFT-NEXT: mvns r0, r0 +; SOFT-NEXT: mvns r0, r0 +; SOFT-NEXT: cmp r3, r2 +; SOFT-NEXT: bls .LBB19_5 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; SOFT-NEXT: subs r2, r3, r2 +; SOFT-NEXT: bl __aeabi_llsr +; SOFT-NEXT: .LBB19_4: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r6, r4 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: b .LBB19_6 +; SOFT-NEXT: .LBB19_5: @ %fp-to-i-if-exp.large +; SOFT-NEXT: str r4, [sp, #28] +; SOFT-NEXT: str r4, [sp, #12] +; SOFT-NEXT: str r4, [sp, #8] +; SOFT-NEXT: str r4, [sp, #4] +; SOFT-NEXT: str r4, [sp] +; SOFT-NEXT: add r3, sp, #16 +; SOFT-NEXT: stm r3!, {r0, r1, r4} +; SOFT-NEXT: ldr r0, .LCPI19_2 +; SOFT-NEXT: adds r2, r2, r0 +; SOFT-NEXT: movs r5, #31 +; SOFT-NEXT: lsrs r0, r2, #3 +; SOFT-NEXT: ands r2, r5 +; SOFT-NEXT: movs r1, #12 +; SOFT-NEXT: ands r1, r0 +; SOFT-NEXT: mov r0, sp +; SOFT-NEXT: adds r0, #16 +; SOFT-NEXT: subs r3, r0, r1 +; SOFT-NEXT: ldm r3!, {r0, r7} +; SOFT-NEXT: ldr r1, [r3] +; SOFT-NEXT: ldr r6, [r3, #4] +; SOFT-NEXT: lsls r6, r2 +; SOFT-NEXT: eors r5, r2 +; SOFT-NEXT: lsrs r3, r1, #1 +; SOFT-NEXT: lsrs r3, r5 +; SOFT-NEXT: orrs r3, r6 +; SOFT-NEXT: lsls r1, r2 +; SOFT-NEXT: lsrs r6, r7, #1 +; SOFT-NEXT: lsrs r6, r5 +; SOFT-NEXT: orrs r6, r1 +; SOFT-NEXT: lsls r7, r2 +; SOFT-NEXT: lsrs r1, r0, #1 +; SOFT-NEXT: lsrs r1, r5 +; SOFT-NEXT: orrs r1, r7 +; SOFT-NEXT: lsls r0, r2 +; SOFT-NEXT: .LBB19_6: @ %fp-to-i-cleanup +; SOFT-NEXT: subs r2, r6, #1 ; SOFT-NEXT: sbcs r3, r4 -; SOFT-NEXT: bhs .LBB19_3 -; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: bhs .LBB19_4 -; SOFT-NEXT: .LBB19_2: @ %entry -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB19_3: @ %entry +; SOFT-NEXT: bhs .LBB19_9 +; SOFT-NEXT: @ %bb.7: @ %fp-to-i-cleanup +; SOFT-NEXT: bhs .LBB19_10 +; SOFT-NEXT: .LBB19_8: @ %fp-to-i-cleanup +; SOFT-NEXT: add sp, #36 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB19_9: @ %fp-to-i-cleanup ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: blo .LBB19_2 -; SOFT-NEXT: .LBB19_4: @ %entry +; SOFT-NEXT: blo .LBB19_8 +; SOFT-NEXT: .LBB19_10: @ %fp-to-i-cleanup ; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: add sp, #36 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .p2align 2 +; SOFT-NEXT: @ %bb.11: +; SOFT-NEXT: .LCPI19_0: +; SOFT-NEXT: .long 1048575 @ 0xfffff +; SOFT-NEXT: .LCPI19_1: +; SOFT-NEXT: .long 1075 @ 0x433 +; SOFT-NEXT: .LCPI19_2: +; SOFT-NEXT: .long 4294966221 @ 0xfffffbcd ; ; VFP2-LABEL: utest_f64i64: -; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: bl __fixunsdfti -; VFP2-NEXT: subs r2, #1 +; VFP2: @ %bb.0: @ %entryfp-to-i-entry +; VFP2-NEXT: vmov r0, r1, d0 ; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs r2, r3, #0 +; VFP2-NEXT: movw r2, #1023 +; VFP2-NEXT: ubfx r3, r1, #20, #11 +; VFP2-NEXT: cmp r3, r2 +; VFP2-NEXT: bhs .LBB19_2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: movs r1, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: b .LBB19_6 +; VFP2-NEXT: .LBB19_2: @ %fp-to-i-if-check.exp.size +; VFP2-NEXT: .save {r4, r5, r7, lr} +; VFP2-NEXT: push {r4, r5, r7, lr} +; VFP2-NEXT: .pad #32 +; VFP2-NEXT: sub sp, #32 +; VFP2-NEXT: movs r2, #1 +; VFP2-NEXT: bfi r1, r2, #20, #12 +; VFP2-NEXT: movw r2, #1074 +; VFP2-NEXT: cmp r3, r2 +; VFP2-NEXT: bhi .LBB19_4 +; VFP2-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; VFP2-NEXT: movw r2, #1075 +; VFP2-NEXT: subs r4, r2, r3 +; VFP2-NEXT: movw r2, #1043 +; VFP2-NEXT: lsr.w lr, r0, r4 +; VFP2-NEXT: rsb.w r0, r4, #32 +; VFP2-NEXT: subs r2, r2, r3 +; VFP2-NEXT: mov.w r3, #0 +; VFP2-NEXT: lsl.w r0, r1, r0 +; VFP2-NEXT: orr.w r0, r0, lr +; VFP2-NEXT: it pl +; VFP2-NEXT: lsrpl.w r0, r1, r2 +; VFP2-NEXT: lsr.w r1, r1, r4 +; VFP2-NEXT: it pl +; VFP2-NEXT: movpl r1, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: b .LBB19_5 +; VFP2-NEXT: .LBB19_4: @ %fp-to-i-if-exp.large +; VFP2-NEXT: add.w lr, sp, #16 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: stm.w lr, {r0, r1, r2} +; VFP2-NEXT: subw r0, r3, #1075 +; VFP2-NEXT: movs r1, #12 +; VFP2-NEXT: str r2, [sp, #28] +; VFP2-NEXT: and.w r1, r1, r0, lsr #3 +; VFP2-NEXT: strd r2, r2, [sp, #8] +; VFP2-NEXT: and r0, r0, #31 +; VFP2-NEXT: strd r2, r2, [sp] +; VFP2-NEXT: mov r2, sp +; VFP2-NEXT: adds r2, #16 +; VFP2-NEXT: eor r5, r0, #31 +; VFP2-NEXT: subs r1, r2, r1 +; VFP2-NEXT: ldrd lr, r4, [r1] +; VFP2-NEXT: ldrd r3, r1, [r1, #8] +; VFP2-NEXT: lsls r1, r0 +; VFP2-NEXT: lsrs r2, r3, #1 +; VFP2-NEXT: lsrs r2, r5 +; VFP2-NEXT: orrs r2, r1 +; VFP2-NEXT: lsl.w r1, r3, r0 +; VFP2-NEXT: lsrs.w r3, r4, #1 +; VFP2-NEXT: lsrs r3, r5 +; VFP2-NEXT: orrs r3, r1 +; VFP2-NEXT: lsl.w r1, r4, r0 +; VFP2-NEXT: lsr.w r4, lr, #1 +; VFP2-NEXT: lsl.w r0, lr, r0 +; VFP2-NEXT: lsrs r4, r5 +; VFP2-NEXT: orrs r1, r4 +; VFP2-NEXT: .LBB19_5: +; VFP2-NEXT: add sp, #32 +; VFP2-NEXT: pop.w {r4, r5, r7, lr} +; VFP2-NEXT: .LBB19_6: @ %fp-to-i-cleanup +; VFP2-NEXT: subs r3, #1 +; VFP2-NEXT: sbcs r2, r2, #0 ; VFP2-NEXT: itt hs ; VFP2-NEXT: movhs r0, r12 ; VFP2-NEXT: movhs r1, r12 -; VFP2-NEXT: pop {r7, pc} +; VFP2-NEXT: bx lr ; ; FULL-LABEL: utest_f64i64: -; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: bl __fixunsdfti -; FULL-NEXT: subs r2, #1 +; FULL: @ %bb.0: @ %entryfp-to-i-entry +; FULL-NEXT: vmov r0, r2, d0 ; FULL-NEXT: mov.w r12, #0 -; FULL-NEXT: sbcs r2, r3, #0 +; FULL-NEXT: movw r1, #1023 +; FULL-NEXT: ubfx r3, r2, #20, #11 +; FULL-NEXT: cmp r3, r1 +; FULL-NEXT: bhs .LBB19_2 +; FULL-NEXT: @ %bb.1: +; FULL-NEXT: movs r0, #0 +; FULL-NEXT: movs r2, #0 +; FULL-NEXT: movs r3, #0 +; FULL-NEXT: movs r1, #0 +; FULL-NEXT: b .LBB19_6 +; FULL-NEXT: .LBB19_2: @ %fp-to-i-if-check.exp.size +; FULL-NEXT: .save {r4, r5, r6, lr} +; FULL-NEXT: push {r4, r5, r6, lr} +; FULL-NEXT: .pad #32 +; FULL-NEXT: sub sp, #32 +; FULL-NEXT: movs r1, #1 +; FULL-NEXT: bfi r2, r1, #20, #12 +; FULL-NEXT: movw r1, #1074 +; FULL-NEXT: cmp r3, r1 +; FULL-NEXT: bhi .LBB19_4 +; FULL-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; FULL-NEXT: movw r1, #1075 +; FULL-NEXT: subs r4, r1, r3 +; FULL-NEXT: movw r1, #1043 +; FULL-NEXT: lsr.w lr, r0, r4 +; FULL-NEXT: rsb.w r0, r4, #32 +; FULL-NEXT: subs r1, r1, r3 +; FULL-NEXT: mov.w r3, #0 +; FULL-NEXT: lsl.w r0, r2, r0 +; FULL-NEXT: orr.w r0, r0, lr +; FULL-NEXT: it pl +; FULL-NEXT: lsrpl.w r0, r2, r1 +; FULL-NEXT: lsr.w r2, r2, r4 +; FULL-NEXT: it pl +; FULL-NEXT: movpl r2, #0 +; FULL-NEXT: movs r1, #0 +; FULL-NEXT: b .LBB19_5 +; FULL-NEXT: .LBB19_4: @ %fp-to-i-if-exp.large +; FULL-NEXT: movs r1, #0 +; FULL-NEXT: strd r0, r2, [sp, #16] +; FULL-NEXT: subw r0, r3, #1075 +; FULL-NEXT: mov r2, sp +; FULL-NEXT: strd r1, r1, [sp, #24] +; FULL-NEXT: adds r2, #16 +; FULL-NEXT: strd r1, r1, [sp, #8] +; FULL-NEXT: strd r1, r1, [sp] +; FULL-NEXT: movs r1, #12 +; FULL-NEXT: and.w r1, r1, r0, lsr #3 +; FULL-NEXT: and r0, r0, #31 +; FULL-NEXT: subs r1, r2, r1 +; FULL-NEXT: eor r5, r0, #31 +; FULL-NEXT: ldrd r3, lr, [r1, #8] +; FULL-NEXT: ldrd r6, r2, [r1] +; FULL-NEXT: lsl.w r4, lr, r0 +; FULL-NEXT: lsrs r1, r3, #1 +; FULL-NEXT: lsls r3, r0 +; FULL-NEXT: lsrs r1, r5 +; FULL-NEXT: orrs r1, r4 +; FULL-NEXT: lsrs.w r4, r2, #1 +; FULL-NEXT: lsrs r4, r5 +; FULL-NEXT: orrs r3, r4 +; FULL-NEXT: lsls r2, r0 +; FULL-NEXT: lsrs r4, r6, #1 +; FULL-NEXT: lsr.w r5, r4, r5 +; FULL-NEXT: orrs r2, r5 +; FULL-NEXT: lsl.w r0, r6, r0 +; FULL-NEXT: .LBB19_5: +; FULL-NEXT: add sp, #32 +; FULL-NEXT: pop.w {r4, r5, r6, lr} +; FULL-NEXT: .LBB19_6: @ %fp-to-i-cleanup +; FULL-NEXT: subs r3, #1 +; FULL-NEXT: sbcs r1, r1, #0 ; FULL-NEXT: csel r0, r0, r12, lo -; FULL-NEXT: csel r1, r1, r12, lo -; FULL-NEXT: pop {r7, pc} +; FULL-NEXT: csel r1, r2, r12, lo +; FULL-NEXT: bx lr entry: %conv = fptoui double %x to i128 %0 = icmp ult i128 %conv, 18446744073709551616 @@ -1128,65 +1754,479 @@ entry: define i64 @ustest_f64i64(double %x) { ; SOFT-LABEL: ustest_f64i64: -; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} -; SOFT-NEXT: bl __fixdfti +; SOFT: @ %bb.0: @ %entryfp-to-i-entry +; SOFT-NEXT: .save {r4, r5, r6, r7, lr} +; SOFT-NEXT: push {r4, r5, r6, r7, lr} +; SOFT-NEXT: .pad #60 +; SOFT-NEXT: sub sp, #60 ; SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: ldr r3, .LCPI20_1 +; SOFT-NEXT: mov r5, r3 +; SOFT-NEXT: subs r5, #52 +; SOFT-NEXT: mov r6, r4 +; SOFT-NEXT: sbcs r6, r4 +; SOFT-NEXT: lsls r2, r1, #1 +; SOFT-NEXT: lsrs r2, r2, #21 +; SOFT-NEXT: subs r5, r2, r5 +; SOFT-NEXT: mov r5, r4 +; SOFT-NEXT: sbcs r5, r6 +; SOFT-NEXT: bhs .LBB20_2 +; SOFT-NEXT: @ %bb.1: +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r6, r4 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: b .LBB20_5 +; SOFT-NEXT: .LBB20_2: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: asrs r5, r1, #31 +; SOFT-NEXT: movs r7, #1 +; SOFT-NEXT: str r5, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: orrs r7, r5 +; SOFT-NEXT: ldr r5, .LCPI20_0 +; SOFT-NEXT: mvns r6, r5 +; SOFT-NEXT: mvns r1, r1 +; SOFT-NEXT: orrs r1, r6 +; SOFT-NEXT: subs r1, r5, r1 +; SOFT-NEXT: mvns r0, r0 +; SOFT-NEXT: mvns r0, r0 +; SOFT-NEXT: cmp r3, r2 +; SOFT-NEXT: bls .LBB20_4 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; SOFT-NEXT: subs r2, r3, r2 +; SOFT-NEXT: bl __aeabi_llsr +; SOFT-NEXT: mov r6, r1 +; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: adds r6, r0, r1 +; SOFT-NEXT: adcs r7, r4 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r5, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: adds r6, r0, r6 +; SOFT-NEXT: adcs r1, r4 +; SOFT-NEXT: adds r0, r7, r1 +; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: mov r7, r4 +; SOFT-NEXT: adcs r7, r4 +; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: adcs r5, r7 +; SOFT-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: adds r2, r3, r2 +; SOFT-NEXT: adcs r1, r5 +; SOFT-NEXT: b .LBB20_5 +; SOFT-NEXT: .LBB20_4: @ %fp-to-i-if-exp.large +; SOFT-NEXT: str r4, [sp, #52] +; SOFT-NEXT: str r4, [sp, #36] +; SOFT-NEXT: str r4, [sp, #32] +; SOFT-NEXT: str r4, [sp, #28] +; SOFT-NEXT: str r4, [sp, #24] +; SOFT-NEXT: add r3, sp, #40 +; SOFT-NEXT: stm r3!, {r0, r1, r4} +; SOFT-NEXT: ldr r0, .LCPI20_2 +; SOFT-NEXT: adds r6, r2, r0 +; SOFT-NEXT: str r7, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: movs r7, #31 +; SOFT-NEXT: lsrs r0, r6, #3 +; SOFT-NEXT: ands r6, r7 +; SOFT-NEXT: movs r1, #12 +; SOFT-NEXT: ands r1, r0 +; SOFT-NEXT: add r0, sp, #24 +; SOFT-NEXT: adds r0, #16 +; SOFT-NEXT: subs r5, r0, r1 +; SOFT-NEXT: ldr r0, [r5, #4] +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [r5, #8] +; SOFT-NEXT: ldr r1, [r5, #12] +; SOFT-NEXT: lsrs r3, r0, #1 +; SOFT-NEXT: lsls r0, r6 +; SOFT-NEXT: eors r7, r6 +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: lsrs r2, r2, #1 +; SOFT-NEXT: lsrs r2, r7 +; SOFT-NEXT: orrs r2, r0 +; SOFT-NEXT: lsls r1, r6 +; SOFT-NEXT: lsrs r3, r7 +; SOFT-NEXT: orrs r3, r1 +; SOFT-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: lsls r1, r6 +; SOFT-NEXT: ldr r5, [r5] +; SOFT-NEXT: lsrs r0, r5, #1 +; SOFT-NEXT: lsrs r0, r7 +; SOFT-NEXT: orrs r0, r1 +; SOFT-NEXT: mov r3, r0 +; SOFT-NEXT: lsls r5, r6 +; SOFT-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r6, r3 +; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r0 +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: adds r6, r0, r1 +; SOFT-NEXT: adcs r7, r4 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: adds r6, r0, r6 +; SOFT-NEXT: adcs r1, r4 +; SOFT-NEXT: adds r5, r7, r1 +; SOFT-NEXT: mov r7, r4 +; SOFT-NEXT: adcs r7, r4 +; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: adds r2, r2, r5 +; SOFT-NEXT: adcs r1, r7 +; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r2, r2, r3 +; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r3 +; SOFT-NEXT: .LBB20_5: @ %fp-to-i-cleanup ; SOFT-NEXT: subs r2, r2, #1 -; SOFT-NEXT: mov r2, r3 +; SOFT-NEXT: mov r2, r1 ; SOFT-NEXT: sbcs r2, r4 -; SOFT-NEXT: bge .LBB20_5 -; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: bge .LBB20_6 -; SOFT-NEXT: .LBB20_2: @ %entry -; SOFT-NEXT: blt .LBB20_4 -; SOFT-NEXT: .LBB20_3: @ %entry -; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: .LBB20_4: @ %entry -; SOFT-NEXT: asrs r2, r3, #31 -; SOFT-NEXT: bics r0, r2 -; SOFT-NEXT: bics r1, r2 -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB20_5: @ %entry +; SOFT-NEXT: bge .LBB20_10 +; SOFT-NEXT: @ %bb.6: @ %fp-to-i-cleanup +; SOFT-NEXT: bge .LBB20_11 +; SOFT-NEXT: .LBB20_7: @ %fp-to-i-cleanup +; SOFT-NEXT: blt .LBB20_9 +; SOFT-NEXT: .LBB20_8: @ %fp-to-i-cleanup ; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: blt .LBB20_2 -; SOFT-NEXT: .LBB20_6: @ %entry +; SOFT-NEXT: .LBB20_9: @ %fp-to-i-cleanup +; SOFT-NEXT: asrs r1, r1, #31 +; SOFT-NEXT: bics r0, r1 +; SOFT-NEXT: bics r6, r1 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: add sp, #60 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB20_10: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r6, r4 +; SOFT-NEXT: blt .LBB20_7 +; SOFT-NEXT: .LBB20_11: @ %fp-to-i-cleanup ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bge .LBB20_3 -; SOFT-NEXT: b .LBB20_4 +; SOFT-NEXT: bge .LBB20_8 +; SOFT-NEXT: b .LBB20_9 +; SOFT-NEXT: .p2align 2 +; SOFT-NEXT: @ %bb.12: +; SOFT-NEXT: .LCPI20_0: +; SOFT-NEXT: .long 1048575 @ 0xfffff +; SOFT-NEXT: .LCPI20_1: +; SOFT-NEXT: .long 1075 @ 0x433 +; SOFT-NEXT: .LCPI20_2: +; SOFT-NEXT: .long 4294966221 @ 0xfffffbcd ; ; VFP2-LABEL: ustest_f64i64: -; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: bl __fixdfti -; VFP2-NEXT: subs r2, #1 -; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs r2, r3, #0 +; VFP2: @ %bb.0: @ %entryfp-to-i-entry +; VFP2-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: .pad #52 +; VFP2-NEXT: sub sp, #52 +; VFP2-NEXT: vmov r3, r0, d0 +; VFP2-NEXT: movs r7, #0 +; VFP2-NEXT: movw r1, #1023 +; VFP2-NEXT: ubfx r2, r0, #20, #11 +; VFP2-NEXT: cmp r2, r1 +; VFP2-NEXT: bhs .LBB20_2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: movs r1, #0 +; VFP2-NEXT: b .LBB20_5 +; VFP2-NEXT: .LBB20_2: @ %fp-to-i-if-check.exp.size +; VFP2-NEXT: movs r1, #1 +; VFP2-NEXT: mov r4, r0 +; VFP2-NEXT: orr.w r10, r1, r0, asr #31 +; VFP2-NEXT: bfi r4, r1, #20, #12 +; VFP2-NEXT: asrs r0, r0, #31 +; VFP2-NEXT: movw r1, #1074 +; VFP2-NEXT: cmp r2, r1 +; VFP2-NEXT: bhi .LBB20_4 +; VFP2-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; VFP2-NEXT: movw r1, #1075 +; VFP2-NEXT: subs r1, r1, r2 +; VFP2-NEXT: lsr.w lr, r3, r1 +; VFP2-NEXT: rsb.w r3, r1, #32 +; VFP2-NEXT: lsr.w r1, r4, r1 +; VFP2-NEXT: lsl.w r3, r4, r3 +; VFP2-NEXT: orr.w r5, lr, r3 +; VFP2-NEXT: movw r3, #1043 +; VFP2-NEXT: subs r2, r3, r2 +; VFP2-NEXT: it pl +; VFP2-NEXT: lsrpl.w r5, r4, r2 +; VFP2-NEXT: umull r3, r2, r5, r10 +; VFP2-NEXT: mov.w r4, #0 +; VFP2-NEXT: it pl +; VFP2-NEXT: movpl r1, #0 +; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: umlal r2, r4, r1, r10 +; VFP2-NEXT: umull r8, r12, r5, r0 +; VFP2-NEXT: adds.w r6, r8, r2 +; VFP2-NEXT: adcs.w r6, r4, r12 +; VFP2-NEXT: umlal r2, r4, r5, r0 +; VFP2-NEXT: adc r6, lr, #0 +; VFP2-NEXT: mla r12, r0, r1, r12 +; VFP2-NEXT: umlal r4, r6, r1, r0 +; VFP2-NEXT: mla r1, r0, r5, r12 +; VFP2-NEXT: adds.w r0, r4, r8 +; VFP2-NEXT: adcs r1, r6 +; VFP2-NEXT: b .LBB20_5 +; VFP2-NEXT: .LBB20_4: @ %fp-to-i-if-exp.large +; VFP2-NEXT: add r1, sp, #32 +; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: stm.w r1, {r3, r4, lr} +; VFP2-NEXT: subw r1, r2, #1075 +; VFP2-NEXT: movs r2, #12 +; VFP2-NEXT: add r3, sp, #16 +; VFP2-NEXT: and.w r2, r2, r1, lsr #3 +; VFP2-NEXT: adds r3, #16 +; VFP2-NEXT: str.w lr, [sp, #44] +; VFP2-NEXT: strd lr, lr, [sp, #24] +; VFP2-NEXT: subs r2, r3, r2 +; VFP2-NEXT: strd lr, lr, [sp, #16] +; VFP2-NEXT: and r1, r1, #31 +; VFP2-NEXT: ldrd r3, r4, [r2] +; VFP2-NEXT: eor r12, r1, #31 +; VFP2-NEXT: str r4, [sp, #8] @ 4-byte Spill +; VFP2-NEXT: lsls r4, r1 +; VFP2-NEXT: ldrd r7, r2, [r2, #8] +; VFP2-NEXT: lsl.w r5, r3, r1 +; VFP2-NEXT: umull r6, r8, r5, r0 +; VFP2-NEXT: str r2, [sp, #12] @ 4-byte Spill +; VFP2-NEXT: lsrs r2, r3, #1 +; VFP2-NEXT: lsr.w r2, r2, r12 +; VFP2-NEXT: orr.w r11, r4, r2 +; VFP2-NEXT: umull r3, r2, r5, r10 +; VFP2-NEXT: movs r4, #0 +; VFP2-NEXT: str r6, [sp, #4] @ 4-byte Spill +; VFP2-NEXT: umlal r2, r4, r11, r10 +; VFP2-NEXT: adds.w r9, r6, r2 +; VFP2-NEXT: adcs.w r6, r4, r8 +; VFP2-NEXT: umlal r2, r4, r5, r0 +; VFP2-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; VFP2-NEXT: adc r9, lr, #0 +; VFP2-NEXT: umlal r4, r9, r11, r0 +; VFP2-NEXT: lsl.w lr, r6, r1 +; VFP2-NEXT: mov r6, r7 +; VFP2-NEXT: lsl.w r1, r6, r1 +; VFP2-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; VFP2-NEXT: lsrs r7, r7, #1 +; VFP2-NEXT: lsr.w r7, r7, r12 +; VFP2-NEXT: orr.w lr, lr, r7 +; VFP2-NEXT: lsrs.w r7, r6, #1 +; VFP2-NEXT: lsr.w r7, r7, r12 +; VFP2-NEXT: orrs r1, r7 +; VFP2-NEXT: umull r12, r7, r10, r1 +; VFP2-NEXT: mla r7, r10, lr, r7 +; VFP2-NEXT: mla r1, r0, r1, r7 +; VFP2-NEXT: mla r7, r0, r11, r8 +; VFP2-NEXT: mla r0, r0, r5, r7 +; VFP2-NEXT: ldr r7, [sp, #4] @ 4-byte Reload +; VFP2-NEXT: adds.w r5, r7, r12 +; VFP2-NEXT: mov.w r7, #0 +; VFP2-NEXT: adcs r1, r0 +; VFP2-NEXT: adds r0, r4, r5 +; VFP2-NEXT: adc.w r1, r1, r9 +; VFP2-NEXT: .LBB20_5: @ %fp-to-i-cleanup +; VFP2-NEXT: subs r0, #1 +; VFP2-NEXT: sbcs r0, r1, #0 ; VFP2-NEXT: itt ge -; VFP2-NEXT: movge r3, r12 -; VFP2-NEXT: movge r0, r12 +; VFP2-NEXT: movge r1, r7 +; VFP2-NEXT: movge r3, r7 +; VFP2-NEXT: bic.w r0, r3, r1, asr #31 ; VFP2-NEXT: it ge -; VFP2-NEXT: movge r1, r12 -; VFP2-NEXT: bic.w r0, r0, r3, asr #31 -; VFP2-NEXT: bic.w r1, r1, r3, asr #31 -; VFP2-NEXT: pop {r7, pc} +; VFP2-NEXT: movge r2, r7 +; VFP2-NEXT: bic.w r1, r2, r1, asr #31 +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; FULL-LABEL: ustest_f64i64: -; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: bl __fixdfti -; FULL-NEXT: subs r2, #1 -; FULL-NEXT: mov.w r12, #0 -; FULL-NEXT: sbcs r2, r3, #0 -; FULL-NEXT: csel r2, r3, r12, lt -; FULL-NEXT: csel r0, r0, r12, lt -; FULL-NEXT: csel r1, r1, r12, lt -; FULL-NEXT: bic.w r0, r0, r2, asr #31 -; FULL-NEXT: bic.w r1, r1, r2, asr #31 -; FULL-NEXT: pop {r7, pc} +; FULL: @ %bb.0: @ %entryfp-to-i-entry +; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FULL-NEXT: .pad #52 +; FULL-NEXT: sub sp, #52 +; FULL-NEXT: vmov r3, r0, d0 +; FULL-NEXT: movs r5, #0 +; FULL-NEXT: movw r1, #1023 +; FULL-NEXT: ubfx r2, r0, #20, #11 +; FULL-NEXT: cmp r2, r1 +; FULL-NEXT: bhs .LBB20_2 +; FULL-NEXT: @ %bb.1: +; FULL-NEXT: mov.w r8, #0 +; FULL-NEXT: movs r2, #0 +; FULL-NEXT: movs r0, #0 +; FULL-NEXT: movs r1, #0 +; FULL-NEXT: b .LBB20_5 +; FULL-NEXT: .LBB20_2: @ %fp-to-i-if-check.exp.size +; FULL-NEXT: movs r1, #1 +; FULL-NEXT: mov r4, r0 +; FULL-NEXT: orr.w r11, r1, r0, asr #31 +; FULL-NEXT: bfi r4, r1, #20, #12 +; FULL-NEXT: asrs r0, r0, #31 +; FULL-NEXT: movw r1, #1074 +; FULL-NEXT: cmp r2, r1 +; FULL-NEXT: bhi .LBB20_4 +; FULL-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; FULL-NEXT: movw r1, #1075 +; FULL-NEXT: movw r6, #1043 +; FULL-NEXT: subs r1, r1, r2 +; FULL-NEXT: subs r2, r6, r2 +; FULL-NEXT: lsr.w lr, r3, r1 +; FULL-NEXT: rsb.w r3, r1, #32 +; FULL-NEXT: lsr.w r1, r4, r1 +; FULL-NEXT: lsl.w r3, r4, r3 +; FULL-NEXT: orr.w r3, r3, lr +; FULL-NEXT: mov.w lr, #0 +; FULL-NEXT: it pl +; FULL-NEXT: lsrpl.w r3, r4, r2 +; FULL-NEXT: umull r8, r2, r3, r11 +; FULL-NEXT: mov.w r4, #0 +; FULL-NEXT: it pl +; FULL-NEXT: movpl r1, #0 +; FULL-NEXT: umlal r2, r4, r1, r11 +; FULL-NEXT: umull r12, r7, r3, r0 +; FULL-NEXT: adds.w r6, r12, r2 +; FULL-NEXT: adcs.w r6, r4, r7 +; FULL-NEXT: umlal r2, r4, r3, r0 +; FULL-NEXT: adc r6, lr, #0 +; FULL-NEXT: mla r7, r0, r1, r7 +; FULL-NEXT: umlal r4, r6, r1, r0 +; FULL-NEXT: mla r1, r0, r3, r7 +; FULL-NEXT: adds.w r0, r4, r12 +; FULL-NEXT: adcs r1, r6 +; FULL-NEXT: b .LBB20_5 +; FULL-NEXT: .LBB20_4: @ %fp-to-i-if-exp.large +; FULL-NEXT: add r1, sp, #32 +; FULL-NEXT: mov.w lr, #0 +; FULL-NEXT: stm.w r1, {r3, r4, lr} +; FULL-NEXT: subw r1, r2, #1075 +; FULL-NEXT: movs r2, #12 +; FULL-NEXT: add r3, sp, #16 +; FULL-NEXT: and.w r2, r2, r1, lsr #3 +; FULL-NEXT: adds r3, #16 +; FULL-NEXT: str.w lr, [sp, #44] +; FULL-NEXT: strd lr, lr, [sp, #24] +; FULL-NEXT: subs r2, r3, r2 +; FULL-NEXT: strd lr, lr, [sp, #16] +; FULL-NEXT: and r1, r1, #31 +; FULL-NEXT: ldrd r3, r6, [r2] +; FULL-NEXT: lsl.w r4, r6, r1 +; FULL-NEXT: str r6, [sp, #4] @ 4-byte Spill +; FULL-NEXT: movs r6, #0 +; FULL-NEXT: ldr r7, [r2, #8] +; FULL-NEXT: str r7, [sp, #8] @ 4-byte Spill +; FULL-NEXT: eor r7, r1, #31 +; FULL-NEXT: ldr r2, [r2, #12] +; FULL-NEXT: str r2, [sp, #12] @ 4-byte Spill +; FULL-NEXT: lsrs r2, r3, #1 +; FULL-NEXT: lsrs r2, r7 +; FULL-NEXT: lsls r3, r1 +; FULL-NEXT: orr.w r12, r4, r2 +; FULL-NEXT: umull r8, r2, r3, r11 +; FULL-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; FULL-NEXT: umull r4, r9, r3, r0 +; FULL-NEXT: umlal r2, r6, r12, r11 +; FULL-NEXT: str r4, [sp] @ 4-byte Spill +; FULL-NEXT: adds.w r10, r4, r2 +; FULL-NEXT: ldr.w r10, [sp, #8] @ 4-byte Reload +; FULL-NEXT: adcs.w r4, r6, r9 +; FULL-NEXT: adc r4, lr, #0 +; FULL-NEXT: lsl.w lr, r5, r1 +; FULL-NEXT: umlal r2, r6, r3, r0 +; FULL-NEXT: lsr.w r5, r10, #1 +; FULL-NEXT: lsrs r5, r7 +; FULL-NEXT: orr.w lr, lr, r5 +; FULL-NEXT: ldr r5, [sp, #4] @ 4-byte Reload +; FULL-NEXT: lsl.w r1, r10, r1 +; FULL-NEXT: umlal r6, r4, r12, r0 +; FULL-NEXT: lsrs.w r5, r5, #1 +; FULL-NEXT: lsrs r5, r7 +; FULL-NEXT: orrs r1, r5 +; FULL-NEXT: movs r5, #0 +; FULL-NEXT: umull r10, r7, r11, r1 +; FULL-NEXT: mla r7, r11, lr, r7 +; FULL-NEXT: mla r1, r0, r1, r7 +; FULL-NEXT: mla r7, r0, r12, r9 +; FULL-NEXT: mla r0, r0, r3, r7 +; FULL-NEXT: ldr r3, [sp] @ 4-byte Reload +; FULL-NEXT: adds.w r3, r3, r10 +; FULL-NEXT: adcs r1, r0 +; FULL-NEXT: adds r0, r6, r3 +; FULL-NEXT: adcs r1, r4 +; FULL-NEXT: .LBB20_5: @ %fp-to-i-cleanup +; FULL-NEXT: subs r0, #1 +; FULL-NEXT: sbcs r0, r1, #0 +; FULL-NEXT: csel r1, r1, r5, lt +; FULL-NEXT: csel r0, r8, r5, lt +; FULL-NEXT: csel r2, r2, r5, lt +; FULL-NEXT: bic.w r0, r0, r1, asr #31 +; FULL-NEXT: bic.w r1, r2, r1, asr #31 +; FULL-NEXT: add sp, #52 +; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi double %x to i128 %0 = icmp slt i128 %conv, 18446744073709551616 @@ -1199,126 +2239,500 @@ entry: define i64 @stest_f32i64(float %x) { ; SOFT-LABEL: stest_f32i64: -; SOFT: @ %bb.0: @ %entry +; SOFT: @ %bb.0: @ %entryfp-to-i-entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #4 -; SOFT-NEXT: sub sp, #4 -; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: movs r4, #0 -; SOFT-NEXT: mvns r5, r4 -; SOFT-NEXT: ldr r6, .LCPI21_0 -; SOFT-NEXT: adds r7, r0, #1 -; SOFT-NEXT: mov r7, r1 -; SOFT-NEXT: sbcs r7, r6 -; SOFT-NEXT: mov r7, r2 -; SOFT-NEXT: sbcs r7, r4 -; SOFT-NEXT: mov r7, r3 -; SOFT-NEXT: sbcs r7, r4 -; SOFT-NEXT: bge .LBB21_8 -; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: bge .LBB21_9 -; SOFT-NEXT: .LBB21_2: @ %entry -; SOFT-NEXT: bge .LBB21_10 -; SOFT-NEXT: .LBB21_3: @ %entry -; SOFT-NEXT: blt .LBB21_5 -; SOFT-NEXT: .LBB21_4: @ %entry -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: .LBB21_5: @ %entry -; SOFT-NEXT: movs r6, #1 -; SOFT-NEXT: lsls r6, r6, #31 -; SOFT-NEXT: rsbs r7, r0, #0 -; SOFT-NEXT: mov r7, r6 -; SOFT-NEXT: sbcs r7, r1 -; SOFT-NEXT: mov r7, r5 -; SOFT-NEXT: sbcs r7, r2 -; SOFT-NEXT: sbcs r5, r3 -; SOFT-NEXT: bge .LBB21_11 -; SOFT-NEXT: @ %bb.6: @ %entry -; SOFT-NEXT: bge .LBB21_12 -; SOFT-NEXT: .LBB21_7: @ %entry -; SOFT-NEXT: add sp, #4 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB21_8: @ %entry -; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: blt .LBB21_2 -; SOFT-NEXT: .LBB21_9: @ %entry +; SOFT-NEXT: .pad #68 +; SOFT-NEXT: sub sp, #68 +; SOFT-NEXT: movs r2, #1 +; SOFT-NEXT: lsrs r1, r0, #23 +; SOFT-NEXT: uxtb r5, r1 +; SOFT-NEXT: movs r7, #0 +; SOFT-NEXT: cmp r5, #127 +; SOFT-NEXT: str r2, [sp, #24] @ 4-byte Spill +; SOFT-NEXT: bhs .LBB21_2 +; SOFT-NEXT: @ %bb.1: +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r5, r7 +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: b .LBB21_5 +; SOFT-NEXT: .LBB21_2: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: asrs r4, r0, #31 +; SOFT-NEXT: str r4, [sp, #28] @ 4-byte Spill +; SOFT-NEXT: orrs r4, r2 +; SOFT-NEXT: ldr r1, .LCPI21_0 +; SOFT-NEXT: ands r0, r1 +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: adds r0, r0, #1 +; SOFT-NEXT: cmp r5, #149 +; SOFT-NEXT: bhi .LBB21_4 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; SOFT-NEXT: movs r1, #150 +; SOFT-NEXT: subs r1, r1, r5 +; SOFT-NEXT: lsrs r0, r1 +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill ; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: blt .LBB21_3 -; SOFT-NEXT: .LBB21_10: @ %entry +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: bge .LBB21_4 -; SOFT-NEXT: b .LBB21_5 -; SOFT-NEXT: .LBB21_11: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: blt .LBB21_7 -; SOFT-NEXT: .LBB21_12: @ %entry +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: adcs r4, r6 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: add sp, #4 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.13: -; SOFT-NEXT: .LCPI21_0: -; SOFT-NEXT: .long 2147483647 @ 0x7fffffff -; -; VFP2-LABEL: stest_f32i64: -; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r4, r5, r7, lr} -; VFP2-NEXT: push {r4, r5, r7, lr} -; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs.w r4, r0, #-1 -; VFP2-NEXT: mvn lr, #-2147483648 -; VFP2-NEXT: sbcs.w r4, r1, lr +; SOFT-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r5, r0, r2 +; SOFT-NEXT: adcs r1, r6 +; SOFT-NEXT: adds r0, r4, r1 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: adcs r4, r6 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: adcs r1, r4 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r2, r3, r2 +; SOFT-NEXT: adcs r1, r4 +; SOFT-NEXT: b .LBB21_5 +; SOFT-NEXT: .LBB21_4: @ %fp-to-i-if-exp.large +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: str r6, [sp, #60] +; SOFT-NEXT: str r6, [sp, #56] +; SOFT-NEXT: str r6, [sp, #52] +; SOFT-NEXT: str r0, [sp, #48] +; SOFT-NEXT: str r6, [sp, #44] +; SOFT-NEXT: str r6, [sp, #40] +; SOFT-NEXT: str r6, [sp, #36] +; SOFT-NEXT: str r6, [sp, #32] +; SOFT-NEXT: subs r5, #150 +; SOFT-NEXT: movs r2, #31 +; SOFT-NEXT: lsrs r0, r5, #3 +; SOFT-NEXT: ands r5, r2 +; SOFT-NEXT: movs r1, #12 +; SOFT-NEXT: ands r1, r0 +; SOFT-NEXT: add r0, sp, #32 +; SOFT-NEXT: adds r0, #16 +; SOFT-NEXT: str r4, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: subs r1, r0, r1 +; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [r1, #4] +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [r1, #8] +; SOFT-NEXT: ldr r4, [r1, #12] +; SOFT-NEXT: lsrs r3, r0, #1 +; SOFT-NEXT: lsls r0, r5 +; SOFT-NEXT: eors r2, r5 +; SOFT-NEXT: mov r1, r2 +; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: lsrs r2, r2, #1 +; SOFT-NEXT: lsrs r2, r1 +; SOFT-NEXT: orrs r2, r0 +; SOFT-NEXT: lsls r4, r5 +; SOFT-NEXT: lsrs r3, r1 +; SOFT-NEXT: orrs r3, r4 +; SOFT-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: lsls r1, r5 +; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: ldr r4, [r0] +; SOFT-NEXT: lsrs r0, r4, #1 +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: lsrs r0, r2 +; SOFT-NEXT: orrs r0, r1 +; SOFT-NEXT: mov r3, r0 +; SOFT-NEXT: lsls r4, r5 +; SOFT-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r5, r3 +; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r0 +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: ldr r5, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: adds r5, r0, r1 +; SOFT-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: adcs r0, r6 +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: adds r5, r0, r5 +; SOFT-NEXT: adcs r1, r6 +; SOFT-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: adcs r4, r6 +; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: adds r2, r2, r3 +; SOFT-NEXT: adcs r1, r4 +; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r2, r2, r3 +; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r3 +; SOFT-NEXT: .LBB21_5: @ %fp-to-i-cleanup +; SOFT-NEXT: mvns r3, r7 +; SOFT-NEXT: ldr r4, .LCPI21_1 +; SOFT-NEXT: adds r6, r0, #1 +; SOFT-NEXT: mov r6, r5 +; SOFT-NEXT: sbcs r6, r4 +; SOFT-NEXT: mov r6, r2 +; SOFT-NEXT: sbcs r6, r7 +; SOFT-NEXT: mov r6, r1 +; SOFT-NEXT: sbcs r6, r7 +; SOFT-NEXT: bge .LBB21_15 +; SOFT-NEXT: @ %bb.6: @ %fp-to-i-cleanup +; SOFT-NEXT: bge .LBB21_16 +; SOFT-NEXT: .LBB21_7: @ %fp-to-i-cleanup +; SOFT-NEXT: bge .LBB21_17 +; SOFT-NEXT: .LBB21_8: @ %fp-to-i-cleanup +; SOFT-NEXT: blt .LBB21_10 +; SOFT-NEXT: .LBB21_9: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: .LBB21_10: @ %fp-to-i-cleanup +; SOFT-NEXT: ldr r4, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: lsls r4, r4, #31 +; SOFT-NEXT: rsbs r6, r0, #0 +; SOFT-NEXT: mov r6, r4 +; SOFT-NEXT: sbcs r6, r5 +; SOFT-NEXT: mov r6, r3 +; SOFT-NEXT: sbcs r6, r2 +; SOFT-NEXT: sbcs r3, r1 +; SOFT-NEXT: blt .LBB21_12 +; SOFT-NEXT: @ %bb.11: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: .LBB21_12: @ %fp-to-i-cleanup +; SOFT-NEXT: blt .LBB21_14 +; SOFT-NEXT: @ %bb.13: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r5, r4 +; SOFT-NEXT: .LBB21_14: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: add sp, #68 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB21_15: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: blt .LBB21_7 +; SOFT-NEXT: .LBB21_16: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: blt .LBB21_8 +; SOFT-NEXT: .LBB21_17: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r5, r4 +; SOFT-NEXT: bge .LBB21_9 +; SOFT-NEXT: b .LBB21_10 +; SOFT-NEXT: .p2align 2 +; SOFT-NEXT: @ %bb.18: +; SOFT-NEXT: .LCPI21_0: +; SOFT-NEXT: .long 8388607 @ 0x7fffff +; SOFT-NEXT: .LCPI21_1: +; SOFT-NEXT: .long 2147483647 @ 0x7fffffff +; +; VFP2-LABEL: stest_f32i64: +; VFP2: @ %bb.0: @ %entryfp-to-i-entry +; VFP2-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: .pad #52 +; VFP2-NEXT: sub sp, #52 +; VFP2-NEXT: vmov r1, s0 ; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs r4, r2, #0 -; VFP2-NEXT: sbcs r4, r3, #0 -; VFP2-NEXT: mov.w r4, #0 +; VFP2-NEXT: ubfx r0, r1, #23, #8 +; VFP2-NEXT: cmp r0, #127 +; VFP2-NEXT: bhs .LBB21_2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: b .LBB21_5 +; VFP2-NEXT: .LBB21_2: @ %fp-to-i-if-check.exp.size +; VFP2-NEXT: movs r2, #1 +; VFP2-NEXT: mov r3, r1 +; VFP2-NEXT: bfi r3, r2, #23, #9 +; VFP2-NEXT: orr.w r10, r2, r1, asr #31 +; VFP2-NEXT: asrs r7, r1, #31 +; VFP2-NEXT: cmp r0, #149 +; VFP2-NEXT: bhi .LBB21_4 +; VFP2-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; VFP2-NEXT: rsb.w r0, r0, #150 +; VFP2-NEXT: movs r5, #0 +; VFP2-NEXT: lsr.w r4, r3, r0 +; VFP2-NEXT: umull r0, r3, r4, r10 +; VFP2-NEXT: umull r2, r1, r4, r7 +; VFP2-NEXT: adds r6, r2, r3 +; VFP2-NEXT: adcs r6, r1, #0 +; VFP2-NEXT: adc r8, r5, #0 +; VFP2-NEXT: adds r6, r2, r3 +; VFP2-NEXT: mla r6, r7, r4, r1 +; VFP2-NEXT: adcs.w lr, r2, r1 +; VFP2-NEXT: umlal r3, r5, r4, r7 +; VFP2-NEXT: adc.w r2, r8, r6 +; VFP2-NEXT: b .LBB21_5 +; VFP2-NEXT: .LBB21_4: @ %fp-to-i-if-exp.large +; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: subs r0, #150 +; VFP2-NEXT: movs r1, #12 +; VFP2-NEXT: strd r3, lr, [sp, #32] +; VFP2-NEXT: add r3, sp, #16 +; VFP2-NEXT: and.w r1, r1, r0, lsr #3 +; VFP2-NEXT: adds r3, #16 +; VFP2-NEXT: strd lr, lr, [sp, #40] +; VFP2-NEXT: strd lr, lr, [sp, #24] +; VFP2-NEXT: subs r1, r3, r1 +; VFP2-NEXT: strd lr, lr, [sp, #16] +; VFP2-NEXT: ldrd r3, r2, [r1] +; VFP2-NEXT: str r2, [sp, #4] @ 4-byte Spill +; VFP2-NEXT: ldr r6, [r1, #8] +; VFP2-NEXT: str r6, [sp, #12] @ 4-byte Spill +; VFP2-NEXT: lsrs r6, r3, #1 +; VFP2-NEXT: ldr r1, [r1, #12] +; VFP2-NEXT: str r1, [sp, #8] @ 4-byte Spill +; VFP2-NEXT: and r1, r0, #31 +; VFP2-NEXT: eor r12, r1, #31 +; VFP2-NEXT: lsl.w r5, r3, r1 +; VFP2-NEXT: lsr.w r0, r6, r12 +; VFP2-NEXT: lsl.w r6, r2, r1 +; VFP2-NEXT: orr.w r11, r6, r0 +; VFP2-NEXT: umull r0, r3, r5, r10 +; VFP2-NEXT: movs r6, #0 +; VFP2-NEXT: umull r2, r8, r5, r7 +; VFP2-NEXT: umlal r3, r6, r11, r10 +; VFP2-NEXT: str r2, [sp] @ 4-byte Spill +; VFP2-NEXT: adds.w r9, r2, r3 +; VFP2-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; VFP2-NEXT: adcs.w r4, r6, r8 +; VFP2-NEXT: umlal r3, r6, r5, r7 +; VFP2-NEXT: ldr r4, [sp, #12] @ 4-byte Reload +; VFP2-NEXT: adc r9, lr, #0 +; VFP2-NEXT: lsl.w lr, r2, r1 +; VFP2-NEXT: umlal r6, r9, r11, r7 +; VFP2-NEXT: lsrs r2, r4, #1 +; VFP2-NEXT: lsr.w r2, r2, r12 +; VFP2-NEXT: orr.w lr, lr, r2 +; VFP2-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; VFP2-NEXT: lsl.w r1, r4, r1 +; VFP2-NEXT: lsrs.w r2, r2, #1 +; VFP2-NEXT: lsr.w r2, r2, r12 +; VFP2-NEXT: orrs r1, r2 +; VFP2-NEXT: umull r12, r2, r10, r1 +; VFP2-NEXT: mla r2, r10, lr, r2 +; VFP2-NEXT: mla r1, r7, r1, r2 +; VFP2-NEXT: mla r2, r7, r11, r8 +; VFP2-NEXT: mla r2, r7, r5, r2 +; VFP2-NEXT: ldr r7, [sp] @ 4-byte Reload +; VFP2-NEXT: adds.w r7, r7, r12 +; VFP2-NEXT: mov.w r12, #0 +; VFP2-NEXT: adcs r1, r2 +; VFP2-NEXT: adds.w lr, r6, r7 +; VFP2-NEXT: adc.w r2, r9, r1 +; VFP2-NEXT: .LBB21_5: @ %fp-to-i-cleanup +; VFP2-NEXT: subs.w r7, r0, #-1 +; VFP2-NEXT: mvn r1, #-2147483648 +; VFP2-NEXT: sbcs.w r7, r3, r1 +; VFP2-NEXT: mov.w r6, #-2147483648 +; VFP2-NEXT: sbcs r7, lr, #0 +; VFP2-NEXT: sbcs r7, r2, #0 ; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r4, #1 -; VFP2-NEXT: cmp r4, #0 -; VFP2-NEXT: itet eq -; VFP2-NEXT: moveq r3, r4 -; VFP2-NEXT: movne r4, r2 -; VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: mov.w r2, #-1 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: movlt r1, r3 +; VFP2-NEXT: mov.w r7, #0 +; VFP2-NEXT: mov.w r3, #-1 +; VFP2-NEXT: it ge +; VFP2-NEXT: movge r0, r3 +; VFP2-NEXT: it lt +; VFP2-NEXT: movlt r7, #1 +; VFP2-NEXT: cmp r7, #0 +; VFP2-NEXT: ite eq +; VFP2-NEXT: moveq r2, r7 +; VFP2-NEXT: movne r7, lr ; VFP2-NEXT: rsbs r5, r0, #0 -; VFP2-NEXT: mov.w lr, #-2147483648 -; VFP2-NEXT: sbcs.w r5, lr, r1 -; VFP2-NEXT: sbcs.w r4, r2, r4 -; VFP2-NEXT: sbcs r2, r3 +; VFP2-NEXT: sbcs.w r5, r6, r1 +; VFP2-NEXT: sbcs.w r7, r3, r7 +; VFP2-NEXT: sbcs.w r2, r3, r2 ; VFP2-NEXT: itt ge ; VFP2-NEXT: movge r0, r12 -; VFP2-NEXT: movge r1, lr -; VFP2-NEXT: pop {r4, r5, r7, pc} +; VFP2-NEXT: movge r1, r6 +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; FULL-LABEL: stest_f32i64: -; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r7, lr} -; FULL-NEXT: push {r4, r5, r7, lr} -; FULL-NEXT: bl __fixsfti -; FULL-NEXT: subs.w lr, r0, #-1 -; FULL-NEXT: mvn r12, #-2147483648 -; FULL-NEXT: sbcs.w lr, r1, r12 -; FULL-NEXT: sbcs lr, r2, #0 -; FULL-NEXT: sbcs lr, r3, #0 -; FULL-NEXT: cset lr, lt -; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r5, r3, lr, ne -; FULL-NEXT: mov.w r3, #-1 -; FULL-NEXT: csel r0, r0, r3, ne -; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: csel r2, r2, lr, ne -; FULL-NEXT: rsbs r4, r0, #0 -; FULL-NEXT: mov.w r12, #-2147483648 -; FULL-NEXT: sbcs.w r4, r12, r1 -; FULL-NEXT: sbcs.w r2, r3, r2 -; FULL-NEXT: sbcs.w r2, r3, r5 -; FULL-NEXT: it ge -; FULL-NEXT: movge r0, #0 -; FULL-NEXT: csel r1, r1, r12, lt -; FULL-NEXT: pop {r4, r5, r7, pc} +; FULL: @ %bb.0: @ %entryfp-to-i-entry +; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FULL-NEXT: .pad #52 +; FULL-NEXT: sub sp, #52 +; FULL-NEXT: vmov r3, s0 +; FULL-NEXT: mov.w r12, #0 +; FULL-NEXT: ubfx r1, r3, #23, #8 +; FULL-NEXT: cmp r1, #127 +; FULL-NEXT: bhs .LBB21_2 +; FULL-NEXT: @ %bb.1: +; FULL-NEXT: movs r1, #0 +; FULL-NEXT: movs r2, #0 +; FULL-NEXT: movs r0, #0 +; FULL-NEXT: movs r3, #0 +; FULL-NEXT: b .LBB21_5 +; FULL-NEXT: .LBB21_2: @ %fp-to-i-if-check.exp.size +; FULL-NEXT: movs r0, #1 +; FULL-NEXT: mov r2, r3 +; FULL-NEXT: bfi r2, r0, #23, #9 +; FULL-NEXT: orr.w r10, r0, r3, asr #31 +; FULL-NEXT: asrs r7, r3, #31 +; FULL-NEXT: cmp r1, #149 +; FULL-NEXT: bhi .LBB21_4 +; FULL-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; FULL-NEXT: rsb.w r1, r1, #150 +; FULL-NEXT: movs r5, #0 +; FULL-NEXT: lsr.w r4, r2, r1 +; FULL-NEXT: umull r1, r2, r4, r10 +; FULL-NEXT: umull r0, r3, r4, r7 +; FULL-NEXT: adds r6, r0, r2 +; FULL-NEXT: adcs r6, r3, #0 +; FULL-NEXT: adc lr, r5, #0 +; FULL-NEXT: adds r6, r0, r2 +; FULL-NEXT: mla r6, r7, r4, r3 +; FULL-NEXT: adcs r0, r3 +; FULL-NEXT: umlal r2, r5, r4, r7 +; FULL-NEXT: adc.w r3, lr, r6 +; FULL-NEXT: b .LBB21_5 +; FULL-NEXT: .LBB21_4: @ %fp-to-i-if-exp.large +; FULL-NEXT: mov.w lr, #0 +; FULL-NEXT: subs r1, #150 +; FULL-NEXT: add r3, sp, #16 +; FULL-NEXT: strd r2, lr, [sp, #32] +; FULL-NEXT: movs r2, #12 +; FULL-NEXT: adds r3, #16 +; FULL-NEXT: and.w r2, r2, r1, lsr #3 +; FULL-NEXT: strd lr, lr, [sp, #40] +; FULL-NEXT: strd lr, lr, [sp, #24] +; FULL-NEXT: subs r2, r3, r2 +; FULL-NEXT: strd lr, lr, [sp, #16] +; FULL-NEXT: movs r5, #0 +; FULL-NEXT: ldrd r3, r0, [r2] +; FULL-NEXT: str r0, [sp, #4] @ 4-byte Spill +; FULL-NEXT: ldr r6, [r2, #8] +; FULL-NEXT: str r6, [sp, #12] @ 4-byte Spill +; FULL-NEXT: and r6, r1, #31 +; FULL-NEXT: ldr r2, [r2, #12] +; FULL-NEXT: eor r12, r6, #31 +; FULL-NEXT: str r2, [sp, #8] @ 4-byte Spill +; FULL-NEXT: lsrs r2, r3, #1 +; FULL-NEXT: lsr.w r1, r2, r12 +; FULL-NEXT: lsl.w r2, r0, r6 +; FULL-NEXT: lsls r3, r6 +; FULL-NEXT: orr.w r11, r2, r1 +; FULL-NEXT: umull r1, r2, r3, r10 +; FULL-NEXT: umull r0, r8, r3, r7 +; FULL-NEXT: umlal r2, r5, r11, r10 +; FULL-NEXT: str r0, [sp] @ 4-byte Spill +; FULL-NEXT: adds.w r9, r0, r2 +; FULL-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; FULL-NEXT: adcs.w r4, r5, r8 +; FULL-NEXT: umlal r2, r5, r3, r7 +; FULL-NEXT: ldr r4, [sp, #12] @ 4-byte Reload +; FULL-NEXT: adc r9, lr, #0 +; FULL-NEXT: lsl.w lr, r0, r6 +; FULL-NEXT: umlal r5, r9, r11, r7 +; FULL-NEXT: lsrs r0, r4, #1 +; FULL-NEXT: lsr.w r0, r0, r12 +; FULL-NEXT: orr.w lr, lr, r0 +; FULL-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; FULL-NEXT: lsl.w r6, r4, r6 +; FULL-NEXT: lsrs.w r0, r0, #1 +; FULL-NEXT: lsr.w r0, r0, r12 +; FULL-NEXT: orrs r0, r6 +; FULL-NEXT: umull r12, r6, r10, r0 +; FULL-NEXT: mla r6, r10, lr, r6 +; FULL-NEXT: mla r0, r7, r0, r6 +; FULL-NEXT: mla r6, r7, r11, r8 +; FULL-NEXT: mla r3, r7, r3, r6 +; FULL-NEXT: ldr r7, [sp] @ 4-byte Reload +; FULL-NEXT: adds.w r7, r7, r12 +; FULL-NEXT: mov.w r12, #0 +; FULL-NEXT: adcs r3, r0 +; FULL-NEXT: adds r0, r5, r7 +; FULL-NEXT: adc.w r3, r3, r9 +; FULL-NEXT: .LBB21_5: @ %fp-to-i-cleanup +; FULL-NEXT: subs.w r6, r1, #-1 +; FULL-NEXT: mvn r7, #-2147483648 +; FULL-NEXT: sbcs.w r6, r2, r7 +; FULL-NEXT: sbcs r6, r0, #0 +; FULL-NEXT: sbcs r6, r3, #0 +; FULL-NEXT: csel r2, r2, r7, lt +; FULL-NEXT: mov.w r7, #-1 +; FULL-NEXT: cset r6, lt +; FULL-NEXT: csel r1, r1, r7, lt +; FULL-NEXT: cmp r6, #0 +; FULL-NEXT: csel r3, r3, r6, ne +; FULL-NEXT: csel r0, r0, r6, ne +; FULL-NEXT: rsbs r5, r1, #0 +; FULL-NEXT: mov.w r6, #-2147483648 +; FULL-NEXT: sbcs.w r5, r6, r2 +; FULL-NEXT: sbcs.w r0, r7, r0 +; FULL-NEXT: sbcs.w r0, r7, r3 +; FULL-NEXT: csel r0, r1, r12, lt +; FULL-NEXT: csel r1, r2, r6, lt +; FULL-NEXT: add sp, #52 +; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi float %x to i128 %0 = icmp slt i128 %conv, 9223372036854775807 @@ -1331,49 +2745,224 @@ entry: define i64 @utest_f32i64(float %x) { ; SOFT-LABEL: utest_f32i64: -; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} -; SOFT-NEXT: bl __fixunssfti -; SOFT-NEXT: movs r4, #0 -; SOFT-NEXT: subs r2, r2, #1 -; SOFT-NEXT: sbcs r3, r4 -; SOFT-NEXT: bhs .LBB22_3 -; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: bhs .LBB22_4 -; SOFT-NEXT: .LBB22_2: @ %entry -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB22_3: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: blo .LBB22_2 -; SOFT-NEXT: .LBB22_4: @ %entry -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: pop {r4, pc} +; SOFT: @ %bb.0: @ %entryfp-to-i-entry +; SOFT-NEXT: .save {r4, r5, r6, r7, lr} +; SOFT-NEXT: push {r4, r5, r6, r7, lr} +; SOFT-NEXT: .pad #36 +; SOFT-NEXT: sub sp, #36 +; SOFT-NEXT: lsrs r1, r0, #23 +; SOFT-NEXT: uxtb r3, r1 +; SOFT-NEXT: movs r2, #0 +; SOFT-NEXT: cmp r3, #127 +; SOFT-NEXT: bhs .LBB22_2 +; SOFT-NEXT: @ %bb.1: +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: mov r1, r2 +; SOFT-NEXT: mov r6, r2 +; SOFT-NEXT: mov r4, r2 +; SOFT-NEXT: b .LBB22_5 +; SOFT-NEXT: .LBB22_2: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: ldr r1, .LCPI22_0 +; SOFT-NEXT: ands r0, r1 +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: adds r0, r0, #1 +; SOFT-NEXT: cmp r3, #149 +; SOFT-NEXT: bhi .LBB22_4 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; SOFT-NEXT: movs r1, #150 +; SOFT-NEXT: subs r1, r1, r3 +; SOFT-NEXT: lsrs r0, r1 +; SOFT-NEXT: movs r1, #0 +; SOFT-NEXT: mov r6, r1 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: b .LBB22_5 +; SOFT-NEXT: .LBB22_4: @ %fp-to-i-if-exp.large +; SOFT-NEXT: movs r1, #0 +; SOFT-NEXT: str r1, [sp, #28] +; SOFT-NEXT: str r1, [sp, #24] +; SOFT-NEXT: str r1, [sp, #20] +; SOFT-NEXT: str r0, [sp, #16] +; SOFT-NEXT: str r1, [sp, #12] +; SOFT-NEXT: str r1, [sp, #8] +; SOFT-NEXT: str r1, [sp, #4] +; SOFT-NEXT: str r1, [sp] +; SOFT-NEXT: subs r3, #150 +; SOFT-NEXT: movs r5, #31 +; SOFT-NEXT: lsrs r0, r3, #3 +; SOFT-NEXT: ands r3, r5 +; SOFT-NEXT: movs r1, #12 +; SOFT-NEXT: ands r1, r0 +; SOFT-NEXT: mov r0, sp +; SOFT-NEXT: adds r0, #16 +; SOFT-NEXT: subs r4, r0, r1 +; SOFT-NEXT: ldm r4!, {r0, r7} +; SOFT-NEXT: ldr r1, [r4] +; SOFT-NEXT: ldr r6, [r4, #4] +; SOFT-NEXT: lsls r6, r3 +; SOFT-NEXT: eors r5, r3 +; SOFT-NEXT: lsrs r4, r1, #1 +; SOFT-NEXT: lsrs r4, r5 +; SOFT-NEXT: orrs r4, r6 +; SOFT-NEXT: lsls r1, r3 +; SOFT-NEXT: lsrs r6, r7, #1 +; SOFT-NEXT: lsrs r6, r5 +; SOFT-NEXT: orrs r6, r1 +; SOFT-NEXT: lsls r7, r3 +; SOFT-NEXT: lsrs r1, r0, #1 +; SOFT-NEXT: lsrs r1, r5 +; SOFT-NEXT: orrs r1, r7 +; SOFT-NEXT: lsls r0, r3 +; SOFT-NEXT: .LBB22_5: @ %fp-to-i-cleanup +; SOFT-NEXT: subs r3, r6, #1 +; SOFT-NEXT: sbcs r4, r2 +; SOFT-NEXT: bhs .LBB22_8 +; SOFT-NEXT: @ %bb.6: @ %fp-to-i-cleanup +; SOFT-NEXT: bhs .LBB22_9 +; SOFT-NEXT: .LBB22_7: @ %fp-to-i-cleanup +; SOFT-NEXT: add sp, #36 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB22_8: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: blo .LBB22_7 +; SOFT-NEXT: .LBB22_9: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r1, r2 +; SOFT-NEXT: add sp, #36 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .p2align 2 +; SOFT-NEXT: @ %bb.10: +; SOFT-NEXT: .LCPI22_0: +; SOFT-NEXT: .long 8388607 @ 0x7fffff ; ; VFP2-LABEL: utest_f32i64: -; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: bl __fixunssfti -; VFP2-NEXT: subs r2, #1 +; VFP2: @ %bb.0: @ %entryfp-to-i-entry +; VFP2-NEXT: vmov r1, s0 ; VFP2-NEXT: mov.w r12, #0 +; VFP2-NEXT: ubfx r0, r1, #23, #8 +; VFP2-NEXT: cmp r0, #127 +; VFP2-NEXT: bhs .LBB22_2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: b .LBB22_4 +; VFP2-NEXT: .LBB22_2: @ %fp-to-i-if-check.exp.size +; VFP2-NEXT: movs r2, #1 +; VFP2-NEXT: cmp r0, #149 +; VFP2-NEXT: bfi r1, r2, #23, #9 +; VFP2-NEXT: bhi .LBB22_5 +; VFP2-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; VFP2-NEXT: rsb.w r0, r0, #150 +; VFP2-NEXT: lsr.w r0, r1, r0 +; VFP2-NEXT: .LBB22_4: @ %fp-to-i-cleanup +; VFP2-NEXT: movs r1, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: b .LBB22_6 +; VFP2-NEXT: .LBB22_5: @ %fp-to-i-if-exp.large +; VFP2-NEXT: .save {r4, r5, r7, lr} +; VFP2-NEXT: push {r4, r5, r7, lr} +; VFP2-NEXT: .pad #32 +; VFP2-NEXT: sub sp, #32 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: subs r0, #150 +; VFP2-NEXT: strd r1, r2, [sp, #16] +; VFP2-NEXT: movs r1, #12 +; VFP2-NEXT: strd r2, r2, [sp, #24] +; VFP2-NEXT: and.w r1, r1, r0, lsr #3 +; VFP2-NEXT: strd r2, r2, [sp, #8] +; VFP2-NEXT: and r0, r0, #31 +; VFP2-NEXT: strd r2, r2, [sp] +; VFP2-NEXT: mov r2, sp +; VFP2-NEXT: adds r2, #16 +; VFP2-NEXT: eor r5, r0, #31 +; VFP2-NEXT: subs r1, r2, r1 +; VFP2-NEXT: ldrd lr, r4, [r1] +; VFP2-NEXT: ldrd r2, r1, [r1, #8] +; VFP2-NEXT: lsls r1, r0 +; VFP2-NEXT: lsrs r3, r2, #1 +; VFP2-NEXT: lsrs r3, r5 +; VFP2-NEXT: orrs r3, r1 +; VFP2-NEXT: lsl.w r1, r2, r0 +; VFP2-NEXT: lsrs.w r2, r4, #1 +; VFP2-NEXT: lsrs r2, r5 +; VFP2-NEXT: orrs r2, r1 +; VFP2-NEXT: lsl.w r1, r4, r0 +; VFP2-NEXT: lsr.w r4, lr, #1 +; VFP2-NEXT: lsl.w r0, lr, r0 +; VFP2-NEXT: lsrs r4, r5 +; VFP2-NEXT: orrs r1, r4 +; VFP2-NEXT: add sp, #32 +; VFP2-NEXT: pop.w {r4, r5, r7, lr} +; VFP2-NEXT: .LBB22_6: @ %fp-to-i-cleanup +; VFP2-NEXT: subs r2, #1 ; VFP2-NEXT: sbcs r2, r3, #0 ; VFP2-NEXT: itt hs ; VFP2-NEXT: movhs r0, r12 ; VFP2-NEXT: movhs r1, r12 -; VFP2-NEXT: pop {r7, pc} +; VFP2-NEXT: bx lr ; ; FULL-LABEL: utest_f32i64: -; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: bl __fixunssfti -; FULL-NEXT: subs r2, #1 +; FULL: @ %bb.0: @ %entryfp-to-i-entry +; FULL-NEXT: vmov r1, s0 ; FULL-NEXT: mov.w r12, #0 -; FULL-NEXT: sbcs r2, r3, #0 +; FULL-NEXT: ubfx r0, r1, #23, #8 +; FULL-NEXT: cmp r0, #127 +; FULL-NEXT: bhs .LBB22_2 +; FULL-NEXT: @ %bb.1: +; FULL-NEXT: movs r0, #0 +; FULL-NEXT: b .LBB22_4 +; FULL-NEXT: .LBB22_2: @ %fp-to-i-if-check.exp.size +; FULL-NEXT: movs r2, #1 +; FULL-NEXT: cmp r0, #149 +; FULL-NEXT: bfi r1, r2, #23, #9 +; FULL-NEXT: bhi .LBB22_5 +; FULL-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; FULL-NEXT: rsb.w r0, r0, #150 +; FULL-NEXT: lsr.w r0, r1, r0 +; FULL-NEXT: .LBB22_4: @ %fp-to-i-cleanup +; FULL-NEXT: movs r2, #0 +; FULL-NEXT: movs r1, #0 +; FULL-NEXT: movs r3, #0 +; FULL-NEXT: b .LBB22_6 +; FULL-NEXT: .LBB22_5: @ %fp-to-i-if-exp.large +; FULL-NEXT: .save {r4, r5, r7, lr} +; FULL-NEXT: push {r4, r5, r7, lr} +; FULL-NEXT: .pad #32 +; FULL-NEXT: sub sp, #32 +; FULL-NEXT: movs r2, #0 +; FULL-NEXT: subs r0, #150 +; FULL-NEXT: strd r1, r2, [sp, #16] +; FULL-NEXT: movs r1, #12 +; FULL-NEXT: strd r2, r2, [sp, #24] +; FULL-NEXT: and.w r1, r1, r0, lsr #3 +; FULL-NEXT: strd r2, r2, [sp, #8] +; FULL-NEXT: and r0, r0, #31 +; FULL-NEXT: strd r2, r2, [sp] +; FULL-NEXT: mov r2, sp +; FULL-NEXT: adds r2, #16 +; FULL-NEXT: eor r5, r0, #31 +; FULL-NEXT: subs r1, r2, r1 +; FULL-NEXT: ldrd lr, r2, [r1] +; FULL-NEXT: ldrd r4, r1, [r1, #8] +; FULL-NEXT: lsls r1, r0 +; FULL-NEXT: lsrs r3, r4, #1 +; FULL-NEXT: lsrs r3, r5 +; FULL-NEXT: orrs r3, r1 +; FULL-NEXT: lsl.w r1, r4, r0 +; FULL-NEXT: lsrs.w r4, r2, #1 +; FULL-NEXT: lsls r2, r0 +; FULL-NEXT: lsrs r4, r5 +; FULL-NEXT: orrs r1, r4 +; FULL-NEXT: lsr.w r4, lr, #1 +; FULL-NEXT: lsl.w r0, lr, r0 +; FULL-NEXT: lsrs r4, r5 +; FULL-NEXT: orrs r2, r4 +; FULL-NEXT: add sp, #32 +; FULL-NEXT: pop.w {r4, r5, r7, lr} +; FULL-NEXT: .LBB22_6: @ %fp-to-i-cleanup +; FULL-NEXT: subs r1, #1 +; FULL-NEXT: sbcs r1, r3, #0 ; FULL-NEXT: csel r0, r0, r12, lo -; FULL-NEXT: csel r1, r1, r12, lo -; FULL-NEXT: pop {r7, pc} +; FULL-NEXT: csel r1, r2, r12, lo +; FULL-NEXT: bx lr entry: %conv = fptoui float %x to i128 %0 = icmp ult i128 %conv, 18446744073709551616 @@ -1384,65 +2973,436 @@ entry: define i64 @ustest_f32i64(float %x) { ; SOFT-LABEL: ustest_f32i64: -; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} -; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: movs r4, #0 -; SOFT-NEXT: subs r2, r2, #1 +; SOFT: @ %bb.0: @ %entryfp-to-i-entry +; SOFT-NEXT: .save {r4, r5, r6, r7, lr} +; SOFT-NEXT: push {r4, r5, r6, r7, lr} +; SOFT-NEXT: .pad #68 +; SOFT-NEXT: sub sp, #68 +; SOFT-NEXT: lsrs r1, r0, #23 +; SOFT-NEXT: uxtb r5, r1 +; SOFT-NEXT: movs r3, #0 +; SOFT-NEXT: cmp r5, #127 +; SOFT-NEXT: bhs .LBB23_2 +; SOFT-NEXT: @ %bb.1: +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: mov r5, r3 ; SOFT-NEXT: mov r2, r3 -; SOFT-NEXT: sbcs r2, r4 -; SOFT-NEXT: bge .LBB23_5 -; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: bge .LBB23_6 -; SOFT-NEXT: .LBB23_2: @ %entry -; SOFT-NEXT: blt .LBB23_4 -; SOFT-NEXT: .LBB23_3: @ %entry -; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: .LBB23_4: @ %entry -; SOFT-NEXT: asrs r2, r3, #31 -; SOFT-NEXT: bics r0, r2 -; SOFT-NEXT: bics r1, r2 -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB23_5: @ %entry -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: blt .LBB23_2 -; SOFT-NEXT: .LBB23_6: @ %entry +; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: b .LBB23_6 +; SOFT-NEXT: .LBB23_2: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: str r3, [sp, #24] @ 4-byte Spill +; SOFT-NEXT: asrs r7, r0, #31 +; SOFT-NEXT: movs r4, #1 +; SOFT-NEXT: orrs r4, r7 +; SOFT-NEXT: ldr r1, .LCPI23_0 +; SOFT-NEXT: ands r0, r1 +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: adds r0, r0, #1 +; SOFT-NEXT: cmp r5, #149 +; SOFT-NEXT: str r7, [sp, #28] @ 4-byte Spill +; SOFT-NEXT: bhi .LBB23_4 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; SOFT-NEXT: movs r1, #150 +; SOFT-NEXT: subs r1, r1, r5 +; SOFT-NEXT: lsrs r0, r1 +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: adcs r4, r6 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r5, r0, r2 +; SOFT-NEXT: adcs r1, r6 +; SOFT-NEXT: adds r0, r4, r1 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: adcs r7, r6 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: adcs r4, r7 +; SOFT-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r2, r3, r2 +; SOFT-NEXT: adcs r1, r4 +; SOFT-NEXT: b .LBB23_5 +; SOFT-NEXT: .LBB23_4: @ %fp-to-i-if-exp.large +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: str r6, [sp, #60] +; SOFT-NEXT: str r6, [sp, #56] +; SOFT-NEXT: str r6, [sp, #52] +; SOFT-NEXT: str r0, [sp, #48] +; SOFT-NEXT: str r6, [sp, #44] +; SOFT-NEXT: str r6, [sp, #40] +; SOFT-NEXT: str r6, [sp, #36] +; SOFT-NEXT: str r6, [sp, #32] +; SOFT-NEXT: subs r5, #150 +; SOFT-NEXT: movs r7, #31 +; SOFT-NEXT: lsrs r0, r5, #3 +; SOFT-NEXT: ands r5, r7 +; SOFT-NEXT: movs r1, #12 +; SOFT-NEXT: ands r1, r0 +; SOFT-NEXT: add r0, sp, #32 +; SOFT-NEXT: adds r0, #16 +; SOFT-NEXT: str r4, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: subs r4, r0, r1 +; SOFT-NEXT: ldr r0, [r4, #4] +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [r4, #8] +; SOFT-NEXT: ldr r1, [r4, #12] +; SOFT-NEXT: lsrs r3, r0, #1 +; SOFT-NEXT: lsls r0, r5 +; SOFT-NEXT: eors r7, r5 +; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: lsrs r2, r2, #1 +; SOFT-NEXT: lsrs r2, r7 +; SOFT-NEXT: orrs r2, r0 +; SOFT-NEXT: lsls r1, r5 +; SOFT-NEXT: lsrs r3, r7 +; SOFT-NEXT: orrs r3, r1 +; SOFT-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: lsls r1, r5 +; SOFT-NEXT: ldr r4, [r4] +; SOFT-NEXT: lsrs r0, r4, #1 +; SOFT-NEXT: lsrs r0, r7 +; SOFT-NEXT: orrs r0, r1 +; SOFT-NEXT: mov r3, r0 +; SOFT-NEXT: lsls r4, r5 +; SOFT-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r5, r3 +; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r0 +; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: ldr r5, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: mov r7, r1 ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bge .LBB23_3 -; SOFT-NEXT: b .LBB23_4 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: adds r5, r0, r1 +; SOFT-NEXT: adcs r7, r6 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: adds r5, r0, r5 +; SOFT-NEXT: adcs r1, r6 +; SOFT-NEXT: adds r4, r7, r1 +; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: adcs r7, r6 +; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: adds r2, r2, r4 +; SOFT-NEXT: adcs r1, r7 +; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: adds r2, r2, r3 +; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r3 +; SOFT-NEXT: .LBB23_5: @ %fp-to-i-cleanup +; SOFT-NEXT: ldr r3, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: .LBB23_6: @ %fp-to-i-cleanup +; SOFT-NEXT: subs r2, r2, #1 +; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: sbcs r2, r3 +; SOFT-NEXT: bge .LBB23_11 +; SOFT-NEXT: @ %bb.7: @ %fp-to-i-cleanup +; SOFT-NEXT: bge .LBB23_12 +; SOFT-NEXT: .LBB23_8: @ %fp-to-i-cleanup +; SOFT-NEXT: blt .LBB23_10 +; SOFT-NEXT: .LBB23_9: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: .LBB23_10: @ %fp-to-i-cleanup +; SOFT-NEXT: asrs r1, r1, #31 +; SOFT-NEXT: bics r0, r1 +; SOFT-NEXT: bics r5, r1 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: add sp, #68 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB23_11: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r5, r3 +; SOFT-NEXT: blt .LBB23_8 +; SOFT-NEXT: .LBB23_12: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: bge .LBB23_9 +; SOFT-NEXT: b .LBB23_10 +; SOFT-NEXT: .p2align 2 +; SOFT-NEXT: @ %bb.13: +; SOFT-NEXT: .LCPI23_0: +; SOFT-NEXT: .long 8388607 @ 0x7fffff ; ; VFP2-LABEL: ustest_f32i64: -; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs r2, #1 -; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs r2, r3, #0 +; VFP2: @ %bb.0: @ %entryfp-to-i-entry +; VFP2-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: .pad #52 +; VFP2-NEXT: sub sp, #52 +; VFP2-NEXT: vmov r3, s0 +; VFP2-NEXT: movs r7, #0 +; VFP2-NEXT: ubfx r0, r3, #23, #8 +; VFP2-NEXT: cmp r0, #127 +; VFP2-NEXT: bhs .LBB23_2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: movs r1, #0 +; VFP2-NEXT: b .LBB23_5 +; VFP2-NEXT: .LBB23_2: @ %fp-to-i-if-check.exp.size +; VFP2-NEXT: movs r1, #1 +; VFP2-NEXT: mov r2, r3 +; VFP2-NEXT: bfi r2, r1, #23, #9 +; VFP2-NEXT: orr.w r10, r1, r3, asr #31 +; VFP2-NEXT: asr.w r8, r3, #31 +; VFP2-NEXT: cmp r0, #149 +; VFP2-NEXT: bhi .LBB23_4 +; VFP2-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; VFP2-NEXT: rsb.w r0, r0, #150 +; VFP2-NEXT: movs r5, #0 +; VFP2-NEXT: lsr.w r4, r2, r0 +; VFP2-NEXT: umull r0, r2, r4, r10 +; VFP2-NEXT: umull r1, r3, r4, r8 +; VFP2-NEXT: adds r6, r1, r2 +; VFP2-NEXT: adcs r6, r3, #0 +; VFP2-NEXT: adc lr, r5, #0 +; VFP2-NEXT: adds r6, r1, r2 +; VFP2-NEXT: mla r6, r8, r4, r3 +; VFP2-NEXT: adcs r3, r1 +; VFP2-NEXT: umlal r2, r5, r4, r8 +; VFP2-NEXT: adc.w r1, lr, r6 +; VFP2-NEXT: b .LBB23_5 +; VFP2-NEXT: .LBB23_4: @ %fp-to-i-if-exp.large +; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: subs r0, #150 +; VFP2-NEXT: add r3, sp, #16 +; VFP2-NEXT: strd r2, lr, [sp, #32] +; VFP2-NEXT: movs r2, #12 +; VFP2-NEXT: adds r3, #16 +; VFP2-NEXT: and.w r2, r2, r0, lsr #3 +; VFP2-NEXT: strd lr, lr, [sp, #40] +; VFP2-NEXT: strd lr, lr, [sp, #24] +; VFP2-NEXT: subs r2, r3, r2 +; VFP2-NEXT: strd lr, lr, [sp, #16] +; VFP2-NEXT: and r6, r0, #31 +; VFP2-NEXT: ldrd r3, r1, [r2] +; VFP2-NEXT: eor r12, r6, #31 +; VFP2-NEXT: movs r5, #0 +; VFP2-NEXT: str r1, [sp, #4] @ 4-byte Spill +; VFP2-NEXT: ldr r7, [r2, #8] +; VFP2-NEXT: str r7, [sp, #12] @ 4-byte Spill +; VFP2-NEXT: ldr r2, [r2, #12] +; VFP2-NEXT: str r2, [sp, #8] @ 4-byte Spill +; VFP2-NEXT: lsrs r2, r3, #1 +; VFP2-NEXT: lsr.w r0, r2, r12 +; VFP2-NEXT: lsl.w r2, r1, r6 +; VFP2-NEXT: lsls r3, r6 +; VFP2-NEXT: orr.w r11, r2, r0 +; VFP2-NEXT: umull r0, r2, r3, r10 +; VFP2-NEXT: umull r1, r7, r3, r8 +; VFP2-NEXT: umlal r2, r5, r11, r10 +; VFP2-NEXT: str r1, [sp] @ 4-byte Spill +; VFP2-NEXT: adds.w r9, r1, r2 +; VFP2-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; VFP2-NEXT: adcs.w r4, r5, r7 +; VFP2-NEXT: umlal r2, r5, r3, r8 +; VFP2-NEXT: ldr r4, [sp, #12] @ 4-byte Reload +; VFP2-NEXT: adc r9, lr, #0 +; VFP2-NEXT: lsl.w lr, r1, r6 +; VFP2-NEXT: umlal r5, r9, r11, r8 +; VFP2-NEXT: lsrs r1, r4, #1 +; VFP2-NEXT: lsr.w r1, r1, r12 +; VFP2-NEXT: orr.w lr, lr, r1 +; VFP2-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; VFP2-NEXT: lsl.w r6, r4, r6 +; VFP2-NEXT: lsrs.w r1, r1, #1 +; VFP2-NEXT: lsr.w r1, r1, r12 +; VFP2-NEXT: orrs r1, r6 +; VFP2-NEXT: umull r12, r6, r10, r1 +; VFP2-NEXT: mla r6, r10, lr, r6 +; VFP2-NEXT: mla r1, r8, r1, r6 +; VFP2-NEXT: mla r6, r8, r11, r7 +; VFP2-NEXT: ldr r7, [sp] @ 4-byte Reload +; VFP2-NEXT: mla r3, r8, r3, r6 +; VFP2-NEXT: adds.w r6, r7, r12 +; VFP2-NEXT: mov.w r7, #0 +; VFP2-NEXT: adcs r1, r3 +; VFP2-NEXT: adds r3, r5, r6 +; VFP2-NEXT: adc.w r1, r1, r9 +; VFP2-NEXT: .LBB23_5: @ %fp-to-i-cleanup +; VFP2-NEXT: subs r3, #1 +; VFP2-NEXT: sbcs r3, r1, #0 ; VFP2-NEXT: itt ge -; VFP2-NEXT: movge r3, r12 -; VFP2-NEXT: movge r0, r12 +; VFP2-NEXT: movge r1, r7 +; VFP2-NEXT: movge r0, r7 +; VFP2-NEXT: bic.w r0, r0, r1, asr #31 ; VFP2-NEXT: it ge -; VFP2-NEXT: movge r1, r12 -; VFP2-NEXT: bic.w r0, r0, r3, asr #31 -; VFP2-NEXT: bic.w r1, r1, r3, asr #31 -; VFP2-NEXT: pop {r7, pc} +; VFP2-NEXT: movge r2, r7 +; VFP2-NEXT: bic.w r1, r2, r1, asr #31 +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; FULL-LABEL: ustest_f32i64: -; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: bl __fixsfti -; FULL-NEXT: subs r2, #1 -; FULL-NEXT: mov.w r12, #0 -; FULL-NEXT: sbcs r2, r3, #0 -; FULL-NEXT: csel r2, r3, r12, lt -; FULL-NEXT: csel r0, r0, r12, lt -; FULL-NEXT: csel r1, r1, r12, lt -; FULL-NEXT: bic.w r0, r0, r2, asr #31 -; FULL-NEXT: bic.w r1, r1, r2, asr #31 -; FULL-NEXT: pop {r7, pc} +; FULL: @ %bb.0: @ %entryfp-to-i-entry +; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FULL-NEXT: .pad #52 +; FULL-NEXT: sub sp, #52 +; FULL-NEXT: vmov r3, s0 +; FULL-NEXT: movs r7, #0 +; FULL-NEXT: ubfx r0, r3, #23, #8 +; FULL-NEXT: cmp r0, #127 +; FULL-NEXT: bhs .LBB23_2 +; FULL-NEXT: @ %bb.1: +; FULL-NEXT: mov.w r10, #0 +; FULL-NEXT: movs r2, #0 +; FULL-NEXT: movs r1, #0 +; FULL-NEXT: movs r3, #0 +; FULL-NEXT: b .LBB23_5 +; FULL-NEXT: .LBB23_2: @ %fp-to-i-if-check.exp.size +; FULL-NEXT: movs r1, #1 +; FULL-NEXT: mov r2, r3 +; FULL-NEXT: orr.w r8, r1, r3, asr #31 +; FULL-NEXT: bfi r2, r1, #23, #9 +; FULL-NEXT: asrs r3, r3, #31 +; FULL-NEXT: cmp r0, #149 +; FULL-NEXT: bhi .LBB23_4 +; FULL-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; FULL-NEXT: rsb.w r0, r0, #150 +; FULL-NEXT: movs r5, #0 +; FULL-NEXT: lsr.w r4, r2, r0 +; FULL-NEXT: umull r10, r2, r4, r8 +; FULL-NEXT: umull r1, r0, r4, r3 +; FULL-NEXT: adds r6, r1, r2 +; FULL-NEXT: adcs r6, r0, #0 +; FULL-NEXT: adc lr, r5, #0 +; FULL-NEXT: adds r6, r1, r2 +; FULL-NEXT: mla r6, r3, r4, r0 +; FULL-NEXT: adcs r1, r0 +; FULL-NEXT: umlal r2, r5, r4, r3 +; FULL-NEXT: adc.w r3, lr, r6 +; FULL-NEXT: b .LBB23_5 +; FULL-NEXT: .LBB23_4: @ %fp-to-i-if-exp.large +; FULL-NEXT: mov.w lr, #0 +; FULL-NEXT: subs r0, #150 +; FULL-NEXT: add r4, sp, #16 +; FULL-NEXT: strd r2, lr, [sp, #32] +; FULL-NEXT: movs r2, #12 +; FULL-NEXT: adds r4, #16 +; FULL-NEXT: and.w r2, r2, r0, lsr #3 +; FULL-NEXT: strd lr, lr, [sp, #40] +; FULL-NEXT: strd lr, lr, [sp, #24] +; FULL-NEXT: subs r2, r4, r2 +; FULL-NEXT: strd lr, lr, [sp, #16] +; FULL-NEXT: and r0, r0, #31 +; FULL-NEXT: ldrd r5, r4, [r2] +; FULL-NEXT: eor r12, r0, #31 +; FULL-NEXT: lsl.w r7, r4, r0 +; FULL-NEXT: movs r6, #0 +; FULL-NEXT: str r4, [sp, #8] @ 4-byte Spill +; FULL-NEXT: ldrd r1, r2, [r2, #8] +; FULL-NEXT: str r2, [sp, #12] @ 4-byte Spill +; FULL-NEXT: lsrs r2, r5, #1 +; FULL-NEXT: lsr.w r2, r2, r12 +; FULL-NEXT: lsls r5, r0 +; FULL-NEXT: orr.w r11, r7, r2 +; FULL-NEXT: umull r10, r2, r5, r8 +; FULL-NEXT: umull r4, r7, r5, r3 +; FULL-NEXT: umlal r2, r6, r11, r8 +; FULL-NEXT: str r4, [sp, #4] @ 4-byte Spill +; FULL-NEXT: adds.w r9, r4, r2 +; FULL-NEXT: adcs.w r4, r6, r7 +; FULL-NEXT: mov r9, r7 +; FULL-NEXT: ldr r7, [sp, #12] @ 4-byte Reload +; FULL-NEXT: adc r4, lr, #0 +; FULL-NEXT: umlal r2, r6, r5, r3 +; FULL-NEXT: lsl.w lr, r7, r0 +; FULL-NEXT: lsrs r7, r1, #1 +; FULL-NEXT: lsl.w r0, r1, r0 +; FULL-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; FULL-NEXT: lsr.w r7, r7, r12 +; FULL-NEXT: orr.w lr, lr, r7 +; FULL-NEXT: umlal r6, r4, r11, r3 +; FULL-NEXT: lsrs.w r7, r1, #1 +; FULL-NEXT: lsr.w r7, r7, r12 +; FULL-NEXT: orrs r0, r7 +; FULL-NEXT: umull r12, r7, r8, r0 +; FULL-NEXT: mla r1, r8, lr, r7 +; FULL-NEXT: movs r7, #0 +; FULL-NEXT: mla r0, r3, r0, r1 +; FULL-NEXT: mla r1, r3, r11, r9 +; FULL-NEXT: mla r1, r3, r5, r1 +; FULL-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; FULL-NEXT: adds.w r3, r3, r12 +; FULL-NEXT: adcs r0, r1 +; FULL-NEXT: adds r1, r6, r3 +; FULL-NEXT: adc.w r3, r4, r0 +; FULL-NEXT: .LBB23_5: @ %fp-to-i-cleanup +; FULL-NEXT: subs r0, r1, #1 +; FULL-NEXT: sbcs r0, r3, #0 +; FULL-NEXT: csel r1, r3, r7, lt +; FULL-NEXT: csel r0, r10, r7, lt +; FULL-NEXT: csel r2, r2, r7, lt +; FULL-NEXT: bic.w r0, r0, r1, asr #31 +; FULL-NEXT: bic.w r1, r2, r1, asr #31 +; FULL-NEXT: add sp, #52 +; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi float %x to i128 %0 = icmp slt i128 %conv, 18446744073709551616 @@ -1456,132 +3416,128 @@ entry: define i64 @stest_f16i64(half %x) { ; SOFT-LABEL: stest_f16i64: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, r5, r6, r7, lr} -; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #4 -; SOFT-NEXT: sub sp, #4 +; SOFT-NEXT: .save {r4, r5, r6, lr} +; SOFT-NEXT: push {r4, r5, r6, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: movs r4, #0 -; SOFT-NEXT: mvns r5, r4 -; SOFT-NEXT: ldr r6, .LCPI24_0 -; SOFT-NEXT: adds r7, r0, #1 -; SOFT-NEXT: mov r7, r1 -; SOFT-NEXT: sbcs r7, r6 -; SOFT-NEXT: mov r7, r2 -; SOFT-NEXT: sbcs r7, r4 -; SOFT-NEXT: mov r7, r3 -; SOFT-NEXT: sbcs r7, r4 -; SOFT-NEXT: bge .LBB24_8 +; SOFT-NEXT: bl __aeabi_f2iz +; SOFT-NEXT: movs r2, #0 +; SOFT-NEXT: mvns r3, r2 +; SOFT-NEXT: asrs r1, r0, #31 +; SOFT-NEXT: ldr r5, .LCPI24_0 +; SOFT-NEXT: adds r4, r0, #1 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: sbcs r4, r5 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: sbcs r4, r2 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: sbcs r4, r2 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: bge .LBB24_7 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: bge .LBB24_9 +; SOFT-NEXT: bge .LBB24_8 ; SOFT-NEXT: .LBB24_2: @ %entry -; SOFT-NEXT: bge .LBB24_10 +; SOFT-NEXT: blt .LBB24_4 ; SOFT-NEXT: .LBB24_3: @ %entry -; SOFT-NEXT: blt .LBB24_5 +; SOFT-NEXT: mov r0, r3 ; SOFT-NEXT: .LBB24_4: @ %entry -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: .LBB24_5: @ %entry -; SOFT-NEXT: movs r6, #1 -; SOFT-NEXT: lsls r6, r6, #31 -; SOFT-NEXT: rsbs r7, r0, #0 -; SOFT-NEXT: mov r7, r6 -; SOFT-NEXT: sbcs r7, r1 -; SOFT-NEXT: mov r7, r5 -; SOFT-NEXT: sbcs r7, r2 -; SOFT-NEXT: sbcs r5, r3 -; SOFT-NEXT: bge .LBB24_11 -; SOFT-NEXT: @ %bb.6: @ %entry -; SOFT-NEXT: bge .LBB24_12 +; SOFT-NEXT: movs r5, #1 +; SOFT-NEXT: lsls r5, r5, #31 +; SOFT-NEXT: rsbs r6, r0, #0 +; SOFT-NEXT: mov r6, r5 +; SOFT-NEXT: sbcs r6, r1 +; SOFT-NEXT: mov r6, r3 +; SOFT-NEXT: sbcs r6, r4 +; SOFT-NEXT: sbcs r3, r4 +; SOFT-NEXT: bge .LBB24_9 +; SOFT-NEXT: @ %bb.5: @ %entry +; SOFT-NEXT: bge .LBB24_10 +; SOFT-NEXT: .LBB24_6: @ %entry +; SOFT-NEXT: pop {r4, r5, r6, pc} ; SOFT-NEXT: .LBB24_7: @ %entry -; SOFT-NEXT: add sp, #4 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB24_8: @ %entry -; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: mov r4, r2 ; SOFT-NEXT: blt .LBB24_2 +; SOFT-NEXT: .LBB24_8: @ %entry +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: bge .LBB24_3 +; SOFT-NEXT: b .LBB24_4 ; SOFT-NEXT: .LBB24_9: @ %entry -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: blt .LBB24_3 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: blt .LBB24_6 ; SOFT-NEXT: .LBB24_10: @ %entry -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: bge .LBB24_4 -; SOFT-NEXT: b .LBB24_5 -; SOFT-NEXT: .LBB24_11: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: blt .LBB24_7 -; SOFT-NEXT: .LBB24_12: @ %entry -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: add sp, #4 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: pop {r4, r5, r6, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.13: +; SOFT-NEXT: @ %bb.11: ; SOFT-NEXT: .LCPI24_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; ; VFP2-LABEL: stest_f16i64: ; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r4, r5, r7, lr} -; VFP2-NEXT: push {r4, r5, r7, lr} +; VFP2-NEXT: .save {r4, lr} +; VFP2-NEXT: push {r4, lr} ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs.w r4, r0, #-1 -; VFP2-NEXT: mvn lr, #-2147483648 -; VFP2-NEXT: sbcs.w r4, r1, lr +; VFP2-NEXT: mvn r1, #-2147483648 +; VFP2-NEXT: mov.w lr, #-2147483648 ; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs r4, r2, #0 -; VFP2-NEXT: sbcs r4, r3, #0 -; VFP2-NEXT: mov.w r4, #0 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: subs.w r3, r0, #-1 +; VFP2-NEXT: asr.w r2, r0, #31 +; VFP2-NEXT: sbcs.w r3, r2, r1 +; VFP2-NEXT: sbcs r3, r2, #0 +; VFP2-NEXT: sbcs r2, r2, #0 +; VFP2-NEXT: mov.w r3, #0 ; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r4, #1 -; VFP2-NEXT: cmp r4, #0 -; VFP2-NEXT: itet eq -; VFP2-NEXT: moveq r3, r4 -; VFP2-NEXT: movne r4, r2 -; VFP2-NEXT: moveq r1, lr +; VFP2-NEXT: asrlt r1, r0, #31 +; VFP2-NEXT: it lt +; VFP2-NEXT: movlt r3, #1 +; VFP2-NEXT: cmp r3, #0 +; VFP2-NEXT: it ne +; VFP2-NEXT: asrne r3, r0, #31 ; VFP2-NEXT: mov.w r2, #-1 ; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r0, r2 -; VFP2-NEXT: rsbs r5, r0, #0 -; VFP2-NEXT: mov.w lr, #-2147483648 -; VFP2-NEXT: sbcs.w r5, lr, r1 -; VFP2-NEXT: sbcs.w r4, r2, r4 +; VFP2-NEXT: rsbs r4, r0, #0 +; VFP2-NEXT: sbcs.w r4, lr, r1 +; VFP2-NEXT: sbcs.w r4, r2, r3 ; VFP2-NEXT: sbcs r2, r3 ; VFP2-NEXT: itt ge ; VFP2-NEXT: movge r0, r12 ; VFP2-NEXT: movge r1, lr -; VFP2-NEXT: pop {r4, r5, r7, pc} +; VFP2-NEXT: pop {r4, pc} ; ; FULL-LABEL: stest_f16i64: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r7, lr} -; FULL-NEXT: push {r4, r5, r7, lr} -; FULL-NEXT: vmov.f16 r0, s0 -; FULL-NEXT: vmov s0, r0 -; FULL-NEXT: bl __fixhfti -; FULL-NEXT: subs.w lr, r0, #-1 -; FULL-NEXT: mvn r12, #-2147483648 -; FULL-NEXT: sbcs.w lr, r1, r12 -; FULL-NEXT: sbcs lr, r2, #0 -; FULL-NEXT: sbcs lr, r3, #0 -; FULL-NEXT: cset lr, lt -; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r5, r3, lr, ne -; FULL-NEXT: mov.w r3, #-1 -; FULL-NEXT: csel r0, r0, r3, ne -; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: csel r2, r2, lr, ne -; FULL-NEXT: rsbs r4, r0, #0 +; FULL-NEXT: .save {r7, lr} +; FULL-NEXT: push {r7, lr} +; FULL-NEXT: vcvt.s32.f16 s0, s0 +; FULL-NEXT: mvn r2, #-2147483648 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: mov.w lr, #-1 ; FULL-NEXT: mov.w r12, #-2147483648 -; FULL-NEXT: sbcs.w r4, r12, r1 -; FULL-NEXT: sbcs.w r2, r3, r2 -; FULL-NEXT: sbcs.w r2, r3, r5 +; FULL-NEXT: subs.w r3, r0, #-1 +; FULL-NEXT: asr.w r1, r0, #31 +; FULL-NEXT: sbcs.w r3, r1, r2 +; FULL-NEXT: sbcs r3, r1, #0 +; FULL-NEXT: sbcs r1, r1, #0 +; FULL-NEXT: it lt +; FULL-NEXT: asrlt r2, r0, #31 +; FULL-NEXT: cset r1, lt +; FULL-NEXT: cmp r1, #0 +; FULL-NEXT: it ne +; FULL-NEXT: asrne r1, r0, #31 +; FULL-NEXT: csel r0, r0, lr, ne +; FULL-NEXT: rsbs r3, r0, #0 +; FULL-NEXT: sbcs.w r3, r12, r2 +; FULL-NEXT: sbcs.w r3, lr, r1 +; FULL-NEXT: sbcs.w r1, lr, r1 ; FULL-NEXT: it ge ; FULL-NEXT: movge r0, #0 -; FULL-NEXT: csel r1, r1, r12, lt -; FULL-NEXT: pop {r4, r5, r7, pc} +; FULL-NEXT: csel r1, r2, r12, lt +; FULL-NEXT: pop {r7, pc} entry: %conv = fptosi half %x to i128 %0 = icmp slt i128 %conv, 9223372036854775807 @@ -1595,25 +3551,13 @@ entry: define i64 @utesth_f16i64(half %x) { ; SOFT-LABEL: utesth_f16i64: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} +; SOFT-NEXT: .save {r7, lr} +; SOFT-NEXT: push {r7, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: bl __fixunssfti -; SOFT-NEXT: movs r4, #0 -; SOFT-NEXT: subs r2, r2, #1 -; SOFT-NEXT: sbcs r3, r4 -; SOFT-NEXT: bhs .LBB25_3 -; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: bhs .LBB25_4 -; SOFT-NEXT: .LBB25_2: @ %entry -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB25_3: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: blo .LBB25_2 -; SOFT-NEXT: .LBB25_4: @ %entry -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: bl __aeabi_f2uiz +; SOFT-NEXT: movs r1, #0 +; SOFT-NEXT: pop {r7, pc} ; ; VFP2-LABEL: utesth_f16i64: ; VFP2: @ %bb.0: @ %entry @@ -1622,28 +3566,17 @@ define i64 @utesth_f16i64(half %x) { ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: bl __fixunssfti -; VFP2-NEXT: subs r2, #1 -; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs r2, r3, #0 -; VFP2-NEXT: itt hs -; VFP2-NEXT: movhs r0, r12 -; VFP2-NEXT: movhs r1, r12 +; VFP2-NEXT: movs r1, #0 +; VFP2-NEXT: vcvt.u32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: utesth_f16i64: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: vmov.f16 r0, s0 -; FULL-NEXT: vmov s0, r0 -; FULL-NEXT: bl __fixunshfti -; FULL-NEXT: subs r2, #1 -; FULL-NEXT: mov.w r12, #0 -; FULL-NEXT: sbcs r2, r3, #0 -; FULL-NEXT: csel r0, r0, r12, lo -; FULL-NEXT: csel r1, r1, r12, lo -; FULL-NEXT: pop {r7, pc} +; FULL-NEXT: vcvt.u32.f16 s0, s0 +; FULL-NEXT: movs r1, #0 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: bx lr entry: %conv = fptoui half %x to i128 %0 = icmp ult i128 %conv, 18446744073709551616 @@ -1655,34 +3588,29 @@ entry: define i64 @ustest_f16i64(half %x) { ; SOFT-LABEL: ustest_f16i64: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} +; SOFT-NEXT: .save {r7, lr} +; SOFT-NEXT: push {r7, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: movs r4, #0 -; SOFT-NEXT: subs r2, r2, #1 -; SOFT-NEXT: mov r2, r3 -; SOFT-NEXT: sbcs r2, r4 -; SOFT-NEXT: bge .LBB26_5 +; SOFT-NEXT: bl __aeabi_f2iz +; SOFT-NEXT: asrs r2, r0, #31 +; SOFT-NEXT: movs r1, #0 +; SOFT-NEXT: subs r3, r2, #1 +; SOFT-NEXT: mov r3, r2 +; SOFT-NEXT: sbcs r3, r1 +; SOFT-NEXT: bge .LBB26_3 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: bge .LBB26_6 +; SOFT-NEXT: bge .LBB26_4 ; SOFT-NEXT: .LBB26_2: @ %entry -; SOFT-NEXT: blt .LBB26_4 +; SOFT-NEXT: bics r0, r2 +; SOFT-NEXT: pop {r7, pc} ; SOFT-NEXT: .LBB26_3: @ %entry -; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: blt .LBB26_2 ; SOFT-NEXT: .LBB26_4: @ %entry -; SOFT-NEXT: asrs r2, r3, #31 +; SOFT-NEXT: mov r0, r1 ; SOFT-NEXT: bics r0, r2 -; SOFT-NEXT: bics r1, r2 -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB26_5: @ %entry -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: blt .LBB26_2 -; SOFT-NEXT: .LBB26_6: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bge .LBB26_3 -; SOFT-NEXT: b .LBB26_4 +; SOFT-NEXT: pop {r7, pc} ; ; VFP2-LABEL: ustest_f16i64: ; VFP2: @ %bb.0: @ %entry @@ -1691,35 +3619,33 @@ define i64 @ustest_f16i64(half %x) { ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs r2, #1 -; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs r2, r3, #0 -; VFP2-NEXT: itt ge -; VFP2-NEXT: movge r3, r12 -; VFP2-NEXT: movge r0, r12 -; VFP2-NEXT: it ge -; VFP2-NEXT: movge r1, r12 -; VFP2-NEXT: bic.w r0, r0, r3, asr #31 -; VFP2-NEXT: bic.w r1, r1, r3, asr #31 +; VFP2-NEXT: movs r2, #1 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r1, s0 +; VFP2-NEXT: rsbs r2, r2, r1, asr #31 +; VFP2-NEXT: asr.w r0, r1, #31 +; VFP2-NEXT: sbcs r0, r0, #0 +; VFP2-NEXT: mov.w r0, #0 +; VFP2-NEXT: itt lt +; VFP2-NEXT: movlt r0, r1 +; VFP2-NEXT: biclt.w r0, r0, r1, asr #31 +; VFP2-NEXT: movs r1, #0 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: ustest_f16i64: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: vmov.f16 r0, s0 -; FULL-NEXT: vmov s0, r0 -; FULL-NEXT: bl __fixhfti -; FULL-NEXT: subs r2, #1 -; FULL-NEXT: mov.w r12, #0 -; FULL-NEXT: sbcs r2, r3, #0 -; FULL-NEXT: csel r2, r3, r12, lt -; FULL-NEXT: csel r0, r0, r12, lt -; FULL-NEXT: csel r1, r1, r12, lt -; FULL-NEXT: bic.w r0, r0, r2, asr #31 -; FULL-NEXT: bic.w r1, r1, r2, asr #31 -; FULL-NEXT: pop {r7, pc} +; FULL-NEXT: vcvt.s32.f16 s0, s0 +; FULL-NEXT: movs r3, #1 +; FULL-NEXT: vmov r1, s0 +; FULL-NEXT: movs r2, #0 +; FULL-NEXT: rsbs r3, r3, r1, asr #31 +; FULL-NEXT: asr.w r0, r1, #31 +; FULL-NEXT: sbcs r0, r0, #0 +; FULL-NEXT: csel r0, r1, r2, lt +; FULL-NEXT: it lt +; FULL-NEXT: biclt.w r0, r0, r1, asr #31 +; FULL-NEXT: movs r1, #0 +; FULL-NEXT: bx lr entry: %conv = fptosi half %x to i128 %0 = icmp slt i128 %conv, 18446744073709551616 @@ -2589,150 +4515,557 @@ entry: define i64 @stest_f64i64_mm(double %x) { ; SOFT-LABEL: stest_f64i64_mm: -; SOFT: @ %bb.0: @ %entry +; SOFT: @ %bb.0: @ %entryfp-to-i-entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 -; SOFT-NEXT: bl __fixdfti -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: movs r0, #1 -; SOFT-NEXT: movs r5, #0 -; SOFT-NEXT: ldr r6, .LCPI45_0 -; SOFT-NEXT: adds r4, r7, #1 -; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: sbcs r4, r6 -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: sbcs r4, r5 -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: sbcs r4, r5 -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: blt .LBB45_2 -; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r4, r5 -; SOFT-NEXT: .LBB45_2: @ %entry +; SOFT-NEXT: .pad #68 +; SOFT-NEXT: sub sp, #68 +; SOFT-NEXT: movs r7, #1 +; SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: ldr r3, .LCPI45_1 +; SOFT-NEXT: mov r5, r3 +; SOFT-NEXT: subs r5, #52 +; SOFT-NEXT: mov r6, r4 +; SOFT-NEXT: sbcs r6, r4 +; SOFT-NEXT: lsls r2, r1, #1 +; SOFT-NEXT: lsrs r2, r2, #21 +; SOFT-NEXT: subs r5, r2, r5 +; SOFT-NEXT: mov r5, r4 +; SOFT-NEXT: sbcs r5, r6 +; SOFT-NEXT: str r7, [sp, #28] @ 4-byte Spill +; SOFT-NEXT: bhs .LBB45_2 +; SOFT-NEXT: @ %bb.1: +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r6, r4 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: b .LBB45_6 +; SOFT-NEXT: .LBB45_2: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: asrs r5, r1, #31 +; SOFT-NEXT: str r5, [sp, #24] @ 4-byte Spill +; SOFT-NEXT: orrs r5, r7 +; SOFT-NEXT: str r5, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: ldr r5, .LCPI45_0 ; SOFT-NEXT: mvns r6, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB45_12 -; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: beq .LBB45_13 -; SOFT-NEXT: .LBB45_4: @ %entry -; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: beq .LBB45_14 -; SOFT-NEXT: .LBB45_5: @ %entry -; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: bne .LBB45_7 -; SOFT-NEXT: .LBB45_6: @ %entry -; SOFT-NEXT: mov r7, r6 -; SOFT-NEXT: .LBB45_7: @ %entry -; SOFT-NEXT: lsls r3, r0, #31 -; SOFT-NEXT: rsbs r4, r7, #0 -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: sbcs r4, r1 -; SOFT-NEXT: mov r4, r6 -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: sbcs r4, r2 -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: sbcs r6, r2 -; SOFT-NEXT: bge .LBB45_15 -; SOFT-NEXT: @ %bb.8: @ %entry -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: beq .LBB45_16 -; SOFT-NEXT: .LBB45_9: @ %entry -; SOFT-NEXT: bne .LBB45_11 -; SOFT-NEXT: .LBB45_10: @ %entry -; SOFT-NEXT: mov r1, r3 -; SOFT-NEXT: .LBB45_11: @ %entry +; SOFT-NEXT: mvns r1, r1 +; SOFT-NEXT: orrs r1, r6 +; SOFT-NEXT: subs r1, r5, r1 +; SOFT-NEXT: mvns r0, r0 +; SOFT-NEXT: mvns r0, r0 +; SOFT-NEXT: cmp r3, r2 +; SOFT-NEXT: bls .LBB45_4 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; SOFT-NEXT: subs r2, r3, r2 +; SOFT-NEXT: bl __aeabi_llsr +; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r6, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill ; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: add sp, #12 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB45_12: @ %entry +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r6 ; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: bne .LBB45_4 -; SOFT-NEXT: .LBB45_13: @ %entry -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB45_5 -; SOFT-NEXT: .LBB45_14: @ %entry -; SOFT-NEXT: ldr r1, .LCPI45_0 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: adds r6, r0, r1 +; SOFT-NEXT: adcs r7, r4 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r5, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: adds r6, r0, r6 +; SOFT-NEXT: adcs r1, r4 +; SOFT-NEXT: adds r0, r7, r1 +; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: mov r7, r4 +; SOFT-NEXT: adcs r7, r4 +; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: adcs r5, r7 +; SOFT-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: adds r2, r3, r2 +; SOFT-NEXT: adcs r1, r5 +; SOFT-NEXT: b .LBB45_5 +; SOFT-NEXT: .LBB45_4: @ %fp-to-i-if-exp.large +; SOFT-NEXT: str r4, [sp, #60] +; SOFT-NEXT: str r4, [sp, #44] +; SOFT-NEXT: str r4, [sp, #40] +; SOFT-NEXT: str r4, [sp, #36] +; SOFT-NEXT: str r4, [sp, #32] +; SOFT-NEXT: add r3, sp, #48 +; SOFT-NEXT: stm r3!, {r0, r1, r4} +; SOFT-NEXT: ldr r0, .LCPI45_2 +; SOFT-NEXT: adds r6, r2, r0 +; SOFT-NEXT: movs r7, #31 +; SOFT-NEXT: lsrs r0, r6, #3 +; SOFT-NEXT: ands r6, r7 +; SOFT-NEXT: movs r1, #12 +; SOFT-NEXT: ands r1, r0 +; SOFT-NEXT: add r0, sp, #32 +; SOFT-NEXT: adds r0, #16 +; SOFT-NEXT: subs r5, r0, r1 +; SOFT-NEXT: ldr r0, [r5, #4] +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [r5, #8] +; SOFT-NEXT: ldr r1, [r5, #12] +; SOFT-NEXT: lsrs r3, r0, #1 +; SOFT-NEXT: lsls r0, r6 +; SOFT-NEXT: eors r7, r6 +; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: lsrs r2, r2, #1 +; SOFT-NEXT: lsrs r2, r7 +; SOFT-NEXT: orrs r2, r0 +; SOFT-NEXT: lsls r1, r6 +; SOFT-NEXT: lsrs r3, r7 +; SOFT-NEXT: orrs r3, r1 +; SOFT-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: lsls r1, r6 +; SOFT-NEXT: ldr r5, [r5] +; SOFT-NEXT: lsrs r0, r5, #1 +; SOFT-NEXT: lsrs r0, r7 +; SOFT-NEXT: orrs r0, r1 +; SOFT-NEXT: mov r3, r0 +; SOFT-NEXT: lsls r5, r6 +; SOFT-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r6, r3 ; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: beq .LBB45_6 -; SOFT-NEXT: b .LBB45_7 -; SOFT-NEXT: .LBB45_15: @ %entry +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r0 +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r6, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: mov r7, r1 ; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: bne .LBB45_9 -; SOFT-NEXT: .LBB45_16: @ %entry -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: beq .LBB45_10 -; SOFT-NEXT: b .LBB45_11 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: adds r6, r0, r1 +; SOFT-NEXT: adcs r7, r4 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r2, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: adds r6, r0, r6 +; SOFT-NEXT: adcs r1, r4 +; SOFT-NEXT: adds r5, r7, r1 +; SOFT-NEXT: mov r7, r4 +; SOFT-NEXT: adcs r7, r4 +; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r2, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: adds r2, r2, r5 +; SOFT-NEXT: adcs r1, r7 +; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r2, r2, r3 +; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r3 +; SOFT-NEXT: .LBB45_5: @ %fp-to-i-cleanup +; SOFT-NEXT: ldr r7, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: .LBB45_6: @ %fp-to-i-cleanup +; SOFT-NEXT: ldr r5, .LCPI45_3 +; SOFT-NEXT: adds r3, r0, #1 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: sbcs r3, r5 +; SOFT-NEXT: mov r3, r2 +; SOFT-NEXT: sbcs r3, r4 +; SOFT-NEXT: mov r3, r1 +; SOFT-NEXT: sbcs r3, r4 +; SOFT-NEXT: blt .LBB45_8 +; SOFT-NEXT: @ %bb.7: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r7, r4 +; SOFT-NEXT: .LBB45_8: @ %fp-to-i-cleanup +; SOFT-NEXT: mvns r3, r4 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: beq .LBB45_18 +; SOFT-NEXT: @ %bb.9: @ %fp-to-i-cleanup +; SOFT-NEXT: beq .LBB45_19 +; SOFT-NEXT: .LBB45_10: @ %fp-to-i-cleanup +; SOFT-NEXT: beq .LBB45_20 +; SOFT-NEXT: .LBB45_11: @ %fp-to-i-cleanup +; SOFT-NEXT: ldr r5, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: bne .LBB45_13 +; SOFT-NEXT: .LBB45_12: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: .LBB45_13: @ %fp-to-i-cleanup +; SOFT-NEXT: lsls r5, r5, #31 +; SOFT-NEXT: rsbs r7, r0, #0 +; SOFT-NEXT: mov r7, r5 +; SOFT-NEXT: sbcs r7, r6 +; SOFT-NEXT: mov r7, r3 +; SOFT-NEXT: sbcs r7, r2 +; SOFT-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: sbcs r3, r1 +; SOFT-NEXT: bge .LBB45_21 +; SOFT-NEXT: @ %bb.14: @ %fp-to-i-cleanup +; SOFT-NEXT: cmp r2, #0 +; SOFT-NEXT: beq .LBB45_22 +; SOFT-NEXT: .LBB45_15: @ %fp-to-i-cleanup +; SOFT-NEXT: bne .LBB45_17 +; SOFT-NEXT: .LBB45_16: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r6, r5 +; SOFT-NEXT: .LBB45_17: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: add sp, #68 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB45_18: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: bne .LBB45_10 +; SOFT-NEXT: .LBB45_19: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: bne .LBB45_11 +; SOFT-NEXT: .LBB45_20: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r6, r5 +; SOFT-NEXT: ldr r5, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: beq .LBB45_12 +; SOFT-NEXT: b .LBB45_13 +; SOFT-NEXT: .LBB45_21: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: cmp r2, #0 +; SOFT-NEXT: bne .LBB45_15 +; SOFT-NEXT: .LBB45_22: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: beq .LBB45_16 +; SOFT-NEXT: b .LBB45_17 ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.17: +; SOFT-NEXT: @ %bb.23: ; SOFT-NEXT: .LCPI45_0: +; SOFT-NEXT: .long 1048575 @ 0xfffff +; SOFT-NEXT: .LCPI45_1: +; SOFT-NEXT: .long 1075 @ 0x433 +; SOFT-NEXT: .LCPI45_2: +; SOFT-NEXT: .long 4294966221 @ 0xfffffbcd +; SOFT-NEXT: .LCPI45_3: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; ; VFP2-LABEL: stest_f64i64_mm: -; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r4, r5, r7, lr} -; VFP2-NEXT: push {r4, r5, r7, lr} -; VFP2-NEXT: bl __fixdfti -; VFP2-NEXT: subs.w r4, r0, #-1 -; VFP2-NEXT: mvn lr, #-2147483648 -; VFP2-NEXT: sbcs.w r4, r1, lr +; VFP2: @ %bb.0: @ %entryfp-to-i-entry +; VFP2-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: .pad #60 +; VFP2-NEXT: sub sp, #60 +; VFP2-NEXT: vmov r3, r1, d0 +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: movw r5, #1023 +; VFP2-NEXT: ubfx r2, r1, #20, #11 +; VFP2-NEXT: cmp r2, r5 +; VFP2-NEXT: bhs .LBB45_2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: movs r3, #0 ; VFP2-NEXT: mov.w r12, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: b .LBB45_5 +; VFP2-NEXT: .LBB45_2: @ %fp-to-i-if-check.exp.size +; VFP2-NEXT: movs r4, #1 +; VFP2-NEXT: mov r5, r1 +; VFP2-NEXT: orr.w r8, r4, r1, asr #31 +; VFP2-NEXT: bfi r5, r4, #20, #12 +; VFP2-NEXT: asrs r1, r1, #31 +; VFP2-NEXT: movw r4, #1074 +; VFP2-NEXT: cmp r2, r4 +; VFP2-NEXT: bhi .LBB45_4 +; VFP2-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; VFP2-NEXT: movw r4, #1075 +; VFP2-NEXT: mov.w r9, #0 +; VFP2-NEXT: subs r4, r4, r2 +; VFP2-NEXT: lsr.w lr, r3, r4 +; VFP2-NEXT: rsb.w r3, r4, #32 +; VFP2-NEXT: lsl.w r3, r5, r3 +; VFP2-NEXT: orr.w r6, lr, r3 +; VFP2-NEXT: movw r3, #1043 +; VFP2-NEXT: subs r2, r3, r2 +; VFP2-NEXT: it pl +; VFP2-NEXT: lsrpl.w r6, r5, r2 +; VFP2-NEXT: umull lr, r3, r6, r8 +; VFP2-NEXT: lsr.w r2, r5, r4 +; VFP2-NEXT: mov.w r5, #0 +; VFP2-NEXT: it pl +; VFP2-NEXT: movpl r2, #0 +; VFP2-NEXT: umlal r3, r5, r2, r8 +; VFP2-NEXT: umull r12, r7, r6, r1 +; VFP2-NEXT: adds.w r4, r12, r3 +; VFP2-NEXT: adcs.w r4, r5, r7 +; VFP2-NEXT: umlal r3, r5, r6, r1 +; VFP2-NEXT: adc r4, r9, #0 +; VFP2-NEXT: mla r7, r1, r2, r7 +; VFP2-NEXT: umlal r5, r4, r2, r1 +; VFP2-NEXT: mla r1, r1, r6, r7 +; VFP2-NEXT: adds.w r12, r12, r5 +; VFP2-NEXT: adc.w r2, r4, r1 +; VFP2-NEXT: b .LBB45_5 +; VFP2-NEXT: .LBB45_4: @ %fp-to-i-if-exp.large +; VFP2-NEXT: add.w r12, sp, #40 +; VFP2-NEXT: mov.w r11, #0 +; VFP2-NEXT: stm.w r12, {r3, r5, r11} +; VFP2-NEXT: subw r3, r2, #1075 +; VFP2-NEXT: movs r2, #12 +; VFP2-NEXT: add r7, sp, #24 +; VFP2-NEXT: and.w r2, r2, r3, lsr #3 +; VFP2-NEXT: adds r7, #16 +; VFP2-NEXT: str.w r11, [sp, #52] +; VFP2-NEXT: strd r11, r11, [sp, #32] +; VFP2-NEXT: subs r7, r7, r2 +; VFP2-NEXT: strd r11, r11, [sp, #24] +; VFP2-NEXT: movs r4, #0 +; VFP2-NEXT: ldrd r6, r5, [r7] +; VFP2-NEXT: str r5, [sp, #12] @ 4-byte Spill +; VFP2-NEXT: ldr r2, [r7, #8] +; VFP2-NEXT: str r2, [sp, #20] @ 4-byte Spill +; VFP2-NEXT: ldr r2, [r7, #12] +; VFP2-NEXT: lsrs r7, r6, #1 +; VFP2-NEXT: str r2, [sp, #16] @ 4-byte Spill +; VFP2-NEXT: and r2, r3, #31 +; VFP2-NEXT: eor r12, r2, #31 +; VFP2-NEXT: lsls r6, r2 +; VFP2-NEXT: lsr.w r3, r7, r12 +; VFP2-NEXT: lsl.w r7, r5, r2 +; VFP2-NEXT: orr.w r9, r7, r3 +; VFP2-NEXT: umull lr, r3, r6, r8 +; VFP2-NEXT: umull r5, r7, r6, r1 +; VFP2-NEXT: umlal r3, r4, r9, r8 +; VFP2-NEXT: strd r7, r5, [sp, #4] @ 8-byte Folded Spill +; VFP2-NEXT: adds.w r10, r5, r3 +; VFP2-NEXT: ldr r5, [sp, #16] @ 4-byte Reload +; VFP2-NEXT: adcs r7, r4 +; VFP2-NEXT: umlal r3, r4, r6, r1 +; VFP2-NEXT: ldr r7, [sp, #20] @ 4-byte Reload +; VFP2-NEXT: lsl.w r10, r5, r2 +; VFP2-NEXT: adc r11, r11, #0 +; VFP2-NEXT: lsrs r5, r7, #1 +; VFP2-NEXT: lsl.w r2, r7, r2 +; VFP2-NEXT: lsr.w r5, r5, r12 +; VFP2-NEXT: orr.w r10, r10, r5 +; VFP2-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; VFP2-NEXT: umlal r4, r11, r9, r1 +; VFP2-NEXT: lsrs.w r5, r5, #1 +; VFP2-NEXT: lsr.w r5, r5, r12 +; VFP2-NEXT: orrs r2, r5 +; VFP2-NEXT: umull r7, r5, r8, r2 +; VFP2-NEXT: mla r5, r8, r10, r5 +; VFP2-NEXT: mla r2, r1, r2, r5 +; VFP2-NEXT: ldr r5, [sp, #4] @ 4-byte Reload +; VFP2-NEXT: mla r5, r1, r9, r5 +; VFP2-NEXT: mla r1, r1, r6, r5 +; VFP2-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; VFP2-NEXT: adds r6, r5, r7 +; VFP2-NEXT: adcs r1, r2 +; VFP2-NEXT: adds.w r12, r4, r6 +; VFP2-NEXT: adc.w r2, r11, r1 +; VFP2-NEXT: .LBB45_5: @ %fp-to-i-cleanup +; VFP2-NEXT: subs.w r4, lr, #-1 +; VFP2-NEXT: mvn r1, #-2147483648 +; VFP2-NEXT: sbcs.w r4, r3, r1 +; VFP2-NEXT: mov.w r7, #-2147483648 +; VFP2-NEXT: sbcs r4, r12, #0 ; VFP2-NEXT: sbcs r4, r2, #0 -; VFP2-NEXT: sbcs r4, r3, #0 ; VFP2-NEXT: mov.w r4, #0 ; VFP2-NEXT: it lt ; VFP2-NEXT: movlt r4, #1 ; VFP2-NEXT: cmp r4, #0 -; VFP2-NEXT: itet eq -; VFP2-NEXT: moveq r3, r4 -; VFP2-NEXT: movne r4, r2 -; VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: mov.w r2, #-1 +; VFP2-NEXT: itee eq +; VFP2-NEXT: moveq r2, r4 +; VFP2-NEXT: movne r4, r12 +; VFP2-NEXT: movne r1, r3 +; VFP2-NEXT: mov.w r3, #-1 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r2 -; VFP2-NEXT: rsbs r5, r0, #0 -; VFP2-NEXT: mov.w lr, #-2147483648 -; VFP2-NEXT: sbcs.w r5, lr, r1 -; VFP2-NEXT: sbcs.w r4, r2, r4 -; VFP2-NEXT: sbcs r2, r3 +; VFP2-NEXT: moveq lr, r3 +; VFP2-NEXT: rsbs.w r6, lr, #0 +; VFP2-NEXT: sbcs.w r6, r7, r1 +; VFP2-NEXT: sbcs.w r6, r3, r4 +; VFP2-NEXT: sbcs.w r2, r3, r2 ; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: pop {r4, r5, r7, pc} +; VFP2-NEXT: movlt r0, #1 +; VFP2-NEXT: cmp r0, #0 +; VFP2-NEXT: ite ne +; VFP2-NEXT: movne r0, lr +; VFP2-NEXT: moveq r1, r7 +; VFP2-NEXT: add sp, #60 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; FULL-LABEL: stest_f64i64_mm: -; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r7, lr} -; FULL-NEXT: push {r4, r5, r7, lr} -; FULL-NEXT: bl __fixdfti -; FULL-NEXT: subs.w lr, r0, #-1 -; FULL-NEXT: mvn r12, #-2147483648 -; FULL-NEXT: sbcs.w lr, r1, r12 -; FULL-NEXT: sbcs lr, r2, #0 -; FULL-NEXT: sbcs lr, r3, #0 -; FULL-NEXT: cset lr, lt -; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r5, r3, lr, ne -; FULL-NEXT: mov.w r3, #-1 -; FULL-NEXT: csel r0, r0, r3, ne -; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: csel r2, r2, lr, ne -; FULL-NEXT: rsbs r4, r0, #0 -; FULL-NEXT: mov.w r12, #-2147483648 -; FULL-NEXT: sbcs.w r4, r12, r1 -; FULL-NEXT: sbcs.w r2, r3, r2 -; FULL-NEXT: sbcs.w r2, r3, r5 -; FULL-NEXT: cset r2, lt -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: pop {r4, r5, r7, pc} +; FULL: @ %bb.0: @ %entryfp-to-i-entry +; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FULL-NEXT: .pad #52 +; FULL-NEXT: sub sp, #52 +; FULL-NEXT: vmov r2, r1, d0 +; FULL-NEXT: movw r3, #1023 +; FULL-NEXT: ubfx r0, r1, #20, #11 +; FULL-NEXT: cmp r0, r3 +; FULL-NEXT: bhs .LBB45_2 +; FULL-NEXT: @ %bb.1: +; FULL-NEXT: movs r0, #0 +; FULL-NEXT: movs r2, #0 +; FULL-NEXT: movs r1, #0 +; FULL-NEXT: movs r3, #0 +; FULL-NEXT: b .LBB45_5 +; FULL-NEXT: .LBB45_2: @ %fp-to-i-if-check.exp.size +; FULL-NEXT: mov.w r12, #1 +; FULL-NEXT: mov r3, r1 +; FULL-NEXT: bfi r3, r12, #20, #12 +; FULL-NEXT: orr.w lr, r12, r1, asr #31 +; FULL-NEXT: asrs r5, r1, #31 +; FULL-NEXT: movw r1, #1074 +; FULL-NEXT: cmp r0, r1 +; FULL-NEXT: bhi .LBB45_4 +; FULL-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; FULL-NEXT: movw r1, #1075 +; FULL-NEXT: subs r1, r1, r0 +; FULL-NEXT: lsr.w r12, r2, r1 +; FULL-NEXT: rsb.w r2, r1, #32 +; FULL-NEXT: lsr.w r1, r3, r1 +; FULL-NEXT: lsl.w r2, r3, r2 +; FULL-NEXT: orr.w r4, r12, r2 +; FULL-NEXT: movw r2, #1043 +; FULL-NEXT: subs r0, r2, r0 +; FULL-NEXT: it pl +; FULL-NEXT: lsrpl.w r4, r3, r0 +; FULL-NEXT: umull r0, r2, r4, lr +; FULL-NEXT: mov.w r3, #0 +; FULL-NEXT: it pl +; FULL-NEXT: movpl r1, #0 +; FULL-NEXT: mov.w r12, #0 +; FULL-NEXT: umlal r2, r3, r1, lr +; FULL-NEXT: umull lr, r6, r4, r5 +; FULL-NEXT: adds.w r7, lr, r2 +; FULL-NEXT: adcs.w r7, r3, r6 +; FULL-NEXT: umlal r2, r3, r4, r5 +; FULL-NEXT: adc r7, r12, #0 +; FULL-NEXT: mla r6, r5, r1, r6 +; FULL-NEXT: umlal r3, r7, r1, r5 +; FULL-NEXT: mla r6, r5, r4, r6 +; FULL-NEXT: adds.w r1, r3, lr +; FULL-NEXT: adc.w r3, r7, r6 +; FULL-NEXT: b .LBB45_5 +; FULL-NEXT: .LBB45_4: @ %fp-to-i-if-exp.large +; FULL-NEXT: add r1, sp, #32 +; FULL-NEXT: subw r0, r0, #1075 +; FULL-NEXT: mov.w r10, #0 +; FULL-NEXT: stm.w r1, {r2, r3, r10} +; FULL-NEXT: movs r1, #12 +; FULL-NEXT: add r2, sp, #16 +; FULL-NEXT: and.w r1, r1, r0, lsr #3 +; FULL-NEXT: adds r2, #16 +; FULL-NEXT: str.w r10, [sp, #44] +; FULL-NEXT: strd r10, r10, [sp, #24] +; FULL-NEXT: subs r1, r2, r1 +; FULL-NEXT: strd r10, r10, [sp, #16] +; FULL-NEXT: movs r4, #0 +; FULL-NEXT: ldrd r2, r3, [r1] +; FULL-NEXT: str r3, [sp, #4] @ 4-byte Spill +; FULL-NEXT: ldr r7, [r1, #8] +; FULL-NEXT: lsrs r6, r2, #1 +; FULL-NEXT: str r7, [sp, #12] @ 4-byte Spill +; FULL-NEXT: and r7, r0, #31 +; FULL-NEXT: eor r12, r7, #31 +; FULL-NEXT: ldr r1, [r1, #12] +; FULL-NEXT: str r1, [sp, #8] @ 4-byte Spill +; FULL-NEXT: lsr.w r0, r6, r12 +; FULL-NEXT: lsl.w r6, r3, r7 +; FULL-NEXT: lsl.w r3, r2, r7 +; FULL-NEXT: orr.w r8, r6, r0 +; FULL-NEXT: umull r0, r2, r3, lr +; FULL-NEXT: umull r1, r11, r3, r5 +; FULL-NEXT: umlal r2, r4, r8, lr +; FULL-NEXT: str r1, [sp] @ 4-byte Spill +; FULL-NEXT: adds.w r9, r1, r2 +; FULL-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; FULL-NEXT: adcs.w r6, r4, r11 +; FULL-NEXT: umlal r2, r4, r3, r5 +; FULL-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; FULL-NEXT: lsl.w r9, r1, r7 +; FULL-NEXT: adc r10, r10, #0 +; FULL-NEXT: lsrs r1, r6, #1 +; FULL-NEXT: lsl.w r7, r6, r7 +; FULL-NEXT: lsr.w r1, r1, r12 +; FULL-NEXT: orr.w r9, r9, r1 +; FULL-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; FULL-NEXT: umlal r4, r10, r8, r5 +; FULL-NEXT: lsrs.w r1, r1, #1 +; FULL-NEXT: lsr.w r1, r1, r12 +; FULL-NEXT: orrs r1, r7 +; FULL-NEXT: umull r12, r7, lr, r1 +; FULL-NEXT: mla r7, lr, r9, r7 +; FULL-NEXT: mla r1, r5, r1, r7 +; FULL-NEXT: mla r7, r5, r8, r11 +; FULL-NEXT: mla r3, r5, r3, r7 +; FULL-NEXT: ldr r7, [sp] @ 4-byte Reload +; FULL-NEXT: adds.w r7, r7, r12 +; FULL-NEXT: adcs r3, r1 +; FULL-NEXT: adds r1, r4, r7 +; FULL-NEXT: adc.w r3, r3, r10 +; FULL-NEXT: .LBB45_5: @ %fp-to-i-cleanup +; FULL-NEXT: subs.w r6, r0, #-1 +; FULL-NEXT: mvn r7, #-2147483648 +; FULL-NEXT: sbcs.w r6, r2, r7 +; FULL-NEXT: sbcs r6, r1, #0 +; FULL-NEXT: sbcs r6, r3, #0 +; FULL-NEXT: cset r6, lt +; FULL-NEXT: cmp r6, #0 +; FULL-NEXT: csel r2, r2, r7, ne +; FULL-NEXT: mov.w r7, #-1 +; FULL-NEXT: csel r0, r0, r7, ne +; FULL-NEXT: csel r3, r3, r6, ne +; FULL-NEXT: csel r1, r1, r6, ne +; FULL-NEXT: rsbs r5, r0, #0 +; FULL-NEXT: mov.w r6, #-2147483648 +; FULL-NEXT: sbcs.w r5, r6, r2 +; FULL-NEXT: sbcs.w r1, r7, r1 +; FULL-NEXT: sbcs.w r1, r7, r3 +; FULL-NEXT: cset r1, lt +; FULL-NEXT: cmp r1, #0 +; FULL-NEXT: csel r0, r0, r1, ne +; FULL-NEXT: csel r1, r2, r6, ne +; FULL-NEXT: add sp, #52 +; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi double %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) @@ -2743,60 +5076,273 @@ entry: define i64 @utest_f64i64_mm(double %x) { ; SOFT-LABEL: utest_f64i64_mm: -; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} -; SOFT-NEXT: bl __fixunsdfti +; SOFT: @ %bb.0: @ %entryfp-to-i-entry +; SOFT-NEXT: .save {r4, r5, r6, r7, lr} +; SOFT-NEXT: push {r4, r5, r6, r7, lr} +; SOFT-NEXT: .pad #36 +; SOFT-NEXT: sub sp, #36 ; SOFT-NEXT: movs r4, #0 -; SOFT-NEXT: subs r2, r2, #1 +; SOFT-NEXT: ldr r3, .LCPI46_1 +; SOFT-NEXT: mov r5, r3 +; SOFT-NEXT: subs r5, #52 +; SOFT-NEXT: mov r6, r4 +; SOFT-NEXT: sbcs r6, r4 +; SOFT-NEXT: lsls r2, r1, #1 +; SOFT-NEXT: lsrs r2, r2, #21 +; SOFT-NEXT: subs r5, r2, r5 +; SOFT-NEXT: mov r5, r4 +; SOFT-NEXT: sbcs r5, r6 +; SOFT-NEXT: bhs .LBB46_2 +; SOFT-NEXT: @ %bb.1: +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: b .LBB46_4 +; SOFT-NEXT: .LBB46_2: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: ldr r5, .LCPI46_0 +; SOFT-NEXT: mvns r6, r5 +; SOFT-NEXT: mvns r1, r1 +; SOFT-NEXT: orrs r1, r6 +; SOFT-NEXT: subs r1, r5, r1 +; SOFT-NEXT: mvns r0, r0 +; SOFT-NEXT: mvns r0, r0 +; SOFT-NEXT: cmp r3, r2 +; SOFT-NEXT: bls .LBB46_5 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; SOFT-NEXT: subs r2, r3, r2 +; SOFT-NEXT: bl __aeabi_llsr +; SOFT-NEXT: .LBB46_4: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r6, r4 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: b .LBB46_6 +; SOFT-NEXT: .LBB46_5: @ %fp-to-i-if-exp.large +; SOFT-NEXT: str r4, [sp, #28] +; SOFT-NEXT: str r4, [sp, #12] +; SOFT-NEXT: str r4, [sp, #8] +; SOFT-NEXT: str r4, [sp, #4] +; SOFT-NEXT: str r4, [sp] +; SOFT-NEXT: add r3, sp, #16 +; SOFT-NEXT: stm r3!, {r0, r1, r4} +; SOFT-NEXT: ldr r0, .LCPI46_2 +; SOFT-NEXT: adds r2, r2, r0 +; SOFT-NEXT: movs r5, #31 +; SOFT-NEXT: lsrs r0, r2, #3 +; SOFT-NEXT: ands r2, r5 +; SOFT-NEXT: movs r1, #12 +; SOFT-NEXT: ands r1, r0 +; SOFT-NEXT: mov r0, sp +; SOFT-NEXT: adds r0, #16 +; SOFT-NEXT: subs r3, r0, r1 +; SOFT-NEXT: ldm r3!, {r0, r7} +; SOFT-NEXT: ldr r1, [r3] +; SOFT-NEXT: ldr r6, [r3, #4] +; SOFT-NEXT: lsls r6, r2 +; SOFT-NEXT: eors r5, r2 +; SOFT-NEXT: lsrs r3, r1, #1 +; SOFT-NEXT: lsrs r3, r5 +; SOFT-NEXT: orrs r3, r6 +; SOFT-NEXT: lsls r1, r2 +; SOFT-NEXT: lsrs r6, r7, #1 +; SOFT-NEXT: lsrs r6, r5 +; SOFT-NEXT: orrs r6, r1 +; SOFT-NEXT: lsls r7, r2 +; SOFT-NEXT: lsrs r1, r0, #1 +; SOFT-NEXT: lsrs r1, r5 +; SOFT-NEXT: orrs r1, r7 +; SOFT-NEXT: lsls r0, r2 +; SOFT-NEXT: .LBB46_6: @ %fp-to-i-cleanup +; SOFT-NEXT: subs r2, r6, #1 ; SOFT-NEXT: sbcs r3, r4 -; SOFT-NEXT: blo .LBB46_4 -; SOFT-NEXT: @ %bb.1: @ %entry +; SOFT-NEXT: blo .LBB46_10 +; SOFT-NEXT: @ %bb.7: @ %fp-to-i-cleanup ; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB46_5 -; SOFT-NEXT: .LBB46_2: @ %entry -; SOFT-NEXT: beq .LBB46_6 -; SOFT-NEXT: .LBB46_3: @ %entry -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB46_4: +; SOFT-NEXT: beq .LBB46_11 +; SOFT-NEXT: .LBB46_8: @ %fp-to-i-cleanup +; SOFT-NEXT: beq .LBB46_12 +; SOFT-NEXT: .LBB46_9: @ %fp-to-i-cleanup +; SOFT-NEXT: add sp, #36 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB46_10: ; SOFT-NEXT: movs r4, #1 ; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB46_2 -; SOFT-NEXT: .LBB46_5: @ %entry +; SOFT-NEXT: bne .LBB46_8 +; SOFT-NEXT: .LBB46_11: @ %fp-to-i-cleanup ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bne .LBB46_3 -; SOFT-NEXT: .LBB46_6: @ %entry +; SOFT-NEXT: bne .LBB46_9 +; SOFT-NEXT: .LBB46_12: @ %fp-to-i-cleanup ; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: add sp, #36 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .p2align 2 +; SOFT-NEXT: @ %bb.13: +; SOFT-NEXT: .LCPI46_0: +; SOFT-NEXT: .long 1048575 @ 0xfffff +; SOFT-NEXT: .LCPI46_1: +; SOFT-NEXT: .long 1075 @ 0x433 +; SOFT-NEXT: .LCPI46_2: +; SOFT-NEXT: .long 4294966221 @ 0xfffffbcd ; ; VFP2-LABEL: utest_f64i64_mm: -; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: bl __fixunsdfti -; VFP2-NEXT: subs r2, #1 +; VFP2: @ %bb.0: @ %entryfp-to-i-entry +; VFP2-NEXT: .save {r4, r5, r6, lr} +; VFP2-NEXT: push {r4, r5, r6, lr} +; VFP2-NEXT: .pad #32 +; VFP2-NEXT: sub sp, #32 +; VFP2-NEXT: vmov r12, r2, d0 +; VFP2-NEXT: movs r1, #0 +; VFP2-NEXT: movw r0, #1023 +; VFP2-NEXT: ubfx r4, r2, #20, #11 +; VFP2-NEXT: cmp r4, r0 +; VFP2-NEXT: bhs .LBB46_2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: b .LBB46_4 +; VFP2-NEXT: .LBB46_2: @ %fp-to-i-if-check.exp.size +; VFP2-NEXT: movs r0, #1 +; VFP2-NEXT: bfi r2, r0, #20, #12 +; VFP2-NEXT: movw r0, #1074 +; VFP2-NEXT: cmp r4, r0 +; VFP2-NEXT: bhi .LBB46_5 +; VFP2-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; VFP2-NEXT: movw r0, #1075 +; VFP2-NEXT: sub.w lr, r0, r4 +; VFP2-NEXT: rsb.w r0, lr, #32 +; VFP2-NEXT: movw r3, #1043 +; VFP2-NEXT: lsr.w r12, r12, lr +; VFP2-NEXT: subs r3, r3, r4 +; VFP2-NEXT: lsl.w r0, r2, r0 +; VFP2-NEXT: orr.w r0, r0, r12 +; VFP2-NEXT: it pl +; VFP2-NEXT: lsrpl.w r0, r2, r3 +; VFP2-NEXT: lsr.w r2, r2, lr +; VFP2-NEXT: it pl +; VFP2-NEXT: movpl r2, #0 +; VFP2-NEXT: .LBB46_4: @ %fp-to-i-cleanup ; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs r2, r3, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: b .LBB46_6 +; VFP2-NEXT: .LBB46_5: @ %fp-to-i-if-exp.large +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: mov r3, sp +; VFP2-NEXT: strd r0, r0, [sp, #24] +; VFP2-NEXT: adds r3, #16 +; VFP2-NEXT: strd r0, r0, [sp, #8] +; VFP2-NEXT: strd r0, r0, [sp] +; VFP2-NEXT: subw r0, r4, #1075 +; VFP2-NEXT: strd r12, r2, [sp, #16] +; VFP2-NEXT: movs r2, #12 +; VFP2-NEXT: and.w r2, r2, r0, lsr #3 +; VFP2-NEXT: and r0, r0, #31 +; VFP2-NEXT: subs r2, r3, r2 +; VFP2-NEXT: eor r6, r0, #31 +; VFP2-NEXT: ldrd lr, r5, [r2] +; VFP2-NEXT: ldrd r4, r2, [r2, #8] +; VFP2-NEXT: lsls r2, r0 +; VFP2-NEXT: lsrs r3, r4, #1 +; VFP2-NEXT: lsrs r3, r6 +; VFP2-NEXT: orrs r3, r2 +; VFP2-NEXT: lsl.w r2, r4, r0 +; VFP2-NEXT: lsrs.w r4, r5, #1 +; VFP2-NEXT: lsrs r4, r6 +; VFP2-NEXT: orr.w r12, r2, r4 +; VFP2-NEXT: lsr.w r4, lr, #1 +; VFP2-NEXT: lsl.w r2, r5, r0 +; VFP2-NEXT: lsrs r4, r6 +; VFP2-NEXT: orrs r2, r4 +; VFP2-NEXT: lsl.w r0, lr, r0 +; VFP2-NEXT: .LBB46_6: @ %fp-to-i-cleanup +; VFP2-NEXT: subs.w r6, r12, #1 +; VFP2-NEXT: sbcs r3, r3, #0 ; VFP2-NEXT: it lo -; VFP2-NEXT: movlo.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r1, r12 -; VFP2-NEXT: pop {r7, pc} +; VFP2-NEXT: movlo r1, #1 +; VFP2-NEXT: cmp r1, #0 +; VFP2-NEXT: ite eq +; VFP2-NEXT: moveq r0, r1 +; VFP2-NEXT: movne r1, r2 +; VFP2-NEXT: add sp, #32 +; VFP2-NEXT: pop {r4, r5, r6, pc} ; ; FULL-LABEL: utest_f64i64_mm: -; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: bl __fixunsdfti +; FULL: @ %bb.0: @ %entryfp-to-i-entry +; FULL-NEXT: vmov r0, r1, d0 +; FULL-NEXT: movw r3, #1023 +; FULL-NEXT: ubfx r2, r1, #20, #11 +; FULL-NEXT: cmp r2, r3 +; FULL-NEXT: bhs .LBB46_2 +; FULL-NEXT: @ %bb.1: +; FULL-NEXT: movs r0, #0 +; FULL-NEXT: movs r1, #0 +; FULL-NEXT: movs r2, #0 +; FULL-NEXT: movs r3, #0 +; FULL-NEXT: b .LBB46_6 +; FULL-NEXT: .LBB46_2: @ %fp-to-i-if-check.exp.size +; FULL-NEXT: .save {r4, lr} +; FULL-NEXT: push {r4, lr} +; FULL-NEXT: .pad #32 +; FULL-NEXT: sub sp, #32 +; FULL-NEXT: movs r3, #1 +; FULL-NEXT: bfi r1, r3, #20, #12 +; FULL-NEXT: movw r3, #1074 +; FULL-NEXT: cmp r2, r3 +; FULL-NEXT: bhi .LBB46_4 +; FULL-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; FULL-NEXT: movw r3, #1075 +; FULL-NEXT: sub.w lr, r3, r2 +; FULL-NEXT: movw r3, #1043 +; FULL-NEXT: subs r2, r3, r2 +; FULL-NEXT: lsr.w r12, r0, lr +; FULL-NEXT: rsb.w r0, lr, #32 +; FULL-NEXT: mov.w r3, #0 +; FULL-NEXT: lsl.w r0, r1, r0 +; FULL-NEXT: orr.w r0, r0, r12 +; FULL-NEXT: it pl +; FULL-NEXT: lsrpl.w r0, r1, r2 +; FULL-NEXT: lsr.w r1, r1, lr +; FULL-NEXT: it pl +; FULL-NEXT: movpl r1, #0 +; FULL-NEXT: movs r2, #0 +; FULL-NEXT: b .LBB46_5 +; FULL-NEXT: .LBB46_4: @ %fp-to-i-if-exp.large +; FULL-NEXT: add.w r12, sp, #16 +; FULL-NEXT: movs r3, #0 +; FULL-NEXT: stm.w r12, {r0, r1, r3} +; FULL-NEXT: subw r0, r2, #1075 +; FULL-NEXT: movs r1, #12 +; FULL-NEXT: mov r2, sp +; FULL-NEXT: and.w r1, r1, r0, lsr #3 +; FULL-NEXT: adds r2, #16 +; FULL-NEXT: str r3, [sp, #28] +; FULL-NEXT: strd r3, r3, [sp, #8] +; FULL-NEXT: subs r1, r2, r1 +; FULL-NEXT: strd r3, r3, [sp] +; FULL-NEXT: and r0, r0, #31 +; FULL-NEXT: ldrd r12, lr, [r1] +; FULL-NEXT: eor r4, r0, #31 +; FULL-NEXT: ldrd r2, r1, [r1, #8] +; FULL-NEXT: lsls r1, r0 +; FULL-NEXT: lsrs r3, r2, #1 +; FULL-NEXT: lsrs r3, r4 +; FULL-NEXT: orrs r3, r1 +; FULL-NEXT: lsl.w r1, r2, r0 +; FULL-NEXT: lsrs.w r2, lr, #1 +; FULL-NEXT: lsl.w lr, lr, r0 +; FULL-NEXT: lsrs r2, r4 +; FULL-NEXT: lsl.w r0, r12, r0 +; FULL-NEXT: orrs r2, r1 +; FULL-NEXT: lsr.w r1, r12, #1 +; FULL-NEXT: lsrs r1, r4 +; FULL-NEXT: orr.w r1, r1, lr +; FULL-NEXT: .LBB46_5: +; FULL-NEXT: add sp, #32 +; FULL-NEXT: pop.w {r4, lr} +; FULL-NEXT: .LBB46_6: @ %fp-to-i-cleanup ; FULL-NEXT: subs r2, #1 ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: cset r2, lo ; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r0, r0, r2, ne ; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: pop {r7, pc} +; FULL-NEXT: bx lr entry: %conv = fptoui double %x to i128 %spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616) @@ -2806,92 +5352,505 @@ entry: define i64 @ustest_f64i64_mm(double %x) { ; SOFT-LABEL: ustest_f64i64_mm: -; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} -; SOFT-NEXT: bl __fixdfti -; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: subs r2, r2, #1 -; SOFT-NEXT: mov r2, r3 -; SOFT-NEXT: sbcs r2, r1 -; SOFT-NEXT: blt .LBB47_2 -; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r2, r1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB47_3 -; SOFT-NEXT: b .LBB47_4 -; SOFT-NEXT: .LBB47_2: -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB47_4 -; SOFT-NEXT: .LBB47_3: @ %entry -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: .LBB47_4: @ %entry -; SOFT-NEXT: beq .LBB47_10 -; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: bne .LBB47_7 -; SOFT-NEXT: .LBB47_6: @ %entry -; SOFT-NEXT: mov r3, r2 -; SOFT-NEXT: .LBB47_7: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r2, r1 -; SOFT-NEXT: bpl .LBB47_11 -; SOFT-NEXT: @ %bb.8: @ %entry -; SOFT-NEXT: bpl .LBB47_12 -; SOFT-NEXT: .LBB47_9: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB47_10: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: beq .LBB47_6 -; SOFT-NEXT: b .LBB47_7 -; SOFT-NEXT: .LBB47_11: @ %entry -; SOFT-NEXT: mov r2, r0 -; SOFT-NEXT: bmi .LBB47_9 -; SOFT-NEXT: .LBB47_12: @ %entry +; SOFT: @ %bb.0: @ %entryfp-to-i-entry +; SOFT-NEXT: .save {r4, r5, r6, r7, lr} +; SOFT-NEXT: push {r4, r5, r6, r7, lr} +; SOFT-NEXT: .pad #68 +; SOFT-NEXT: sub sp, #68 +; SOFT-NEXT: movs r7, #1 +; SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: ldr r3, .LCPI47_1 +; SOFT-NEXT: mov r5, r3 +; SOFT-NEXT: subs r5, #52 +; SOFT-NEXT: mov r6, r4 +; SOFT-NEXT: sbcs r6, r4 +; SOFT-NEXT: lsls r2, r1, #1 +; SOFT-NEXT: lsrs r2, r2, #21 +; SOFT-NEXT: subs r5, r2, r5 +; SOFT-NEXT: mov r5, r4 +; SOFT-NEXT: sbcs r5, r6 +; SOFT-NEXT: bhs .LBB47_2 +; SOFT-NEXT: @ %bb.1: +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: b .LBB47_6 +; SOFT-NEXT: .LBB47_2: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: asrs r5, r1, #31 +; SOFT-NEXT: str r5, [sp, #28] @ 4-byte Spill +; SOFT-NEXT: str r7, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: orrs r5, r7 +; SOFT-NEXT: str r5, [sp, #24] @ 4-byte Spill +; SOFT-NEXT: ldr r5, .LCPI47_0 +; SOFT-NEXT: mvns r6, r5 +; SOFT-NEXT: mvns r1, r1 +; SOFT-NEXT: orrs r1, r6 +; SOFT-NEXT: subs r1, r5, r1 +; SOFT-NEXT: mvns r0, r0 +; SOFT-NEXT: mvns r0, r0 +; SOFT-NEXT: cmp r3, r2 +; SOFT-NEXT: bls .LBB47_4 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; SOFT-NEXT: subs r2, r3, r2 +; SOFT-NEXT: bl __aeabi_llsr +; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r6, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: adds r6, r0, r1 +; SOFT-NEXT: adcs r7, r4 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r5, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: adds r0, r0, r6 +; SOFT-NEXT: str r0, [sp, #24] @ 4-byte Spill +; SOFT-NEXT: adcs r1, r4 +; SOFT-NEXT: adds r0, r7, r1 +; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: mov r6, r4 +; SOFT-NEXT: adcs r6, r4 +; SOFT-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: adcs r5, r6 +; SOFT-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r3, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: ldr r6, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: adds r0, r6, r0 +; SOFT-NEXT: b .LBB47_5 +; SOFT-NEXT: .LBB47_4: @ %fp-to-i-if-exp.large +; SOFT-NEXT: str r4, [sp, #60] +; SOFT-NEXT: str r4, [sp, #44] +; SOFT-NEXT: str r4, [sp, #40] +; SOFT-NEXT: str r4, [sp, #36] +; SOFT-NEXT: str r4, [sp, #32] +; SOFT-NEXT: add r3, sp, #48 +; SOFT-NEXT: stm r3!, {r0, r1, r4} +; SOFT-NEXT: ldr r0, .LCPI47_2 +; SOFT-NEXT: adds r7, r2, r0 +; SOFT-NEXT: movs r6, #31 +; SOFT-NEXT: lsrs r0, r7, #3 +; SOFT-NEXT: ands r7, r6 +; SOFT-NEXT: movs r1, #12 +; SOFT-NEXT: ands r1, r0 +; SOFT-NEXT: add r0, sp, #32 +; SOFT-NEXT: adds r0, #16 +; SOFT-NEXT: subs r5, r0, r1 +; SOFT-NEXT: ldr r0, [r5, #4] +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [r5, #8] +; SOFT-NEXT: ldr r1, [r5, #12] +; SOFT-NEXT: lsrs r3, r0, #1 +; SOFT-NEXT: lsls r0, r7 +; SOFT-NEXT: eors r6, r7 +; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: lsrs r2, r2, #1 +; SOFT-NEXT: lsrs r2, r6 +; SOFT-NEXT: orrs r2, r0 +; SOFT-NEXT: lsls r1, r7 +; SOFT-NEXT: lsrs r3, r6 +; SOFT-NEXT: orrs r3, r1 +; SOFT-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: lsls r1, r7 +; SOFT-NEXT: ldr r5, [r5] +; SOFT-NEXT: lsrs r0, r5, #1 +; SOFT-NEXT: lsrs r0, r6 +; SOFT-NEXT: orrs r0, r1 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: lsls r5, r7 +; SOFT-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: str r6, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r0 +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r6, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: adds r6, r0, r1 +; SOFT-NEXT: adcs r7, r4 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r5, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: adds r0, r0, r6 +; SOFT-NEXT: str r0, [sp, #24] @ 4-byte Spill +; SOFT-NEXT: adcs r1, r4 +; SOFT-NEXT: adds r7, r7, r1 +; SOFT-NEXT: mov r6, r4 +; SOFT-NEXT: adcs r6, r4 +; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r3, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r7 +; SOFT-NEXT: adcs r1, r6 +; SOFT-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r5 +; SOFT-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: .LBB47_5: @ %fp-to-i-cleanup +; SOFT-NEXT: adcs r1, r5 +; SOFT-NEXT: ldr r7, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: .LBB47_6: @ %fp-to-i-cleanup +; SOFT-NEXT: subs r0, r0, #1 +; SOFT-NEXT: mov r0, r1 +; SOFT-NEXT: sbcs r0, r4 +; SOFT-NEXT: bge .LBB47_16 +; SOFT-NEXT: @ %bb.7: @ %fp-to-i-cleanup +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: beq .LBB47_17 +; SOFT-NEXT: .LBB47_8: @ %fp-to-i-cleanup +; SOFT-NEXT: beq .LBB47_18 +; SOFT-NEXT: .LBB47_9: @ %fp-to-i-cleanup +; SOFT-NEXT: bne .LBB47_11 +; SOFT-NEXT: .LBB47_10: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: .LBB47_11: @ %fp-to-i-cleanup +; SOFT-NEXT: cmp r1, #0 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bmi .LBB47_13 +; SOFT-NEXT: @ %bb.12: @ %fp-to-i-cleanup ; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: .LBB47_13: @ %fp-to-i-cleanup +; SOFT-NEXT: bmi .LBB47_15 +; SOFT-NEXT: @ %bb.14: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r4, r3 +; SOFT-NEXT: .LBB47_15: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: add sp, #68 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB47_16: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r7, r4 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: bne .LBB47_8 +; SOFT-NEXT: .LBB47_17: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: bne .LBB47_9 +; SOFT-NEXT: .LBB47_18: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: beq .LBB47_10 +; SOFT-NEXT: b .LBB47_11 +; SOFT-NEXT: .p2align 2 +; SOFT-NEXT: @ %bb.19: +; SOFT-NEXT: .LCPI47_0: +; SOFT-NEXT: .long 1048575 @ 0xfffff +; SOFT-NEXT: .LCPI47_1: +; SOFT-NEXT: .long 1075 @ 0x433 +; SOFT-NEXT: .LCPI47_2: +; SOFT-NEXT: .long 4294966221 @ 0xfffffbcd ; ; VFP2-LABEL: ustest_f64i64_mm: -; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: bl __fixdfti -; VFP2-NEXT: subs r2, #1 -; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs r2, r3, #0 +; VFP2: @ %bb.0: @ %entryfp-to-i-entry +; VFP2-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: .pad #52 +; VFP2-NEXT: sub sp, #52 +; VFP2-NEXT: vmov r1, r3, d0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: movw r4, #1023 +; VFP2-NEXT: ubfx r0, r3, #20, #11 +; VFP2-NEXT: cmp r0, r4 +; VFP2-NEXT: bhs .LBB47_2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: movs r1, #0 +; VFP2-NEXT: movs r4, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: b .LBB47_5 +; VFP2-NEXT: .LBB47_2: @ %fp-to-i-if-check.exp.size +; VFP2-NEXT: mov.w r12, #1 +; VFP2-NEXT: mov r4, r3 +; VFP2-NEXT: bfi r4, r12, #20, #12 +; VFP2-NEXT: orr.w r8, r12, r3, asr #31 +; VFP2-NEXT: asrs r6, r3, #31 +; VFP2-NEXT: movw r3, #1074 +; VFP2-NEXT: cmp r0, r3 +; VFP2-NEXT: bhi .LBB47_4 +; VFP2-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; VFP2-NEXT: movw r3, #1075 +; VFP2-NEXT: subs r3, r3, r0 +; VFP2-NEXT: lsr.w lr, r1, r3 +; VFP2-NEXT: rsb.w r1, r3, #32 +; VFP2-NEXT: lsr.w r3, r4, r3 +; VFP2-NEXT: lsl.w r1, r4, r1 +; VFP2-NEXT: orr.w r5, lr, r1 +; VFP2-NEXT: movw r1, #1043 +; VFP2-NEXT: subs r0, r1, r0 +; VFP2-NEXT: it pl +; VFP2-NEXT: lsrpl.w r5, r4, r0 +; VFP2-NEXT: umull r0, r1, r5, r8 +; VFP2-NEXT: mov.w r4, #0 +; VFP2-NEXT: it pl +; VFP2-NEXT: movpl r3, #0 +; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: umlal r1, r4, r3, r8 +; VFP2-NEXT: umull r12, r8, r5, r6 +; VFP2-NEXT: adds.w r7, r12, r1 +; VFP2-NEXT: adcs.w r7, r4, r8 +; VFP2-NEXT: umlal r1, r4, r5, r6 +; VFP2-NEXT: adc r7, lr, #0 +; VFP2-NEXT: mla lr, r6, r3, r8 +; VFP2-NEXT: umlal r4, r7, r3, r6 +; VFP2-NEXT: mla r3, r6, r5, lr +; VFP2-NEXT: adds.w r4, r4, r12 +; VFP2-NEXT: adcs r3, r7 +; VFP2-NEXT: b .LBB47_5 +; VFP2-NEXT: .LBB47_4: @ %fp-to-i-if-exp.large +; VFP2-NEXT: subw r0, r0, #1075 +; VFP2-NEXT: add r3, sp, #32 +; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: stm.w r3, {r1, r4, lr} +; VFP2-NEXT: movs r1, #12 +; VFP2-NEXT: add r3, sp, #16 +; VFP2-NEXT: and.w r1, r1, r0, lsr #3 +; VFP2-NEXT: adds r3, #16 +; VFP2-NEXT: str.w lr, [sp, #44] +; VFP2-NEXT: strd lr, lr, [sp, #24] +; VFP2-NEXT: subs r1, r3, r1 +; VFP2-NEXT: strd lr, lr, [sp, #16] +; VFP2-NEXT: and r7, r0, #31 +; VFP2-NEXT: ldrd r3, r4, [r1] +; VFP2-NEXT: eor r12, r7, #31 +; VFP2-NEXT: movs r5, #0 +; VFP2-NEXT: str r4, [sp, #8] @ 4-byte Spill +; VFP2-NEXT: ldrd r9, r1, [r1, #8] +; VFP2-NEXT: str.w r8, [sp] @ 4-byte Spill +; VFP2-NEXT: str r1, [sp, #12] @ 4-byte Spill +; VFP2-NEXT: lsrs r1, r3, #1 +; VFP2-NEXT: lsr.w r0, r1, r12 +; VFP2-NEXT: lsl.w r1, r4, r7 +; VFP2-NEXT: lsls r3, r7 +; VFP2-NEXT: orr.w r11, r1, r0 +; VFP2-NEXT: umull r0, r1, r3, r8 +; VFP2-NEXT: umlal r1, r5, r11, r8 +; VFP2-NEXT: umull r4, r8, r3, r6 +; VFP2-NEXT: str r4, [sp, #4] @ 4-byte Spill +; VFP2-NEXT: adds.w r10, r4, r1 +; VFP2-NEXT: adcs.w r4, r5, r8 +; VFP2-NEXT: umlal r1, r5, r3, r6 +; VFP2-NEXT: ldr r4, [sp, #12] @ 4-byte Reload +; VFP2-NEXT: adc lr, lr, #0 +; VFP2-NEXT: umlal r5, lr, r11, r6 +; VFP2-NEXT: lsl.w r10, r4, r7 +; VFP2-NEXT: lsr.w r4, r9, #1 +; VFP2-NEXT: lsr.w r4, r4, r12 +; VFP2-NEXT: orr.w r10, r10, r4 +; VFP2-NEXT: ldr r4, [sp, #8] @ 4-byte Reload +; VFP2-NEXT: lsl.w r7, r9, r7 +; VFP2-NEXT: lsrs.w r4, r4, #1 +; VFP2-NEXT: lsr.w r4, r4, r12 +; VFP2-NEXT: orr.w r9, r7, r4 +; VFP2-NEXT: ldr r7, [sp] @ 4-byte Reload +; VFP2-NEXT: umull r12, r4, r7, r9 +; VFP2-NEXT: mla r4, r7, r10, r4 +; VFP2-NEXT: mla r7, r6, r11, r8 +; VFP2-NEXT: mla r4, r6, r9, r4 +; VFP2-NEXT: mla r3, r6, r3, r7 +; VFP2-NEXT: ldr r6, [sp, #4] @ 4-byte Reload +; VFP2-NEXT: adds.w r7, r6, r12 +; VFP2-NEXT: adcs r3, r4 +; VFP2-NEXT: adds r4, r5, r7 +; VFP2-NEXT: adc.w r3, r3, lr +; VFP2-NEXT: .LBB47_5: @ %fp-to-i-cleanup +; VFP2-NEXT: subs r7, r4, #1 +; VFP2-NEXT: sbcs r7, r3, #0 ; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 +; VFP2-NEXT: movlt r2, #1 +; VFP2-NEXT: cmp r2, #0 ; VFP2-NEXT: itte eq -; VFP2-NEXT: moveq r1, r12 -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: movne r12, r3 -; VFP2-NEXT: cmp.w r12, #0 +; VFP2-NEXT: moveq r1, r2 +; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: movne r2, r3 +; VFP2-NEXT: cmp r2, #0 ; VFP2-NEXT: itt mi ; VFP2-NEXT: movmi r0, #0 ; VFP2-NEXT: movmi r1, #0 -; VFP2-NEXT: pop {r7, pc} +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; FULL-LABEL: ustest_f64i64_mm: -; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: bl __fixdfti -; FULL-NEXT: subs r2, #1 -; FULL-NEXT: sbcs r2, r3, #0 +; FULL: @ %bb.0: @ %entryfp-to-i-entry +; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FULL-NEXT: .pad #52 +; FULL-NEXT: sub sp, #52 +; FULL-NEXT: vmov r3, r0, d0 +; FULL-NEXT: movw r2, #1023 +; FULL-NEXT: ubfx r1, r0, #20, #11 +; FULL-NEXT: cmp r1, r2 +; FULL-NEXT: bhs .LBB47_2 +; FULL-NEXT: @ %bb.1: +; FULL-NEXT: mov.w lr, #0 +; FULL-NEXT: movs r1, #0 +; FULL-NEXT: movs r0, #0 +; FULL-NEXT: movs r3, #0 +; FULL-NEXT: b .LBB47_5 +; FULL-NEXT: .LBB47_2: @ %fp-to-i-if-check.exp.size +; FULL-NEXT: movs r4, #1 +; FULL-NEXT: mov r2, r0 +; FULL-NEXT: orr.w r11, r4, r0, asr #31 +; FULL-NEXT: bfi r2, r4, #20, #12 +; FULL-NEXT: asrs r0, r0, #31 +; FULL-NEXT: movw r4, #1074 +; FULL-NEXT: cmp r1, r4 +; FULL-NEXT: bhi .LBB47_4 +; FULL-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; FULL-NEXT: movw r4, #1075 +; FULL-NEXT: movw r5, #1043 +; FULL-NEXT: subs r4, r4, r1 +; FULL-NEXT: subs r1, r5, r1 +; FULL-NEXT: lsr.w r12, r3, r4 +; FULL-NEXT: rsb.w r3, r4, #32 +; FULL-NEXT: lsl.w r3, r2, r3 +; FULL-NEXT: orr.w r3, r3, r12 +; FULL-NEXT: mov.w r12, #0 +; FULL-NEXT: it pl +; FULL-NEXT: lsrpl.w r3, r2, r1 +; FULL-NEXT: umull lr, r1, r3, r11 +; FULL-NEXT: lsr.w r2, r2, r4 +; FULL-NEXT: mov.w r4, #0 +; FULL-NEXT: it pl +; FULL-NEXT: movpl r2, #0 +; FULL-NEXT: umlal r1, r4, r2, r11 +; FULL-NEXT: umull r6, r5, r3, r0 +; FULL-NEXT: adds r7, r6, r1 +; FULL-NEXT: adcs.w r7, r4, r5 +; FULL-NEXT: umlal r1, r4, r3, r0 +; FULL-NEXT: adc r7, r12, #0 +; FULL-NEXT: mla r5, r0, r2, r5 +; FULL-NEXT: umlal r4, r7, r2, r0 +; FULL-NEXT: mla r2, r0, r3, r5 +; FULL-NEXT: adds r0, r4, r6 +; FULL-NEXT: adc.w r3, r7, r2 +; FULL-NEXT: b .LBB47_5 +; FULL-NEXT: .LBB47_4: @ %fp-to-i-if-exp.large +; FULL-NEXT: subw r1, r1, #1075 +; FULL-NEXT: strd r3, r2, [sp, #32] +; FULL-NEXT: movs r2, #12 +; FULL-NEXT: add r3, sp, #16 +; FULL-NEXT: mov.w r8, #0 +; FULL-NEXT: and.w r2, r2, r1, lsr #3 +; FULL-NEXT: adds r3, #16 +; FULL-NEXT: strd r8, r8, [sp, #40] +; FULL-NEXT: strd r8, r8, [sp, #24] +; FULL-NEXT: subs r3, r3, r2 +; FULL-NEXT: strd r8, r8, [sp, #16] +; FULL-NEXT: movs r4, #0 +; FULL-NEXT: ldrd r7, r6, [r3] +; FULL-NEXT: str r6, [sp, #4] @ 4-byte Spill +; FULL-NEXT: ldr r2, [r3, #8] +; FULL-NEXT: lsrs r5, r7, #1 +; FULL-NEXT: str r2, [sp, #12] @ 4-byte Spill +; FULL-NEXT: ldr r2, [r3, #12] +; FULL-NEXT: and r3, r1, #31 +; FULL-NEXT: str r2, [sp, #8] @ 4-byte Spill +; FULL-NEXT: eor r2, r3, #31 +; FULL-NEXT: lsls r7, r3 +; FULL-NEXT: lsr.w r1, r5, r2 +; FULL-NEXT: lsl.w r5, r6, r3 +; FULL-NEXT: orr.w r12, r5, r1 +; FULL-NEXT: umull lr, r1, r7, r11 +; FULL-NEXT: umull r5, r10, r7, r0 +; FULL-NEXT: umlal r1, r4, r12, r11 +; FULL-NEXT: str r5, [sp] @ 4-byte Spill +; FULL-NEXT: adds.w r9, r5, r1 +; FULL-NEXT: adcs.w r5, r4, r10 +; FULL-NEXT: umlal r1, r4, r7, r0 +; FULL-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; FULL-NEXT: adc r9, r8, #0 +; FULL-NEXT: umlal r4, r9, r12, r0 +; FULL-NEXT: lsl.w r8, r5, r3 +; FULL-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; FULL-NEXT: lsrs r6, r5, #1 +; FULL-NEXT: lsl.w r3, r5, r3 +; FULL-NEXT: ldr r5, [sp, #4] @ 4-byte Reload +; FULL-NEXT: lsrs r6, r2 +; FULL-NEXT: orr.w r8, r8, r6 +; FULL-NEXT: lsrs.w r6, r5, #1 +; FULL-NEXT: lsr.w r2, r6, r2 +; FULL-NEXT: orrs r2, r3 +; FULL-NEXT: umull r3, r6, r11, r2 +; FULL-NEXT: mla r6, r11, r8, r6 +; FULL-NEXT: mla r2, r0, r2, r6 +; FULL-NEXT: mla r6, r0, r12, r10 +; FULL-NEXT: mla r0, r0, r7, r6 +; FULL-NEXT: ldr r7, [sp] @ 4-byte Reload +; FULL-NEXT: adds r3, r3, r7 +; FULL-NEXT: adcs r2, r0 +; FULL-NEXT: adds r0, r4, r3 +; FULL-NEXT: adc.w r3, r9, r2 +; FULL-NEXT: .LBB47_5: @ %fp-to-i-cleanup +; FULL-NEXT: subs r0, #1 +; FULL-NEXT: sbcs r0, r3, #0 ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: csel r0, r0, r2, ne +; FULL-NEXT: csel r0, lr, r2, ne ; FULL-NEXT: csel r2, r3, r2, ne ; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: itt mi ; FULL-NEXT: movmi r0, #0 ; FULL-NEXT: movmi r1, #0 -; FULL-NEXT: pop {r7, pc} +; FULL-NEXT: add sp, #52 +; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi double %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616) @@ -2902,150 +5861,510 @@ entry: define i64 @stest_f32i64_mm(float %x) { ; SOFT-LABEL: stest_f32i64_mm: -; SOFT: @ %bb.0: @ %entry +; SOFT: @ %bb.0: @ %entryfp-to-i-entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 -; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: movs r0, #1 -; SOFT-NEXT: movs r5, #0 -; SOFT-NEXT: ldr r6, .LCPI48_0 -; SOFT-NEXT: adds r4, r7, #1 +; SOFT-NEXT: .pad #68 +; SOFT-NEXT: sub sp, #68 +; SOFT-NEXT: movs r7, #1 +; SOFT-NEXT: lsrs r1, r0, #23 +; SOFT-NEXT: uxtb r5, r1 +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: cmp r5, #127 +; SOFT-NEXT: str r6, [sp, #24] @ 4-byte Spill +; SOFT-NEXT: bhs .LBB48_2 +; SOFT-NEXT: @ %bb.1: +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r5, r6 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: b .LBB48_6 +; SOFT-NEXT: .LBB48_2: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: asrs r4, r0, #31 +; SOFT-NEXT: str r4, [sp, #28] @ 4-byte Spill +; SOFT-NEXT: str r7, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: orrs r4, r7 +; SOFT-NEXT: ldr r1, .LCPI48_0 +; SOFT-NEXT: ands r0, r1 +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: adds r7, r0, #1 +; SOFT-NEXT: cmp r5, #149 +; SOFT-NEXT: bhi .LBB48_4 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; SOFT-NEXT: movs r1, #150 +; SOFT-NEXT: subs r1, r1, r5 +; SOFT-NEXT: lsrs r7, r1 +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: str r7, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: sbcs r4, r6 -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: sbcs r4, r5 -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: sbcs r4, r5 -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: blt .LBB48_2 -; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r4, r5 -; SOFT-NEXT: .LBB48_2: @ %entry -; SOFT-NEXT: mvns r6, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB48_12 -; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: beq .LBB48_13 -; SOFT-NEXT: .LBB48_4: @ %entry -; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: beq .LBB48_14 -; SOFT-NEXT: .LBB48_5: @ %entry +; SOFT-NEXT: adds r5, r0, r5 +; SOFT-NEXT: adcs r4, r6 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: adds r5, r0, r5 +; SOFT-NEXT: adcs r1, r6 +; SOFT-NEXT: adds r0, r4, r1 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: adcs r7, r6 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: adcs r4, r7 +; SOFT-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adds r2, r3, r2 +; SOFT-NEXT: adcs r1, r4 +; SOFT-NEXT: b .LBB48_5 +; SOFT-NEXT: .LBB48_4: @ %fp-to-i-if-exp.large +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: str r6, [sp, #60] +; SOFT-NEXT: str r6, [sp, #56] +; SOFT-NEXT: str r6, [sp, #52] +; SOFT-NEXT: str r7, [sp, #48] +; SOFT-NEXT: str r6, [sp, #44] +; SOFT-NEXT: str r6, [sp, #40] +; SOFT-NEXT: str r6, [sp, #36] +; SOFT-NEXT: str r6, [sp, #32] +; SOFT-NEXT: subs r5, #150 +; SOFT-NEXT: movs r7, #31 +; SOFT-NEXT: lsrs r0, r5, #3 +; SOFT-NEXT: ands r5, r7 +; SOFT-NEXT: movs r1, #12 +; SOFT-NEXT: ands r1, r0 +; SOFT-NEXT: add r0, sp, #32 +; SOFT-NEXT: adds r0, #16 +; SOFT-NEXT: str r4, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: subs r4, r0, r1 +; SOFT-NEXT: ldr r0, [r4, #4] +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [r4, #8] +; SOFT-NEXT: ldr r1, [r4, #12] +; SOFT-NEXT: lsrs r3, r0, #1 +; SOFT-NEXT: lsls r0, r5 +; SOFT-NEXT: eors r7, r5 +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: lsrs r2, r2, #1 +; SOFT-NEXT: lsrs r2, r7 +; SOFT-NEXT: orrs r2, r0 +; SOFT-NEXT: lsls r1, r5 +; SOFT-NEXT: lsrs r3, r7 +; SOFT-NEXT: orrs r3, r1 +; SOFT-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: lsls r1, r5 +; SOFT-NEXT: ldr r4, [r4] +; SOFT-NEXT: lsrs r0, r4, #1 +; SOFT-NEXT: lsrs r0, r7 +; SOFT-NEXT: orrs r0, r1 +; SOFT-NEXT: mov r3, r0 +; SOFT-NEXT: lsls r4, r5 +; SOFT-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r5, r3 ; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: bne .LBB48_7 -; SOFT-NEXT: .LBB48_6: @ %entry +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r0 +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: ldr r5, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: adds r5, r0, r1 +; SOFT-NEXT: adcs r7, r6 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: adds r5, r0, r5 +; SOFT-NEXT: adcs r1, r6 +; SOFT-NEXT: adds r4, r7, r1 ; SOFT-NEXT: mov r7, r6 -; SOFT-NEXT: .LBB48_7: @ %entry -; SOFT-NEXT: lsls r3, r0, #31 -; SOFT-NEXT: rsbs r4, r7, #0 -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: sbcs r4, r1 -; SOFT-NEXT: mov r4, r6 -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: sbcs r4, r2 -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: adcs r7, r6 +; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: adds r2, r2, r4 +; SOFT-NEXT: adcs r1, r7 +; SOFT-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r2, r2, r3 +; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r3 +; SOFT-NEXT: .LBB48_5: @ %fp-to-i-cleanup +; SOFT-NEXT: ldr r7, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: ldr r6, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: .LBB48_6: @ %fp-to-i-cleanup +; SOFT-NEXT: ldr r4, .LCPI48_1 +; SOFT-NEXT: adds r3, r0, #1 +; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: sbcs r3, r4 +; SOFT-NEXT: mov r3, r2 +; SOFT-NEXT: sbcs r3, r6 +; SOFT-NEXT: mov r3, r1 +; SOFT-NEXT: sbcs r3, r6 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: mov r6, r7 +; SOFT-NEXT: blt .LBB48_8 +; SOFT-NEXT: @ %bb.7: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r6, r3 +; SOFT-NEXT: .LBB48_8: @ %fp-to-i-cleanup +; SOFT-NEXT: mvns r3, r3 +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: beq .LBB48_18 +; SOFT-NEXT: @ %bb.9: @ %fp-to-i-cleanup +; SOFT-NEXT: beq .LBB48_19 +; SOFT-NEXT: .LBB48_10: @ %fp-to-i-cleanup +; SOFT-NEXT: beq .LBB48_20 +; SOFT-NEXT: .LBB48_11: @ %fp-to-i-cleanup +; SOFT-NEXT: bne .LBB48_13 +; SOFT-NEXT: .LBB48_12: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: .LBB48_13: @ %fp-to-i-cleanup +; SOFT-NEXT: lsls r4, r7, #31 +; SOFT-NEXT: rsbs r6, r0, #0 +; SOFT-NEXT: mov r6, r4 +; SOFT-NEXT: sbcs r6, r5 +; SOFT-NEXT: mov r6, r3 ; SOFT-NEXT: sbcs r6, r2 -; SOFT-NEXT: bge .LBB48_15 -; SOFT-NEXT: @ %bb.8: @ %entry -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: beq .LBB48_16 -; SOFT-NEXT: .LBB48_9: @ %entry +; SOFT-NEXT: sbcs r3, r1 +; SOFT-NEXT: bge .LBB48_21 +; SOFT-NEXT: @ %bb.14: @ %fp-to-i-cleanup +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: beq .LBB48_22 +; SOFT-NEXT: .LBB48_15: @ %fp-to-i-cleanup +; SOFT-NEXT: bne .LBB48_17 +; SOFT-NEXT: .LBB48_16: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r5, r4 +; SOFT-NEXT: .LBB48_17: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: add sp, #68 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB48_18: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: bne .LBB48_10 +; SOFT-NEXT: .LBB48_19: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r2, r6 ; SOFT-NEXT: bne .LBB48_11 -; SOFT-NEXT: .LBB48_10: @ %entry -; SOFT-NEXT: mov r1, r3 -; SOFT-NEXT: .LBB48_11: @ %entry +; SOFT-NEXT: .LBB48_20: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r5, r4 +; SOFT-NEXT: beq .LBB48_12 +; SOFT-NEXT: b .LBB48_13 +; SOFT-NEXT: .LBB48_21: @ %fp-to-i-cleanup +; SOFT-NEXT: ldr r7, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: bne .LBB48_15 +; SOFT-NEXT: .LBB48_22: @ %fp-to-i-cleanup ; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: add sp, #12 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB48_12: @ %entry -; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: bne .LBB48_4 -; SOFT-NEXT: .LBB48_13: @ %entry -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB48_5 -; SOFT-NEXT: .LBB48_14: @ %entry -; SOFT-NEXT: ldr r1, .LCPI48_0 -; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: beq .LBB48_6 -; SOFT-NEXT: b .LBB48_7 -; SOFT-NEXT: .LBB48_15: @ %entry -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: bne .LBB48_9 -; SOFT-NEXT: .LBB48_16: @ %entry -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: beq .LBB48_10 -; SOFT-NEXT: b .LBB48_11 +; SOFT-NEXT: beq .LBB48_16 +; SOFT-NEXT: b .LBB48_17 ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.17: +; SOFT-NEXT: @ %bb.23: ; SOFT-NEXT: .LCPI48_0: +; SOFT-NEXT: .long 8388607 @ 0x7fffff +; SOFT-NEXT: .LCPI48_1: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; ; VFP2-LABEL: stest_f32i64_mm: -; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r4, r5, r7, lr} -; VFP2-NEXT: push {r4, r5, r7, lr} -; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs.w r4, r0, #-1 -; VFP2-NEXT: mvn lr, #-2147483648 -; VFP2-NEXT: sbcs.w r4, r1, lr +; VFP2: @ %bb.0: @ %entryfp-to-i-entry +; VFP2-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: .pad #60 +; VFP2-NEXT: sub sp, #60 +; VFP2-NEXT: vmov r1, s0 +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: ubfx r2, r1, #23, #8 +; VFP2-NEXT: cmp r2, #127 +; VFP2-NEXT: bhs .LBB48_2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: mov.w lr, #0 ; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs r4, r2, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: b .LBB48_5 +; VFP2-NEXT: .LBB48_2: @ %fp-to-i-if-check.exp.size +; VFP2-NEXT: movs r3, #1 +; VFP2-NEXT: mov r4, r1 +; VFP2-NEXT: bfi r4, r3, #23, #9 +; VFP2-NEXT: orr.w r11, r3, r1, asr #31 +; VFP2-NEXT: asrs r7, r1, #31 +; VFP2-NEXT: cmp r2, #149 +; VFP2-NEXT: bhi .LBB48_4 +; VFP2-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; VFP2-NEXT: rsb.w r2, r2, #150 +; VFP2-NEXT: movs r5, #0 +; VFP2-NEXT: lsr.w r3, r4, r2 +; VFP2-NEXT: umull r2, lr, r3, r11 +; VFP2-NEXT: umull r4, r1, r3, r7 +; VFP2-NEXT: adds.w r6, r4, lr +; VFP2-NEXT: adcs r6, r1, #0 +; VFP2-NEXT: adc r8, r5, #0 +; VFP2-NEXT: adds.w r6, r4, lr +; VFP2-NEXT: mla r6, r7, r3, r1 +; VFP2-NEXT: adcs.w r12, r4, r1 +; VFP2-NEXT: umlal lr, r5, r3, r7 +; VFP2-NEXT: adc.w r3, r8, r6 +; VFP2-NEXT: b .LBB48_5 +; VFP2-NEXT: .LBB48_4: @ %fp-to-i-if-exp.large +; VFP2-NEXT: sub.w r1, r2, #150 +; VFP2-NEXT: movs r2, #12 +; VFP2-NEXT: add r6, sp, #24 +; VFP2-NEXT: mov.w r9, #0 +; VFP2-NEXT: and.w r2, r2, r1, lsr #3 +; VFP2-NEXT: adds r6, #16 +; VFP2-NEXT: strd r9, r9, [sp, #48] +; VFP2-NEXT: subs r2, r6, r2 +; VFP2-NEXT: strd r4, r9, [sp, #40] +; VFP2-NEXT: and r1, r1, #31 +; VFP2-NEXT: strd r9, r9, [sp, #32] +; VFP2-NEXT: eor r8, r1, #31 +; VFP2-NEXT: strd r9, r9, [sp, #24] +; VFP2-NEXT: ldrd r6, r3, [r2] +; VFP2-NEXT: str r3, [sp, #16] @ 4-byte Spill +; VFP2-NEXT: ldr r5, [r2, #8] +; VFP2-NEXT: str r5, [sp, #8] @ 4-byte Spill +; VFP2-NEXT: lsl.w r5, r3, r1 +; VFP2-NEXT: ldr r2, [r2, #12] +; VFP2-NEXT: str r2, [sp, #20] @ 4-byte Spill +; VFP2-NEXT: lsrs r2, r6, #1 +; VFP2-NEXT: lsr.w r2, r2, r8 +; VFP2-NEXT: lsls r6, r1 +; VFP2-NEXT: orr.w r4, r5, r2 +; VFP2-NEXT: umull r2, lr, r6, r11 +; VFP2-NEXT: movs r5, #0 +; VFP2-NEXT: umull r3, r12, r6, r7 +; VFP2-NEXT: umlal lr, r5, r4, r11 +; VFP2-NEXT: str r3, [sp, #12] @ 4-byte Spill +; VFP2-NEXT: str.w r12, [sp, #4] @ 4-byte Spill +; VFP2-NEXT: adds.w r10, r3, lr +; VFP2-NEXT: adcs.w r3, r5, r12 +; VFP2-NEXT: umlal lr, r5, r6, r7 +; VFP2-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; VFP2-NEXT: adc r10, r9, #0 +; VFP2-NEXT: umlal r5, r10, r4, r7 +; VFP2-NEXT: mov r12, r4 +; VFP2-NEXT: lsl.w r9, r3, r1 +; VFP2-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; VFP2-NEXT: lsrs r4, r3, #1 +; VFP2-NEXT: lsl.w r1, r3, r1 +; VFP2-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; VFP2-NEXT: lsr.w r4, r4, r8 +; VFP2-NEXT: orr.w r9, r9, r4 +; VFP2-NEXT: lsrs.w r4, r3, #1 +; VFP2-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; VFP2-NEXT: lsr.w r4, r4, r8 +; VFP2-NEXT: orrs r1, r4 +; VFP2-NEXT: umull r8, r4, r11, r1 +; VFP2-NEXT: mla r4, r11, r9, r4 +; VFP2-NEXT: mla r1, r7, r1, r4 +; VFP2-NEXT: mla r4, r7, r12, r3 +; VFP2-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; VFP2-NEXT: mla r7, r7, r6, r4 +; VFP2-NEXT: adds.w r6, r3, r8 +; VFP2-NEXT: adcs r1, r7 +; VFP2-NEXT: adds.w r12, r5, r6 +; VFP2-NEXT: adc.w r3, r10, r1 +; VFP2-NEXT: .LBB48_5: @ %fp-to-i-cleanup +; VFP2-NEXT: subs.w r4, r2, #-1 +; VFP2-NEXT: mvn r1, #-2147483648 +; VFP2-NEXT: sbcs.w r4, lr, r1 +; VFP2-NEXT: mov.w r7, #-1 +; VFP2-NEXT: sbcs r4, r12, #0 +; VFP2-NEXT: mov.w r6, #-2147483648 ; VFP2-NEXT: sbcs r4, r3, #0 ; VFP2-NEXT: mov.w r4, #0 ; VFP2-NEXT: it lt ; VFP2-NEXT: movlt r4, #1 ; VFP2-NEXT: cmp r4, #0 -; VFP2-NEXT: itet eq +; VFP2-NEXT: itee eq ; VFP2-NEXT: moveq r3, r4 -; VFP2-NEXT: movne r4, r2 -; VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: mov.w r2, #-1 +; VFP2-NEXT: movne r4, r12 +; VFP2-NEXT: movne r1, lr ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r2 -; VFP2-NEXT: rsbs r5, r0, #0 -; VFP2-NEXT: mov.w lr, #-2147483648 -; VFP2-NEXT: sbcs.w r5, lr, r1 -; VFP2-NEXT: sbcs.w r4, r2, r4 -; VFP2-NEXT: sbcs r2, r3 +; VFP2-NEXT: moveq r2, r7 +; VFP2-NEXT: rsbs r5, r2, #0 +; VFP2-NEXT: sbcs.w r5, r6, r1 +; VFP2-NEXT: sbcs.w r5, r7, r4 +; VFP2-NEXT: sbcs.w r3, r7, r3 ; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: pop {r4, r5, r7, pc} +; VFP2-NEXT: movlt r0, #1 +; VFP2-NEXT: cmp r0, #0 +; VFP2-NEXT: ite ne +; VFP2-NEXT: movne r0, r2 +; VFP2-NEXT: moveq r1, r6 +; VFP2-NEXT: add sp, #60 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; FULL-LABEL: stest_f32i64_mm: -; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r7, lr} -; FULL-NEXT: push {r4, r5, r7, lr} -; FULL-NEXT: bl __fixsfti -; FULL-NEXT: subs.w lr, r0, #-1 -; FULL-NEXT: mvn r12, #-2147483648 -; FULL-NEXT: sbcs.w lr, r1, r12 -; FULL-NEXT: sbcs lr, r2, #0 -; FULL-NEXT: sbcs lr, r3, #0 -; FULL-NEXT: cset lr, lt -; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r5, r3, lr, ne -; FULL-NEXT: mov.w r3, #-1 -; FULL-NEXT: csel r0, r0, r3, ne -; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: csel r2, r2, lr, ne -; FULL-NEXT: rsbs r4, r0, #0 -; FULL-NEXT: mov.w r12, #-2147483648 -; FULL-NEXT: sbcs.w r4, r12, r1 -; FULL-NEXT: sbcs.w r2, r3, r2 -; FULL-NEXT: sbcs.w r2, r3, r5 -; FULL-NEXT: cset r2, lt -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: pop {r4, r5, r7, pc} +; FULL: @ %bb.0: @ %entryfp-to-i-entry +; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FULL-NEXT: .pad #52 +; FULL-NEXT: sub sp, #52 +; FULL-NEXT: vmov r2, s0 +; FULL-NEXT: ubfx r0, r2, #23, #8 +; FULL-NEXT: cmp r0, #127 +; FULL-NEXT: bhs .LBB48_2 +; FULL-NEXT: @ %bb.1: +; FULL-NEXT: movs r7, #0 +; FULL-NEXT: movs r1, #0 +; FULL-NEXT: movs r2, #0 +; FULL-NEXT: movs r3, #0 +; FULL-NEXT: b .LBB48_5 +; FULL-NEXT: .LBB48_2: @ %fp-to-i-if-check.exp.size +; FULL-NEXT: movs r3, #1 +; FULL-NEXT: mov r1, r2 +; FULL-NEXT: orr.w r11, r3, r2, asr #31 +; FULL-NEXT: bfi r1, r3, #23, #9 +; FULL-NEXT: asrs r2, r2, #31 +; FULL-NEXT: cmp r0, #149 +; FULL-NEXT: bhi .LBB48_4 +; FULL-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; FULL-NEXT: rsb.w r0, r0, #150 +; FULL-NEXT: movs r5, #0 +; FULL-NEXT: lsr.w r3, r1, r0 +; FULL-NEXT: umull r7, r1, r3, r11 +; FULL-NEXT: umull r4, r0, r3, r2 +; FULL-NEXT: adds r6, r4, r1 +; FULL-NEXT: adcs r6, r0, #0 +; FULL-NEXT: adc r12, r5, #0 +; FULL-NEXT: adds r6, r4, r1 +; FULL-NEXT: mla r6, r2, r3, r0 +; FULL-NEXT: umlal r1, r5, r3, r2 +; FULL-NEXT: adcs.w r2, r4, r0 +; FULL-NEXT: adc.w r3, r12, r6 +; FULL-NEXT: b .LBB48_5 +; FULL-NEXT: .LBB48_4: @ %fp-to-i-if-exp.large +; FULL-NEXT: mov.w lr, #0 +; FULL-NEXT: subs r0, #150 +; FULL-NEXT: add r3, sp, #16 +; FULL-NEXT: strd r1, lr, [sp, #32] +; FULL-NEXT: movs r1, #12 +; FULL-NEXT: adds r3, #16 +; FULL-NEXT: and.w r1, r1, r0, lsr #3 +; FULL-NEXT: strd lr, lr, [sp, #40] +; FULL-NEXT: strd lr, lr, [sp, #24] +; FULL-NEXT: subs r1, r3, r1 +; FULL-NEXT: strd lr, lr, [sp, #16] +; FULL-NEXT: and r0, r0, #31 +; FULL-NEXT: ldrd r3, r4, [r1] +; FULL-NEXT: eor r12, r0, #31 +; FULL-NEXT: movs r5, #0 +; FULL-NEXT: str r4, [sp, #4] @ 4-byte Spill +; FULL-NEXT: ldr r7, [r1, #8] +; FULL-NEXT: str r7, [sp, #12] @ 4-byte Spill +; FULL-NEXT: lsl.w r7, r4, r0 +; FULL-NEXT: ldr r1, [r1, #12] +; FULL-NEXT: str r1, [sp, #8] @ 4-byte Spill +; FULL-NEXT: lsrs r1, r3, #1 +; FULL-NEXT: lsr.w r1, r1, r12 +; FULL-NEXT: lsls r3, r0 +; FULL-NEXT: orr.w r10, r7, r1 +; FULL-NEXT: umull r7, r1, r3, r11 +; FULL-NEXT: umull r4, r9, r3, r2 +; FULL-NEXT: umlal r1, r5, r10, r11 +; FULL-NEXT: str r4, [sp] @ 4-byte Spill +; FULL-NEXT: adds.w r8, r4, r1 +; FULL-NEXT: adcs.w r6, r5, r9 +; FULL-NEXT: umlal r1, r5, r3, r2 +; FULL-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; FULL-NEXT: adc r8, lr, #0 +; FULL-NEXT: umlal r5, r8, r10, r2 +; FULL-NEXT: lsl.w lr, r6, r0 +; FULL-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; FULL-NEXT: lsrs r4, r6, #1 +; FULL-NEXT: lsl.w r0, r6, r0 +; FULL-NEXT: lsr.w r4, r4, r12 +; FULL-NEXT: orr.w lr, lr, r4 +; FULL-NEXT: ldr r4, [sp, #4] @ 4-byte Reload +; FULL-NEXT: lsrs.w r4, r4, #1 +; FULL-NEXT: lsr.w r4, r4, r12 +; FULL-NEXT: orrs r0, r4 +; FULL-NEXT: umull r12, r4, r11, r0 +; FULL-NEXT: mla r4, r11, lr, r4 +; FULL-NEXT: mla r0, r2, r0, r4 +; FULL-NEXT: mla r4, r2, r10, r9 +; FULL-NEXT: mla r2, r2, r3, r4 +; FULL-NEXT: ldr r3, [sp] @ 4-byte Reload +; FULL-NEXT: adds.w r3, r3, r12 +; FULL-NEXT: adcs r0, r2 +; FULL-NEXT: adds r2, r5, r3 +; FULL-NEXT: adc.w r3, r8, r0 +; FULL-NEXT: .LBB48_5: @ %fp-to-i-cleanup +; FULL-NEXT: subs.w r6, r7, #-1 +; FULL-NEXT: mvn r0, #-2147483648 +; FULL-NEXT: sbcs.w r6, r1, r0 +; FULL-NEXT: sbcs r6, r2, #0 +; FULL-NEXT: sbcs r6, r3, #0 +; FULL-NEXT: cset r6, lt +; FULL-NEXT: cmp r6, #0 +; FULL-NEXT: csel r1, r1, r0, ne +; FULL-NEXT: mov.w r0, #-1 +; FULL-NEXT: csel r7, r7, r0, ne +; FULL-NEXT: csel r3, r3, r6, ne +; FULL-NEXT: csel r2, r2, r6, ne +; FULL-NEXT: rsbs r5, r7, #0 +; FULL-NEXT: mov.w r6, #-2147483648 +; FULL-NEXT: sbcs.w r5, r6, r1 +; FULL-NEXT: sbcs.w r2, r0, r2 +; FULL-NEXT: sbcs r0, r3 +; FULL-NEXT: cset r0, lt +; FULL-NEXT: cmp r0, #0 +; FULL-NEXT: csel r0, r7, r0, ne +; FULL-NEXT: csel r1, r1, r6, ne +; FULL-NEXT: add sp, #52 +; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi float %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) @@ -3056,155 +6375,701 @@ entry: define i64 @utest_f32i64_mm(float %x) { ; SOFT-LABEL: utest_f32i64_mm: -; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} -; SOFT-NEXT: bl __fixunssfti -; SOFT-NEXT: movs r4, #0 -; SOFT-NEXT: subs r2, r2, #1 -; SOFT-NEXT: sbcs r3, r4 -; SOFT-NEXT: blo .LBB49_4 -; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB49_5 -; SOFT-NEXT: .LBB49_2: @ %entry -; SOFT-NEXT: beq .LBB49_6 -; SOFT-NEXT: .LBB49_3: @ %entry -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB49_4: -; SOFT-NEXT: movs r4, #1 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB49_2 -; SOFT-NEXT: .LBB49_5: @ %entry +; SOFT: @ %bb.0: @ %entryfp-to-i-entry +; SOFT-NEXT: .save {r4, r5, r6, r7, lr} +; SOFT-NEXT: push {r4, r5, r6, r7, lr} +; SOFT-NEXT: .pad #36 +; SOFT-NEXT: sub sp, #36 +; SOFT-NEXT: lsrs r1, r0, #23 +; SOFT-NEXT: uxtb r3, r1 +; SOFT-NEXT: movs r2, #0 +; SOFT-NEXT: cmp r3, #127 +; SOFT-NEXT: bhs .LBB49_2 +; SOFT-NEXT: @ %bb.1: +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: mov r1, r2 +; SOFT-NEXT: mov r6, r2 +; SOFT-NEXT: mov r4, r2 +; SOFT-NEXT: b .LBB49_5 +; SOFT-NEXT: .LBB49_2: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: ldr r1, .LCPI49_0 +; SOFT-NEXT: ands r0, r1 +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: adds r0, r0, #1 +; SOFT-NEXT: cmp r3, #149 +; SOFT-NEXT: bhi .LBB49_4 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; SOFT-NEXT: movs r1, #150 +; SOFT-NEXT: subs r1, r1, r3 +; SOFT-NEXT: lsrs r0, r1 +; SOFT-NEXT: movs r1, #0 +; SOFT-NEXT: mov r6, r1 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: b .LBB49_5 +; SOFT-NEXT: .LBB49_4: @ %fp-to-i-if-exp.large +; SOFT-NEXT: movs r1, #0 +; SOFT-NEXT: str r1, [sp, #28] +; SOFT-NEXT: str r1, [sp, #24] +; SOFT-NEXT: str r1, [sp, #20] +; SOFT-NEXT: str r0, [sp, #16] +; SOFT-NEXT: str r1, [sp, #12] +; SOFT-NEXT: str r1, [sp, #8] +; SOFT-NEXT: str r1, [sp, #4] +; SOFT-NEXT: str r1, [sp] +; SOFT-NEXT: subs r3, #150 +; SOFT-NEXT: movs r5, #31 +; SOFT-NEXT: lsrs r0, r3, #3 +; SOFT-NEXT: ands r3, r5 +; SOFT-NEXT: movs r1, #12 +; SOFT-NEXT: ands r1, r0 +; SOFT-NEXT: mov r0, sp +; SOFT-NEXT: adds r0, #16 +; SOFT-NEXT: subs r4, r0, r1 +; SOFT-NEXT: ldm r4!, {r0, r7} +; SOFT-NEXT: ldr r1, [r4] +; SOFT-NEXT: ldr r6, [r4, #4] +; SOFT-NEXT: lsls r6, r3 +; SOFT-NEXT: eors r5, r3 +; SOFT-NEXT: lsrs r4, r1, #1 +; SOFT-NEXT: lsrs r4, r5 +; SOFT-NEXT: orrs r4, r6 +; SOFT-NEXT: lsls r1, r3 +; SOFT-NEXT: lsrs r6, r7, #1 +; SOFT-NEXT: lsrs r6, r5 +; SOFT-NEXT: orrs r6, r1 +; SOFT-NEXT: lsls r7, r3 +; SOFT-NEXT: lsrs r1, r0, #1 +; SOFT-NEXT: lsrs r1, r5 +; SOFT-NEXT: orrs r1, r7 +; SOFT-NEXT: lsls r0, r3 +; SOFT-NEXT: .LBB49_5: @ %fp-to-i-cleanup +; SOFT-NEXT: subs r3, r6, #1 +; SOFT-NEXT: sbcs r4, r2 +; SOFT-NEXT: blo .LBB49_9 +; SOFT-NEXT: @ %bb.6: @ %fp-to-i-cleanup +; SOFT-NEXT: cmp r2, #0 +; SOFT-NEXT: beq .LBB49_10 +; SOFT-NEXT: .LBB49_7: @ %fp-to-i-cleanup +; SOFT-NEXT: beq .LBB49_11 +; SOFT-NEXT: .LBB49_8: @ %fp-to-i-cleanup +; SOFT-NEXT: add sp, #36 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB49_9: +; SOFT-NEXT: movs r2, #1 +; SOFT-NEXT: cmp r2, #0 +; SOFT-NEXT: bne .LBB49_7 +; SOFT-NEXT: .LBB49_10: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: bne .LBB49_8 +; SOFT-NEXT: .LBB49_11: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r1, r2 +; SOFT-NEXT: add sp, #36 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .p2align 2 +; SOFT-NEXT: @ %bb.12: +; SOFT-NEXT: .LCPI49_0: +; SOFT-NEXT: .long 8388607 @ 0x7fffff +; +; VFP2-LABEL: utest_f32i64_mm: +; VFP2: @ %bb.0: @ %entryfp-to-i-entry +; VFP2-NEXT: vmov r2, s0 +; VFP2-NEXT: movs r1, #0 +; VFP2-NEXT: ubfx r0, r2, #23, #8 +; VFP2-NEXT: cmp r0, #127 +; VFP2-NEXT: bhs .LBB49_2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: b .LBB49_4 +; VFP2-NEXT: .LBB49_2: @ %fp-to-i-if-check.exp.size +; VFP2-NEXT: movs r3, #1 +; VFP2-NEXT: cmp r0, #149 +; VFP2-NEXT: bfi r2, r3, #23, #9 +; VFP2-NEXT: bhi .LBB49_5 +; VFP2-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; VFP2-NEXT: rsb.w r0, r0, #150 +; VFP2-NEXT: lsr.w r0, r2, r0 +; VFP2-NEXT: .LBB49_4: @ %fp-to-i-cleanup +; VFP2-NEXT: mov.w r12, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: b .LBB49_6 +; VFP2-NEXT: .LBB49_5: @ %fp-to-i-if-exp.large +; VFP2-NEXT: .save {r4, r5, r7, lr} +; VFP2-NEXT: push {r4, r5, r7, lr} +; VFP2-NEXT: .pad #32 +; VFP2-NEXT: sub sp, #32 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: subs r0, #150 +; VFP2-NEXT: strd r2, r3, [sp, #16] +; VFP2-NEXT: movs r2, #12 +; VFP2-NEXT: strd r3, r3, [sp, #24] +; VFP2-NEXT: and.w r2, r2, r0, lsr #3 +; VFP2-NEXT: strd r3, r3, [sp, #8] +; VFP2-NEXT: and r0, r0, #31 +; VFP2-NEXT: strd r3, r3, [sp] +; VFP2-NEXT: mov r3, sp +; VFP2-NEXT: adds r3, #16 +; VFP2-NEXT: eor r5, r0, #31 +; VFP2-NEXT: subs r2, r3, r2 +; VFP2-NEXT: ldrd lr, r12, [r2] +; VFP2-NEXT: ldrd r4, r2, [r2, #8] +; VFP2-NEXT: lsls r2, r0 +; VFP2-NEXT: lsrs r3, r4, #1 +; VFP2-NEXT: lsrs r3, r5 +; VFP2-NEXT: orrs r3, r2 +; VFP2-NEXT: lsl.w r2, r4, r0 +; VFP2-NEXT: lsrs.w r4, r12, #1 +; VFP2-NEXT: lsl.w r12, r12, r0 +; VFP2-NEXT: lsrs r4, r5 +; VFP2-NEXT: orrs r2, r4 +; VFP2-NEXT: lsr.w r4, lr, #1 +; VFP2-NEXT: lsl.w r0, lr, r0 +; VFP2-NEXT: lsrs r4, r5 +; VFP2-NEXT: orr.w r12, r12, r4 +; VFP2-NEXT: add sp, #32 +; VFP2-NEXT: pop.w {r4, r5, r7, lr} +; VFP2-NEXT: .LBB49_6: @ %fp-to-i-cleanup +; VFP2-NEXT: subs r2, #1 +; VFP2-NEXT: sbcs r2, r3, #0 +; VFP2-NEXT: it lo +; VFP2-NEXT: movlo r1, #1 +; VFP2-NEXT: cmp r1, #0 +; VFP2-NEXT: ite eq +; VFP2-NEXT: moveq r0, r1 +; VFP2-NEXT: movne r1, r12 +; VFP2-NEXT: bx lr +; +; FULL-LABEL: utest_f32i64_mm: +; FULL: @ %bb.0: @ %entryfp-to-i-entry +; FULL-NEXT: vmov r1, s0 +; FULL-NEXT: ubfx r0, r1, #23, #8 +; FULL-NEXT: cmp r0, #127 +; FULL-NEXT: bhs .LBB49_2 +; FULL-NEXT: @ %bb.1: +; FULL-NEXT: movs r0, #0 +; FULL-NEXT: b .LBB49_4 +; FULL-NEXT: .LBB49_2: @ %fp-to-i-if-check.exp.size +; FULL-NEXT: movs r2, #1 +; FULL-NEXT: cmp r0, #149 +; FULL-NEXT: bfi r1, r2, #23, #9 +; FULL-NEXT: bhi .LBB49_5 +; FULL-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; FULL-NEXT: rsb.w r0, r0, #150 +; FULL-NEXT: lsr.w r0, r1, r0 +; FULL-NEXT: .LBB49_4: @ %fp-to-i-cleanup +; FULL-NEXT: movs r1, #0 +; FULL-NEXT: movs r3, #0 +; FULL-NEXT: movs r2, #0 +; FULL-NEXT: b .LBB49_6 +; FULL-NEXT: .LBB49_5: @ %fp-to-i-if-exp.large +; FULL-NEXT: .save {r4, lr} +; FULL-NEXT: push {r4, lr} +; FULL-NEXT: .pad #32 +; FULL-NEXT: sub sp, #32 +; FULL-NEXT: movs r2, #0 +; FULL-NEXT: subs r0, #150 +; FULL-NEXT: strd r1, r2, [sp, #16] +; FULL-NEXT: movs r1, #12 +; FULL-NEXT: strd r2, r2, [sp, #24] +; FULL-NEXT: and.w r1, r1, r0, lsr #3 +; FULL-NEXT: strd r2, r2, [sp, #8] +; FULL-NEXT: and r0, r0, #31 +; FULL-NEXT: strd r2, r2, [sp] +; FULL-NEXT: mov r2, sp +; FULL-NEXT: adds r2, #16 +; FULL-NEXT: eor r4, r0, #31 +; FULL-NEXT: subs r1, r2, r1 +; FULL-NEXT: ldrd r12, lr, [r1] +; FULL-NEXT: ldrd r3, r1, [r1, #8] +; FULL-NEXT: lsls r1, r0 +; FULL-NEXT: lsrs r2, r3, #1 +; FULL-NEXT: lsrs r2, r4 +; FULL-NEXT: orrs r2, r1 +; FULL-NEXT: lsl.w r1, r3, r0 +; FULL-NEXT: lsrs.w r3, lr, #1 +; FULL-NEXT: lsl.w lr, lr, r0 +; FULL-NEXT: lsrs r3, r4 +; FULL-NEXT: lsl.w r0, r12, r0 +; FULL-NEXT: orrs r3, r1 +; FULL-NEXT: lsr.w r1, r12, #1 +; FULL-NEXT: lsrs r1, r4 +; FULL-NEXT: orr.w r1, r1, lr +; FULL-NEXT: add sp, #32 +; FULL-NEXT: pop.w {r4, lr} +; FULL-NEXT: .LBB49_6: @ %fp-to-i-cleanup +; FULL-NEXT: subs r3, #1 +; FULL-NEXT: sbcs r2, r2, #0 +; FULL-NEXT: cset r2, lo +; FULL-NEXT: cmp r2, #0 +; FULL-NEXT: csel r0, r0, r2, ne +; FULL-NEXT: csel r1, r1, r2, ne +; FULL-NEXT: bx lr +entry: + %conv = fptoui float %x to i128 + %spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616) + %conv6 = trunc i128 %spec.store.select to i64 + ret i64 %conv6 +} + +define i64 @ustest_f32i64_mm(float %x) { +; SOFT-LABEL: ustest_f32i64_mm: +; SOFT: @ %bb.0: @ %entryfp-to-i-entry +; SOFT-NEXT: .save {r4, r5, r6, r7, lr} +; SOFT-NEXT: push {r4, r5, r6, r7, lr} +; SOFT-NEXT: .pad #68 +; SOFT-NEXT: sub sp, #68 +; SOFT-NEXT: movs r3, #1 +; SOFT-NEXT: lsrs r1, r0, #23 +; SOFT-NEXT: uxtb r4, r1 +; SOFT-NEXT: movs r2, #0 +; SOFT-NEXT: cmp r4, #127 +; SOFT-NEXT: bhs .LBB50_2 +; SOFT-NEXT: @ %bb.1: +; SOFT-NEXT: mov r4, r2 +; SOFT-NEXT: mov r5, r2 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: mov r1, r2 +; SOFT-NEXT: b .LBB50_6 +; SOFT-NEXT: .LBB50_2: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: str r2, [sp, #24] @ 4-byte Spill +; SOFT-NEXT: asrs r5, r0, #31 +; SOFT-NEXT: str r5, [sp, #28] @ 4-byte Spill +; SOFT-NEXT: str r3, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: orrs r5, r3 +; SOFT-NEXT: ldr r1, .LCPI50_0 +; SOFT-NEXT: ands r0, r1 +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: adds r0, r0, #1 +; SOFT-NEXT: cmp r4, #149 +; SOFT-NEXT: bhi .LBB50_4 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; SOFT-NEXT: movs r1, #150 +; SOFT-NEXT: subs r1, r1, r4 +; SOFT-NEXT: lsrs r0, r1 +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: adcs r7, r6 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: ldr r5, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: adcs r1, r6 +; SOFT-NEXT: adds r0, r7, r1 +; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: adcs r4, r6 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: adcs r7, r4 +; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: adds r0, r2, r0 +; SOFT-NEXT: adcs r1, r7 +; SOFT-NEXT: b .LBB50_5 +; SOFT-NEXT: .LBB50_4: @ %fp-to-i-if-exp.large +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: str r6, [sp, #60] +; SOFT-NEXT: str r6, [sp, #56] +; SOFT-NEXT: str r6, [sp, #52] +; SOFT-NEXT: str r0, [sp, #48] +; SOFT-NEXT: str r6, [sp, #44] +; SOFT-NEXT: str r6, [sp, #40] +; SOFT-NEXT: str r6, [sp, #36] +; SOFT-NEXT: str r6, [sp, #32] +; SOFT-NEXT: subs r4, #150 +; SOFT-NEXT: str r5, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: movs r5, #31 +; SOFT-NEXT: lsrs r0, r4, #3 +; SOFT-NEXT: ands r4, r5 +; SOFT-NEXT: movs r1, #12 +; SOFT-NEXT: ands r1, r0 +; SOFT-NEXT: add r0, sp, #32 +; SOFT-NEXT: adds r0, #16 +; SOFT-NEXT: subs r7, r0, r1 +; SOFT-NEXT: ldr r0, [r7, #4] +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [r7, #8] +; SOFT-NEXT: ldr r1, [r7, #12] +; SOFT-NEXT: lsrs r3, r0, #1 +; SOFT-NEXT: lsls r0, r4 +; SOFT-NEXT: eors r5, r4 +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: lsrs r2, r2, #1 +; SOFT-NEXT: lsrs r2, r5 +; SOFT-NEXT: orrs r2, r0 +; SOFT-NEXT: lsls r1, r4 +; SOFT-NEXT: lsrs r3, r5 +; SOFT-NEXT: orrs r3, r1 +; SOFT-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: lsls r1, r4 +; SOFT-NEXT: ldr r7, [r7] +; SOFT-NEXT: lsrs r0, r7, #1 +; SOFT-NEXT: lsrs r0, r5 +; SOFT-NEXT: orrs r0, r1 +; SOFT-NEXT: mov r3, r0 +; SOFT-NEXT: lsls r7, r4 +; SOFT-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: mov r4, r3 +; SOFT-NEXT: str r3, [sp] @ 4-byte Spill +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r0 +; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: adcs r5, r6 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: ldr r4, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: adcs r1, r6 +; SOFT-NEXT: adds r5, r5, r1 +; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: adcs r7, r6 +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: adds r0, r0, r5 +; SOFT-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r7 +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r2 +; SOFT-NEXT: .LBB50_5: @ %fp-to-i-cleanup +; SOFT-NEXT: ldr r2, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: .LBB50_6: @ %fp-to-i-cleanup +; SOFT-NEXT: subs r0, r0, #1 +; SOFT-NEXT: mov r0, r1 +; SOFT-NEXT: sbcs r0, r2 +; SOFT-NEXT: bge .LBB50_16 +; SOFT-NEXT: @ %bb.7: @ %fp-to-i-cleanup +; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: beq .LBB50_17 +; SOFT-NEXT: .LBB50_8: @ %fp-to-i-cleanup +; SOFT-NEXT: beq .LBB50_18 +; SOFT-NEXT: .LBB50_9: @ %fp-to-i-cleanup +; SOFT-NEXT: bne .LBB50_11 +; SOFT-NEXT: .LBB50_10: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: .LBB50_11: @ %fp-to-i-cleanup +; SOFT-NEXT: cmp r1, #0 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: bmi .LBB50_13 +; SOFT-NEXT: @ %bb.12: @ %fp-to-i-cleanup ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bne .LBB49_3 -; SOFT-NEXT: .LBB49_6: @ %entry -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: pop {r4, pc} -; -; VFP2-LABEL: utest_f32i64_mm: -; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: bl __fixunssfti -; VFP2-NEXT: subs r2, #1 -; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs r2, r3, #0 -; VFP2-NEXT: it lo -; VFP2-NEXT: movlo.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r1, r12 -; VFP2-NEXT: pop {r7, pc} -; -; FULL-LABEL: utest_f32i64_mm: -; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: bl __fixunssfti -; FULL-NEXT: subs r2, #1 -; FULL-NEXT: sbcs r2, r3, #0 -; FULL-NEXT: cset r2, lo -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: pop {r7, pc} -entry: - %conv = fptoui float %x to i128 - %spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616) - %conv6 = trunc i128 %spec.store.select to i64 - ret i64 %conv6 -} - -define i64 @ustest_f32i64_mm(float %x) { -; SOFT-LABEL: ustest_f32i64_mm: -; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} -; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: subs r2, r2, #1 -; SOFT-NEXT: mov r2, r3 -; SOFT-NEXT: sbcs r2, r1 -; SOFT-NEXT: blt .LBB50_2 -; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r2, r1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB50_3 -; SOFT-NEXT: b .LBB50_4 -; SOFT-NEXT: .LBB50_2: -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB50_4 -; SOFT-NEXT: .LBB50_3: @ %entry -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: .LBB50_4: @ %entry -; SOFT-NEXT: beq .LBB50_10 -; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: bne .LBB50_7 -; SOFT-NEXT: .LBB50_6: @ %entry +; SOFT-NEXT: .LBB50_13: @ %fp-to-i-cleanup +; SOFT-NEXT: bmi .LBB50_15 +; SOFT-NEXT: @ %bb.14: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: .LBB50_15: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r1, r2 +; SOFT-NEXT: add sp, #68 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB50_16: @ %fp-to-i-cleanup ; SOFT-NEXT: mov r3, r2 -; SOFT-NEXT: .LBB50_7: @ %entry ; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r2, r1 -; SOFT-NEXT: bpl .LBB50_11 -; SOFT-NEXT: @ %bb.8: @ %entry -; SOFT-NEXT: bpl .LBB50_12 -; SOFT-NEXT: .LBB50_9: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB50_10: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: beq .LBB50_6 -; SOFT-NEXT: b .LBB50_7 -; SOFT-NEXT: .LBB50_11: @ %entry -; SOFT-NEXT: mov r2, r0 -; SOFT-NEXT: bmi .LBB50_9 -; SOFT-NEXT: .LBB50_12: @ %entry -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: bne .LBB50_8 +; SOFT-NEXT: .LBB50_17: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r5, r3 +; SOFT-NEXT: bne .LBB50_9 +; SOFT-NEXT: .LBB50_18: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r4, r3 +; SOFT-NEXT: beq .LBB50_10 +; SOFT-NEXT: b .LBB50_11 +; SOFT-NEXT: .p2align 2 +; SOFT-NEXT: @ %bb.19: +; SOFT-NEXT: .LCPI50_0: +; SOFT-NEXT: .long 8388607 @ 0x7fffff ; ; VFP2-LABEL: ustest_f32i64_mm: -; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs r2, #1 -; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs r2, r3, #0 +; VFP2: @ %bb.0: @ %entryfp-to-i-entry +; VFP2-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: .pad #60 +; VFP2-NEXT: sub sp, #60 +; VFP2-NEXT: vmov r3, s0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: ubfx r0, r3, #23, #8 +; VFP2-NEXT: cmp r0, #127 +; VFP2-NEXT: bhs .LBB50_2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: movs r1, #0 +; VFP2-NEXT: movs r4, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: b .LBB50_5 +; VFP2-NEXT: .LBB50_2: @ %fp-to-i-if-check.exp.size +; VFP2-NEXT: movs r4, #1 +; VFP2-NEXT: mov r1, r3 +; VFP2-NEXT: bfi r1, r4, #23, #9 +; VFP2-NEXT: orr.w lr, r4, r3, asr #31 +; VFP2-NEXT: asr.w r8, r3, #31 +; VFP2-NEXT: cmp r0, #149 +; VFP2-NEXT: bhi .LBB50_4 +; VFP2-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; VFP2-NEXT: rsb.w r0, r0, #150 +; VFP2-NEXT: movs r6, #0 +; VFP2-NEXT: lsr.w r4, r1, r0 +; VFP2-NEXT: umull r0, r1, r4, lr +; VFP2-NEXT: umull r3, r5, r4, r8 +; VFP2-NEXT: adds r7, r3, r1 +; VFP2-NEXT: adcs r7, r5, #0 +; VFP2-NEXT: adc r12, r6, #0 +; VFP2-NEXT: adds r7, r3, r1 +; VFP2-NEXT: mla r7, r8, r4, r5 +; VFP2-NEXT: umlal r1, r6, r4, r8 +; VFP2-NEXT: adcs.w r4, r3, r5 +; VFP2-NEXT: adc.w r3, r12, r7 +; VFP2-NEXT: b .LBB50_5 +; VFP2-NEXT: .LBB50_4: @ %fp-to-i-if-exp.large +; VFP2-NEXT: mov.w r10, #0 +; VFP2-NEXT: subs r0, #150 +; VFP2-NEXT: add r3, sp, #24 +; VFP2-NEXT: strd r1, r10, [sp, #40] +; VFP2-NEXT: movs r1, #12 +; VFP2-NEXT: adds r3, #16 +; VFP2-NEXT: and.w r1, r1, r0, lsr #3 +; VFP2-NEXT: strd r10, r10, [sp, #48] +; VFP2-NEXT: strd r10, r10, [sp, #32] +; VFP2-NEXT: subs r1, r3, r1 +; VFP2-NEXT: strd r10, r10, [sp, #24] +; VFP2-NEXT: and r7, r0, #31 +; VFP2-NEXT: ldrd r3, r4, [r1] +; VFP2-NEXT: eor r0, r7, #31 +; VFP2-NEXT: movs r5, #0 +; VFP2-NEXT: str r4, [sp, #16] @ 4-byte Spill +; VFP2-NEXT: ldrd r12, r1, [r1, #8] +; VFP2-NEXT: str r0, [sp, #8] @ 4-byte Spill +; VFP2-NEXT: str r1, [sp, #20] @ 4-byte Spill +; VFP2-NEXT: lsrs r1, r3, #1 +; VFP2-NEXT: lsr.w r0, r1, r0 +; VFP2-NEXT: lsl.w r1, r4, r7 +; VFP2-NEXT: lsls r3, r7 +; VFP2-NEXT: orr.w r9, r1, r0 +; VFP2-NEXT: umull r0, r1, r3, lr +; VFP2-NEXT: umull r4, r6, r3, r8 +; VFP2-NEXT: umlal r1, r5, r9, lr +; VFP2-NEXT: str r4, [sp, #12] @ 4-byte Spill +; VFP2-NEXT: str r6, [sp, #4] @ 4-byte Spill +; VFP2-NEXT: adds.w r11, r4, r1 +; VFP2-NEXT: adcs.w r4, r5, r6 +; VFP2-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; VFP2-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; VFP2-NEXT: adc r10, r10, #0 +; VFP2-NEXT: umlal r1, r5, r3, r8 +; VFP2-NEXT: lsl.w r11, r4, r7 +; VFP2-NEXT: lsr.w r4, r12, #1 +; VFP2-NEXT: lsl.w r7, r12, r7 +; VFP2-NEXT: lsrs r4, r6 +; VFP2-NEXT: orr.w r11, r11, r4 +; VFP2-NEXT: ldr r4, [sp, #16] @ 4-byte Reload +; VFP2-NEXT: umlal r5, r10, r9, r8 +; VFP2-NEXT: lsrs.w r4, r4, #1 +; VFP2-NEXT: lsrs r4, r6 +; VFP2-NEXT: ldr r6, [sp, #4] @ 4-byte Reload +; VFP2-NEXT: orrs r4, r7 +; VFP2-NEXT: umull r12, r7, lr, r4 +; VFP2-NEXT: mla r7, lr, r11, r7 +; VFP2-NEXT: mla r7, r8, r4, r7 +; VFP2-NEXT: mla r4, r8, r9, r6 +; VFP2-NEXT: mla r3, r8, r3, r4 +; VFP2-NEXT: ldr r4, [sp, #12] @ 4-byte Reload +; VFP2-NEXT: adds.w r6, r4, r12 +; VFP2-NEXT: adcs r3, r7 +; VFP2-NEXT: adds r4, r5, r6 +; VFP2-NEXT: adc.w r3, r3, r10 +; VFP2-NEXT: .LBB50_5: @ %fp-to-i-cleanup +; VFP2-NEXT: subs r7, r4, #1 +; VFP2-NEXT: sbcs r7, r3, #0 ; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 +; VFP2-NEXT: movlt r2, #1 +; VFP2-NEXT: cmp r2, #0 ; VFP2-NEXT: itte eq -; VFP2-NEXT: moveq r1, r12 -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: movne r12, r3 -; VFP2-NEXT: cmp.w r12, #0 +; VFP2-NEXT: moveq r1, r2 +; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: movne r2, r3 +; VFP2-NEXT: cmp r2, #0 ; VFP2-NEXT: itt mi ; VFP2-NEXT: movmi r0, #0 ; VFP2-NEXT: movmi r1, #0 -; VFP2-NEXT: pop {r7, pc} +; VFP2-NEXT: add sp, #60 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; FULL-LABEL: ustest_f32i64_mm: -; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: bl __fixsfti -; FULL-NEXT: subs r2, #1 -; FULL-NEXT: sbcs r2, r3, #0 -; FULL-NEXT: cset r2, lt -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r2, r3, r2, ne +; FULL: @ %bb.0: @ %entryfp-to-i-entry +; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FULL-NEXT: .pad #52 +; FULL-NEXT: sub sp, #52 +; FULL-NEXT: vmov r2, s0 +; FULL-NEXT: ubfx r0, r2, #23, #8 +; FULL-NEXT: cmp r0, #127 +; FULL-NEXT: bhs .LBB50_2 +; FULL-NEXT: @ %bb.1: +; FULL-NEXT: mov.w r11, #0 +; FULL-NEXT: movs r1, #0 +; FULL-NEXT: movs r3, #0 +; FULL-NEXT: movs r2, #0 +; FULL-NEXT: b .LBB50_5 +; FULL-NEXT: .LBB50_2: @ %fp-to-i-if-check.exp.size +; FULL-NEXT: movs r3, #1 +; FULL-NEXT: mov r1, r2 +; FULL-NEXT: bfi r1, r3, #23, #9 +; FULL-NEXT: orr.w r3, r3, r2, asr #31 +; FULL-NEXT: asrs r2, r2, #31 +; FULL-NEXT: cmp r0, #149 +; FULL-NEXT: bhi .LBB50_4 +; FULL-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small +; FULL-NEXT: rsb.w r0, r0, #150 +; FULL-NEXT: movs r5, #0 +; FULL-NEXT: lsr.w r0, r1, r0 +; FULL-NEXT: umull r11, r1, r0, r3 +; FULL-NEXT: umull r3, r4, r0, r2 +; FULL-NEXT: adds r6, r3, r1 +; FULL-NEXT: adcs r6, r4, #0 +; FULL-NEXT: adc r12, r5, #0 +; FULL-NEXT: adds r6, r3, r1 +; FULL-NEXT: mla r6, r2, r0, r4 +; FULL-NEXT: adcs r3, r4 +; FULL-NEXT: umlal r1, r5, r0, r2 +; FULL-NEXT: adc.w r2, r12, r6 +; FULL-NEXT: b .LBB50_5 +; FULL-NEXT: .LBB50_4: @ %fp-to-i-if-exp.large +; FULL-NEXT: movs r6, #0 +; FULL-NEXT: subs r0, #150 +; FULL-NEXT: add r7, sp, #16 +; FULL-NEXT: strd r1, r6, [sp, #32] +; FULL-NEXT: movs r1, #12 +; FULL-NEXT: adds r7, #16 +; FULL-NEXT: and.w r1, r1, r0, lsr #3 +; FULL-NEXT: strd r6, r6, [sp, #40] +; FULL-NEXT: strd r6, r6, [sp, #24] +; FULL-NEXT: subs r1, r7, r1 +; FULL-NEXT: strd r6, r6, [sp, #16] +; FULL-NEXT: and r0, r0, #31 +; FULL-NEXT: ldrd r4, r7, [r1] +; FULL-NEXT: eor r12, r0, #31 +; FULL-NEXT: movs r5, #0 +; FULL-NEXT: str r7, [sp, #8] @ 4-byte Spill +; FULL-NEXT: lsls r7, r0 +; FULL-NEXT: ldrd r9, r1, [r1, #8] +; FULL-NEXT: str r1, [sp, #12] @ 4-byte Spill +; FULL-NEXT: lsrs r1, r4, #1 +; FULL-NEXT: lsr.w r1, r1, r12 +; FULL-NEXT: lsls r4, r0 +; FULL-NEXT: orr.w r10, r7, r1 +; FULL-NEXT: umull r11, r1, r4, r3 +; FULL-NEXT: umull r7, lr, r4, r2 +; FULL-NEXT: umlal r1, r5, r10, r3 +; FULL-NEXT: str r7, [sp, #4] @ 4-byte Spill +; FULL-NEXT: adds.w r8, r7, r1 +; FULL-NEXT: adcs.w r7, r5, lr +; FULL-NEXT: umlal r1, r5, r4, r2 +; FULL-NEXT: adc r7, r6, #0 +; FULL-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; FULL-NEXT: umlal r5, r7, r10, r2 +; FULL-NEXT: lsl.w r8, r6, r0 +; FULL-NEXT: lsr.w r6, r9, #1 +; FULL-NEXT: lsl.w r0, r9, r0 +; FULL-NEXT: lsr.w r6, r6, r12 +; FULL-NEXT: orr.w r8, r8, r6 +; FULL-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; FULL-NEXT: lsrs.w r6, r6, #1 +; FULL-NEXT: lsr.w r6, r6, r12 +; FULL-NEXT: orrs r0, r6 +; FULL-NEXT: umull r12, r6, r3, r0 +; FULL-NEXT: mla r3, r3, r8, r6 +; FULL-NEXT: mla r0, r2, r0, r3 +; FULL-NEXT: mla r3, r2, r10, lr +; FULL-NEXT: mla r2, r2, r4, r3 +; FULL-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; FULL-NEXT: adds.w r3, r3, r12 +; FULL-NEXT: adcs r0, r2 +; FULL-NEXT: adds r3, r3, r5 +; FULL-NEXT: adc.w r2, r7, r0 +; FULL-NEXT: .LBB50_5: @ %fp-to-i-cleanup +; FULL-NEXT: subs r0, r3, #1 +; FULL-NEXT: sbcs r0, r2, #0 +; FULL-NEXT: cset r3, lt +; FULL-NEXT: cmp r3, #0 +; FULL-NEXT: csel r1, r1, r3, ne +; FULL-NEXT: csel r0, r11, r3, ne +; FULL-NEXT: csel r2, r2, r3, ne ; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: itt mi ; FULL-NEXT: movmi r0, #0 ; FULL-NEXT: movmi r1, #0 -; FULL-NEXT: pop {r7, pc} +; FULL-NEXT: add sp, #52 +; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi float %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616) @@ -3216,156 +7081,32 @@ entry: define i64 @stest_f16i64_mm(half %x) { ; SOFT-LABEL: stest_f16i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, r5, r6, r7, lr} -; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 +; SOFT-NEXT: .save {r7, lr} +; SOFT-NEXT: push {r7, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: movs r0, #1 -; SOFT-NEXT: movs r5, #0 -; SOFT-NEXT: ldr r6, .LCPI51_0 -; SOFT-NEXT: adds r4, r7, #1 -; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: sbcs r4, r6 -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: sbcs r4, r5 -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: sbcs r4, r5 -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: blt .LBB51_2 -; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r4, r5 -; SOFT-NEXT: .LBB51_2: @ %entry -; SOFT-NEXT: mvns r6, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB51_12 -; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: beq .LBB51_13 -; SOFT-NEXT: .LBB51_4: @ %entry -; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: beq .LBB51_14 -; SOFT-NEXT: .LBB51_5: @ %entry -; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: bne .LBB51_7 -; SOFT-NEXT: .LBB51_6: @ %entry -; SOFT-NEXT: mov r7, r6 -; SOFT-NEXT: .LBB51_7: @ %entry -; SOFT-NEXT: lsls r3, r0, #31 -; SOFT-NEXT: rsbs r4, r7, #0 -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: sbcs r4, r1 -; SOFT-NEXT: mov r4, r6 -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: sbcs r4, r2 -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: sbcs r6, r2 -; SOFT-NEXT: bge .LBB51_15 -; SOFT-NEXT: @ %bb.8: @ %entry -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: beq .LBB51_16 -; SOFT-NEXT: .LBB51_9: @ %entry -; SOFT-NEXT: bne .LBB51_11 -; SOFT-NEXT: .LBB51_10: @ %entry -; SOFT-NEXT: mov r1, r3 -; SOFT-NEXT: .LBB51_11: @ %entry -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: add sp, #12 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB51_12: @ %entry -; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: bne .LBB51_4 -; SOFT-NEXT: .LBB51_13: @ %entry -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB51_5 -; SOFT-NEXT: .LBB51_14: @ %entry -; SOFT-NEXT: ldr r1, .LCPI51_0 -; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: beq .LBB51_6 -; SOFT-NEXT: b .LBB51_7 -; SOFT-NEXT: .LBB51_15: @ %entry -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: bne .LBB51_9 -; SOFT-NEXT: .LBB51_16: @ %entry -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: beq .LBB51_10 -; SOFT-NEXT: b .LBB51_11 -; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.17: -; SOFT-NEXT: .LCPI51_0: -; SOFT-NEXT: .long 2147483647 @ 0x7fffffff +; SOFT-NEXT: bl __aeabi_f2iz +; SOFT-NEXT: asrs r1, r0, #31 +; SOFT-NEXT: pop {r7, pc} ; ; VFP2-LABEL: stest_f16i64_mm: ; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r4, r5, r7, lr} -; VFP2-NEXT: push {r4, r5, r7, lr} +; VFP2-NEXT: .save {r7, lr} +; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs.w r4, r0, #-1 -; VFP2-NEXT: mvn lr, #-2147483648 -; VFP2-NEXT: sbcs.w r4, r1, lr -; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs r4, r2, #0 -; VFP2-NEXT: sbcs r4, r3, #0 -; VFP2-NEXT: mov.w r4, #0 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r4, #1 -; VFP2-NEXT: cmp r4, #0 -; VFP2-NEXT: itet eq -; VFP2-NEXT: moveq r3, r4 -; VFP2-NEXT: movne r4, r2 -; VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: mov.w r2, #-1 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r2 -; VFP2-NEXT: rsbs r5, r0, #0 -; VFP2-NEXT: mov.w lr, #-2147483648 -; VFP2-NEXT: sbcs.w r5, lr, r1 -; VFP2-NEXT: sbcs.w r4, r2, r4 -; VFP2-NEXT: sbcs r2, r3 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: pop {r4, r5, r7, pc} +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: asrs r1, r0, #31 +; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: stest_f16i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r7, lr} -; FULL-NEXT: push {r4, r5, r7, lr} -; FULL-NEXT: vmov.f16 r0, s0 -; FULL-NEXT: vmov s0, r0 -; FULL-NEXT: bl __fixhfti -; FULL-NEXT: subs.w lr, r0, #-1 -; FULL-NEXT: mvn r12, #-2147483648 -; FULL-NEXT: sbcs.w lr, r1, r12 -; FULL-NEXT: sbcs lr, r2, #0 -; FULL-NEXT: sbcs lr, r3, #0 -; FULL-NEXT: cset lr, lt -; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r5, r3, lr, ne -; FULL-NEXT: mov.w r3, #-1 -; FULL-NEXT: csel r0, r0, r3, ne -; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: csel r2, r2, lr, ne -; FULL-NEXT: rsbs r4, r0, #0 -; FULL-NEXT: mov.w r12, #-2147483648 -; FULL-NEXT: sbcs.w r4, r12, r1 -; FULL-NEXT: sbcs.w r2, r3, r2 -; FULL-NEXT: sbcs.w r2, r3, r5 -; FULL-NEXT: cset r2, lt -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: pop {r4, r5, r7, pc} +; FULL-NEXT: vcvt.s32.f16 s0, s0 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: asrs r1, r0, #31 +; FULL-NEXT: bx lr entry: %conv = fptosi half %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) @@ -3377,32 +7118,13 @@ entry: define i64 @utesth_f16i64_mm(half %x) { ; SOFT-LABEL: utesth_f16i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} +; SOFT-NEXT: .save {r7, lr} +; SOFT-NEXT: push {r7, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: bl __fixunssfti -; SOFT-NEXT: movs r4, #0 -; SOFT-NEXT: subs r2, r2, #1 -; SOFT-NEXT: sbcs r3, r4 -; SOFT-NEXT: blo .LBB52_4 -; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB52_5 -; SOFT-NEXT: .LBB52_2: @ %entry -; SOFT-NEXT: beq .LBB52_6 -; SOFT-NEXT: .LBB52_3: @ %entry -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB52_4: -; SOFT-NEXT: movs r4, #1 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB52_2 -; SOFT-NEXT: .LBB52_5: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bne .LBB52_3 -; SOFT-NEXT: .LBB52_6: @ %entry -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: bl __aeabi_f2uiz +; SOFT-NEXT: movs r1, #0 +; SOFT-NEXT: pop {r7, pc} ; ; VFP2-LABEL: utesth_f16i64_mm: ; VFP2: @ %bb.0: @ %entry @@ -3411,32 +7133,17 @@ define i64 @utesth_f16i64_mm(half %x) { ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: bl __fixunssfti -; VFP2-NEXT: subs r2, #1 -; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs r2, r3, #0 -; VFP2-NEXT: it lo -; VFP2-NEXT: movlo.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r1, r12 +; VFP2-NEXT: movs r1, #0 +; VFP2-NEXT: vcvt.u32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: utesth_f16i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: vmov.f16 r0, s0 -; FULL-NEXT: vmov s0, r0 -; FULL-NEXT: bl __fixunshfti -; FULL-NEXT: subs r2, #1 -; FULL-NEXT: sbcs r2, r3, #0 -; FULL-NEXT: cset r2, lo -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: pop {r7, pc} +; FULL-NEXT: vcvt.u32.f16 s0, s0 +; FULL-NEXT: movs r1, #0 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: bx lr entry: %conv = fptoui half %x to i128 %spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616) @@ -3447,54 +7154,45 @@ entry: define i64 @ustest_f16i64_mm(half %x) { ; SOFT-LABEL: ustest_f16i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} +; SOFT-NEXT: .save {r7, lr} +; SOFT-NEXT: push {r7, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: subs r2, r2, #1 -; SOFT-NEXT: mov r2, r3 -; SOFT-NEXT: sbcs r2, r1 +; SOFT-NEXT: bl __aeabi_f2iz +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: asrs r1, r0, #31 +; SOFT-NEXT: movs r0, #0 +; SOFT-NEXT: subs r3, r1, #1 +; SOFT-NEXT: mov r3, r1 +; SOFT-NEXT: sbcs r3, r0 ; SOFT-NEXT: blt .LBB53_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r2, r1 -; SOFT-NEXT: cmp r2, #0 +; SOFT-NEXT: mov r3, r0 +; SOFT-NEXT: cmp r3, #0 ; SOFT-NEXT: beq .LBB53_3 ; SOFT-NEXT: b .LBB53_4 ; SOFT-NEXT: .LBB53_2: -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: cmp r2, #0 +; SOFT-NEXT: movs r3, #1 +; SOFT-NEXT: cmp r3, #0 ; SOFT-NEXT: bne .LBB53_4 ; SOFT-NEXT: .LBB53_3: @ %entry -; SOFT-NEXT: mov r4, r2 +; SOFT-NEXT: mov r2, r3 ; SOFT-NEXT: .LBB53_4: @ %entry -; SOFT-NEXT: beq .LBB53_10 +; SOFT-NEXT: beq .LBB53_8 ; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: bne .LBB53_7 +; SOFT-NEXT: cmp r1, #0 +; SOFT-NEXT: bmi .LBB53_7 ; SOFT-NEXT: .LBB53_6: @ %entry -; SOFT-NEXT: mov r3, r2 -; SOFT-NEXT: .LBB53_7: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r2, r1 -; SOFT-NEXT: bpl .LBB53_11 -; SOFT-NEXT: @ %bb.8: @ %entry -; SOFT-NEXT: bpl .LBB53_12 -; SOFT-NEXT: .LBB53_9: @ %entry ; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB53_10: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: beq .LBB53_6 +; SOFT-NEXT: .LBB53_7: @ %entry +; SOFT-NEXT: asrs r2, r1, #31 +; SOFT-NEXT: bics r1, r2 +; SOFT-NEXT: pop {r7, pc} +; SOFT-NEXT: .LBB53_8: @ %entry +; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: cmp r1, #0 +; SOFT-NEXT: bpl .LBB53_6 ; SOFT-NEXT: b .LBB53_7 -; SOFT-NEXT: .LBB53_11: @ %entry -; SOFT-NEXT: mov r2, r0 -; SOFT-NEXT: bmi .LBB53_9 -; SOFT-NEXT: .LBB53_12: @ %entry -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r4, pc} ; ; VFP2-LABEL: ustest_f16i64_mm: ; VFP2: @ %bb.0: @ %entry @@ -3503,42 +7201,44 @@ define i64 @ustest_f16i64_mm(half %x) { ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs r2, #1 -; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs r2, r3, #0 +; VFP2-NEXT: movs r3, #1 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r1, s0 +; VFP2-NEXT: rsbs r3, r3, r1, asr #31 +; VFP2-NEXT: asr.w r0, r1, #31 +; VFP2-NEXT: sbcs r0, r0, #0 ; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itte eq -; VFP2-NEXT: moveq r1, r12 -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: movne r12, r3 -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt mi +; VFP2-NEXT: movlt r2, #1 +; VFP2-NEXT: mov r0, r2 +; VFP2-NEXT: cmp r2, #0 +; VFP2-NEXT: itt ne +; VFP2-NEXT: movne r0, r1 +; VFP2-NEXT: asrne r2, r1, #31 +; VFP2-NEXT: bic.w r1, r2, r2, asr #31 +; VFP2-NEXT: cmp r2, #0 +; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r0, #0 -; VFP2-NEXT: movmi r1, #0 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: ustest_f16i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: vmov.f16 r0, s0 -; FULL-NEXT: vmov s0, r0 -; FULL-NEXT: bl __fixhfti -; FULL-NEXT: subs r2, #1 -; FULL-NEXT: sbcs r2, r3, #0 +; FULL-NEXT: vcvt.s32.f16 s0, s0 +; FULL-NEXT: movs r2, #1 +; FULL-NEXT: vmov r1, s0 +; FULL-NEXT: rsbs r2, r2, r1, asr #31 +; FULL-NEXT: asr.w r0, r1, #31 +; FULL-NEXT: sbcs r0, r0, #0 ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r2, r3, r2, ne +; FULL-NEXT: csel r0, r1, r2, ne +; FULL-NEXT: it ne +; FULL-NEXT: asrne r2, r1, #31 +; FULL-NEXT: bic.w r1, r2, r2, asr #31 ; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: itt mi +; FULL-NEXT: it mi ; FULL-NEXT: movmi r0, #0 -; FULL-NEXT: movmi r1, #0 -; FULL-NEXT: pop {r7, pc} +; FULL-NEXT: bx lr entry: %conv = fptosi half %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616) diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll index 40b360e9158ff..1076432ec2110 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll @@ -1640,66 +1640,268 @@ entry: define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-LABEL: stest_f64i64: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vorr d0, d9, d9 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: mvn r9, #0 -; CHECK-NEXT: subs r1, r0, r9 -; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r4, r5 -; CHECK-NEXT: vorr d0, d8, d8 -; CHECK-NEXT: sbcs r1, r2, #0 +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #100 +; CHECK-NEXT: sub sp, sp, #100 +; CHECK-NEXT: vmov r5, r1, d0 +; CHECK-NEXT: mov r9, #1 +; CHECK-NEXT: mvn lr, #0 +; CHECK-NEXT: movw r3, #1023 +; CHECK-NEXT: ubfx r0, r1, #20, #11 +; CHECK-NEXT: cmp r0, r3 +; CHECK-NEXT: bhs .LBB18_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: mov r8, #0 +; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: b .LBB18_5 +; CHECK-NEXT: .LBB18_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: orr r11, r9, r1, asr #31 +; CHECK-NEXT: bfi r6, r9, #20, #12 +; CHECK-NEXT: asr r1, r1, #31 +; CHECK-NEXT: movw r7, #1074 +; CHECK-NEXT: cmp r0, r7 +; CHECK-NEXT: bhi .LBB18_4 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: movw r7, #1075 +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: sub r7, r7, r0 +; CHECK-NEXT: rsb r4, r7, #32 +; CHECK-NEXT: lsr r5, r5, r7 +; CHECK-NEXT: lsr r7, r6, r7 +; CHECK-NEXT: orr r5, r5, r6, lsl r4 +; CHECK-NEXT: movw r4, #1043 +; CHECK-NEXT: subs r0, r4, r0 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: movwpl r7, #0 +; CHECK-NEXT: lsrpl r5, r6, r0 +; CHECK-NEXT: umull r8, r0, r5, r11 +; CHECK-NEXT: umlal r0, r4, r7, r11 +; CHECK-NEXT: umull r2, r6, r5, r1 +; CHECK-NEXT: adds r0, r2, r0 +; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: adcs r0, r4, r6 +; CHECK-NEXT: mla r6, r1, r7, r6 +; CHECK-NEXT: adc r4, r12, #0 +; CHECK-NEXT: umlal r0, r4, r7, r1 +; CHECK-NEXT: mla r1, r1, r5, r6 +; CHECK-NEXT: adds r12, r0, r2 +; CHECK-NEXT: adc r10, r4, r1 +; CHECK-NEXT: b .LBB18_5 +; CHECK-NEXT: .LBB18_4: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: add r2, sp, #80 ; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: mov r8, #-2147483648 +; CHECK-NEXT: stm r2, {r5, r6, r7} +; CHECK-NEXT: movw r6, #1075 +; CHECK-NEXT: sub r0, r0, r6 +; CHECK-NEXT: mov r6, #12 +; CHECK-NEXT: add r5, sp, #64 +; CHECK-NEXT: and r6, r6, r0, lsr #3 +; CHECK-NEXT: add r5, r5, #16 +; CHECK-NEXT: str r7, [sp, #92] +; CHECK-NEXT: and r0, r0, #31 +; CHECK-NEXT: str r7, [sp, #76] +; CHECK-NEXT: eor r9, r0, #31 +; CHECK-NEXT: str r7, [sp, #72] +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: str r7, [sp, #68] +; CHECK-NEXT: str r7, [sp, #64] +; CHECK-NEXT: ldr r6, [r5, -r6]! +; CHECK-NEXT: ldmib r5, {r3, r10} +; CHECK-NEXT: lsr r4, r6, #1 +; CHECK-NEXT: ldr r2, [r5, #12] +; CHECK-NEXT: lsl r5, r3, r0 +; CHECK-NEXT: orr r4, r5, r4, lsr r9 +; CHECK-NEXT: lsl r5, r6, r0 +; CHECK-NEXT: str r2, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: umull r8, r6, r5, r11 +; CHECK-NEXT: umlal r6, r12, r4, r11 +; CHECK-NEXT: umull lr, r2, r5, r1 +; CHECK-NEXT: adds r6, lr, r6 +; CHECK-NEXT: str r6, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: adcs r6, r12, r2 +; CHECK-NEXT: mla r2, r1, r4, r2 +; CHECK-NEXT: adc r7, r7, #0 +; CHECK-NEXT: umlal r6, r7, r4, r1 +; CHECK-NEXT: lsr r4, r10, #1 +; CHECK-NEXT: mla r12, r1, r5, r2 +; CHECK-NEXT: ldr r2, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: lsl r5, r2, r0 +; CHECK-NEXT: lsl r0, r10, r0 +; CHECK-NEXT: orr r5, r5, r4, lsr r9 +; CHECK-NEXT: lsrs r4, r3, #1 +; CHECK-NEXT: orr r0, r0, r4, lsr r9 +; CHECK-NEXT: mov r9, #1 +; CHECK-NEXT: movw r3, #1023 +; CHECK-NEXT: umull r4, r2, r11, r0 +; CHECK-NEXT: mla r2, r11, r5, r2 +; CHECK-NEXT: mla r0, r1, r0, r2 +; CHECK-NEXT: adds r1, lr, r4 +; CHECK-NEXT: mvn lr, #0 +; CHECK-NEXT: adc r0, r12, r0 +; CHECK-NEXT: adds r12, r6, r1 +; CHECK-NEXT: adc r10, r7, r0 +; CHECK-NEXT: .LBB18_5: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r5, r7, d1 +; CHECK-NEXT: mvn r6, #0 ; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: mov r10, #0 -; CHECK-NEXT: movwlt r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r3, r1 -; CHECK-NEXT: movne r1, r2 -; CHECK-NEXT: moveq r4, r5 -; CHECK-NEXT: moveq r0, r9 -; CHECK-NEXT: rsbs r2, r0, #0 -; CHECK-NEXT: rscs r2, r4, #-2147483648 -; CHECK-NEXT: sbcs r1, r9, r1 -; CHECK-NEXT: sbcs r1, r9, r3 +; CHECK-NEXT: cmn r7, #1 +; CHECK-NEXT: ubfx r4, r7, #20, #11 +; CHECK-NEXT: movwgt r6, #0 +; CHECK-NEXT: movwgt lr, #1 +; CHECK-NEXT: cmp r4, r3 +; CHECK-NEXT: bhs .LBB18_7 +; CHECK-NEXT: @ %bb.6: +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: b .LBB18_10 +; CHECK-NEXT: .LBB18_7: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: bfi r7, r9, #20, #12 +; CHECK-NEXT: movw r0, #1074 +; CHECK-NEXT: cmp r4, r0 +; CHECK-NEXT: bhi .LBB18_9 +; CHECK-NEXT: @ %bb.8: @ %fp-to-i-if-exp.small +; CHECK-NEXT: movw r0, #1075 +; CHECK-NEXT: mov r9, #0 +; CHECK-NEXT: sub r0, r0, r4 +; CHECK-NEXT: lsr r2, r5, r0 +; CHECK-NEXT: rsb r5, r0, #32 +; CHECK-NEXT: lsr r0, r7, r0 +; CHECK-NEXT: orr r2, r2, r7, lsl r5 +; CHECK-NEXT: movw r5, #1043 +; CHECK-NEXT: subs r4, r5, r4 +; CHECK-NEXT: movwpl r0, #0 +; CHECK-NEXT: lsrpl r2, r7, r4 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: umull r5, r4, r2, lr +; CHECK-NEXT: umlal r4, r7, r0, lr +; CHECK-NEXT: umull lr, r11, r2, r6 +; CHECK-NEXT: adds r4, lr, r4 +; CHECK-NEXT: adcs r7, r7, r11 +; CHECK-NEXT: adc r3, r9, #0 +; CHECK-NEXT: mla r9, r6, r0, r11 +; CHECK-NEXT: umlal r7, r3, r0, r6 +; CHECK-NEXT: mla r2, r6, r2, r9 +; CHECK-NEXT: adds r0, r7, lr +; CHECK-NEXT: adc r7, r3, r2 +; CHECK-NEXT: b .LBB18_10 +; CHECK-NEXT: .LBB18_9: @ %fp-to-i-if-exp.large +; CHECK-NEXT: movw r2, #1075 +; CHECK-NEXT: mov r3, #12 +; CHECK-NEXT: sub r2, r4, r2 +; CHECK-NEXT: add r4, sp, #32 +; CHECK-NEXT: mov r9, #0 +; CHECK-NEXT: add r0, sp, #48 +; CHECK-NEXT: and r3, r3, r2, lsr #3 +; CHECK-NEXT: add r4, r4, #16 +; CHECK-NEXT: str r9, [sp, #60] +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: stm r0, {r5, r7, r9} +; CHECK-NEXT: eor r7, r2, #31 +; CHECK-NEXT: str r9, [sp, #44] +; CHECK-NEXT: str r9, [sp, #40] +; CHECK-NEXT: str r9, [sp, #36] +; CHECK-NEXT: str r9, [sp, #32] +; CHECK-NEXT: ldr r3, [r4, -r3]! +; CHECK-NEXT: ldr r0, [r4, #4] +; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: ldr r5, [r4, #8] +; CHECK-NEXT: str r5, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: lsr r5, r3, #1 +; CHECK-NEXT: ldr r4, [r4, #12] +; CHECK-NEXT: lsl r3, r3, r2 +; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: lsl r4, r0, r2 +; CHECK-NEXT: orr r0, r4, r5, lsr r7 +; CHECK-NEXT: umull r5, r4, r3, lr +; CHECK-NEXT: str r12, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: mov r12, lr +; CHECK-NEXT: mov lr, #0 +; CHECK-NEXT: str r8, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: umlal r4, lr, r0, r12 +; CHECK-NEXT: umull r8, r11, r3, r6 +; CHECK-NEXT: adds r4, r8, r4 +; CHECK-NEXT: adcs lr, lr, r11 +; CHECK-NEXT: adc r9, r9, #0 +; CHECK-NEXT: umlal lr, r9, r0, r6 +; CHECK-NEXT: mla r0, r6, r0, r11 +; CHECK-NEXT: mla r0, r6, r3, r0 +; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: lsl r11, r0, r2 +; CHECK-NEXT: lsr r0, r3, #1 +; CHECK-NEXT: orr r11, r11, r0, lsr r7 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: lsl r2, r3, r2 +; CHECK-NEXT: lsrs r3, r0, #1 +; CHECK-NEXT: orr r2, r2, r3, lsr r7 +; CHECK-NEXT: umull r3, r0, r12, r2 +; CHECK-NEXT: mla r0, r12, r11, r0 +; CHECK-NEXT: ldr r12, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: mla r0, r6, r2, r0 +; CHECK-NEXT: adds r2, r8, r3 +; CHECK-NEXT: ldmib sp, {r3, r8} @ 8-byte Folded Reload +; CHECK-NEXT: adc r3, r3, r0 +; CHECK-NEXT: adds r0, lr, r2 +; CHECK-NEXT: adc r7, r9, r3 +; CHECK-NEXT: .LBB18_10: @ %fp-to-i-cleanup +; CHECK-NEXT: mvn r6, #0 +; CHECK-NEXT: subs r3, r5, r6 +; CHECK-NEXT: mvn r2, #-2147483648 +; CHECK-NEXT: sbcs r3, r4, r2 +; CHECK-NEXT: sbcs r3, r0, #0 +; CHECK-NEXT: sbcs r3, r7, #0 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: movge r4, r2 +; CHECK-NEXT: movge r5, r6 +; CHECK-NEXT: movwlt r3, #1 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: moveq r7, r3 +; CHECK-NEXT: movne r3, r0 +; CHECK-NEXT: rsbs r0, r5, #0 +; CHECK-NEXT: rscs r0, r4, #-2147483648 +; CHECK-NEXT: sbcs r0, r6, r3 +; CHECK-NEXT: sbcs r0, r6, r7 +; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: movwlt r7, #1 ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: movne r7, r0 -; CHECK-NEXT: moveq r4, r8 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r6, r0, r9 +; CHECK-NEXT: movne r7, r5 +; CHECK-NEXT: mov r0, #-2147483648 +; CHECK-NEXT: ldr r5, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: moveq r4, r0 +; CHECK-NEXT: subs r3, r8, r6 ; CHECK-NEXT: vmov.32 d1[0], r7 -; CHECK-NEXT: sbcs r6, r1, r5 -; CHECK-NEXT: sbcs r6, r2, #0 -; CHECK-NEXT: sbcs r6, r3, #0 -; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: movwlt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r3, r6 -; CHECK-NEXT: movne r6, r2 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: moveq r0, r9 -; CHECK-NEXT: rsbs r1, r0, #0 -; CHECK-NEXT: rscs r1, r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r9, r6 -; CHECK-NEXT: sbcs r1, r9, r3 -; CHECK-NEXT: movwlt r10, #1 -; CHECK-NEXT: cmp r10, #0 -; CHECK-NEXT: movne r10, r0 -; CHECK-NEXT: moveq r5, r8 -; CHECK-NEXT: vmov.32 d0[0], r10 +; CHECK-NEXT: sbcs r3, r5, r2 +; CHECK-NEXT: sbcs r3, r12, #0 +; CHECK-NEXT: sbcs r3, r10, #0 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: movlt r2, r5 +; CHECK-NEXT: movge r8, r6 +; CHECK-NEXT: movwlt r3, #1 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: moveq r10, r3 +; CHECK-NEXT: movne r3, r12 +; CHECK-NEXT: rsbs r5, r8, #0 +; CHECK-NEXT: rscs r5, r2, #-2147483648 +; CHECK-NEXT: sbcs r3, r6, r3 +; CHECK-NEXT: sbcs r3, r6, r10 +; CHECK-NEXT: movwlt r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: movne r1, r8 +; CHECK-NEXT: moveq r2, r0 +; CHECK-NEXT: vmov.32 d0[0], r1 ; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: vmov.32 d0[1], r5 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: vmov.32 d0[1], r2 +; CHECK-NEXT: add sp, sp, #100 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -1712,37 +1914,154 @@ entry: define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-LABEL: utest_f64i64: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vorr d0, d9, d9 -; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: subs r1, r2, #1 -; CHECK-NEXT: vorr d0, d8, d8 -; CHECK-NEXT: sbcs r1, r3, #0 +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .pad #64 +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: vmov r1, r0, d0 +; CHECK-NEXT: mov lr, #1 +; CHECK-NEXT: movw r8, #1023 +; CHECK-NEXT: ubfx r2, r0, #20, #11 +; CHECK-NEXT: cmp r2, r8 +; CHECK-NEXT: bhs .LBB19_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: b .LBB19_4 +; CHECK-NEXT: .LBB19_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: bfi r0, lr, #20, #12 +; CHECK-NEXT: movw r3, #1074 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: bhi .LBB19_5 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: movw r3, #1075 +; CHECK-NEXT: sub r3, r3, r2 +; CHECK-NEXT: rsb r7, r3, #32 +; CHECK-NEXT: lsr r1, r1, r3 +; CHECK-NEXT: lsr r10, r0, r3 +; CHECK-NEXT: orr r1, r1, r0, lsl r7 +; CHECK-NEXT: movw r7, #1043 +; CHECK-NEXT: subs r2, r7, r2 +; CHECK-NEXT: movwpl r10, #0 +; CHECK-NEXT: lsrpl r1, r0, r2 +; CHECK-NEXT: .LBB19_4: @ %fp-to-i-cleanup1 +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: mov r9, #0 +; CHECK-NEXT: b .LBB19_6 +; CHECK-NEXT: .LBB19_5: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: str r0, [sp, #52] +; CHECK-NEXT: movw r0, #1075 +; CHECK-NEXT: sub r0, r2, r0 +; CHECK-NEXT: add r2, sp, #32 +; CHECK-NEXT: str r1, [sp, #48] +; CHECK-NEXT: mov r1, #12 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: and r1, r1, r0, lsr #3 +; CHECK-NEXT: add r2, r2, #16 +; CHECK-NEXT: str r3, [sp, #60] +; CHECK-NEXT: str r3, [sp, #56] +; CHECK-NEXT: and r6, r0, #31 +; CHECK-NEXT: str r3, [sp, #44] +; CHECK-NEXT: eor r4, r6, #31 +; CHECK-NEXT: str r3, [sp, #40] +; CHECK-NEXT: str r3, [sp, #36] +; CHECK-NEXT: str r3, [sp, #32] +; CHECK-NEXT: ldr r1, [r2, -r1]! +; CHECK-NEXT: ldmib r2, {r3, r7} +; CHECK-NEXT: lsr r5, r1, #1 +; CHECK-NEXT: ldr r2, [r2, #12] +; CHECK-NEXT: lsl r1, r1, r6 +; CHECK-NEXT: lsl r0, r3, r6 +; CHECK-NEXT: orr r10, r0, r5, lsr r4 +; CHECK-NEXT: lsr r5, r7, #1 +; CHECK-NEXT: lsrs r3, r3, #1 +; CHECK-NEXT: lsl r2, r2, r6 +; CHECK-NEXT: orr r9, r2, r5, lsr r4 +; CHECK-NEXT: lsl r2, r7, r6 +; CHECK-NEXT: orr r12, r2, r3, lsr r4 +; CHECK-NEXT: .LBB19_6: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r3, r6, d1 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: ubfx r7, r6, #20, #11 +; CHECK-NEXT: cmp r7, r8 +; CHECK-NEXT: bhs .LBB19_8 +; CHECK-NEXT: @ %bb.7: +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: b .LBB19_10 +; CHECK-NEXT: .LBB19_8: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: bfi r6, lr, #20, #12 +; CHECK-NEXT: movw r4, #1074 +; CHECK-NEXT: cmp r7, r4 +; CHECK-NEXT: bhi .LBB19_11 +; CHECK-NEXT: @ %bb.9: @ %fp-to-i-if-exp.small +; CHECK-NEXT: movw r4, #1075 +; CHECK-NEXT: sub r4, r4, r7 +; CHECK-NEXT: rsb r5, r4, #32 +; CHECK-NEXT: lsr r3, r3, r4 +; CHECK-NEXT: lsr r4, r6, r4 +; CHECK-NEXT: orr r3, r3, r6, lsl r5 +; CHECK-NEXT: movw r5, #1043 +; CHECK-NEXT: subs r7, r5, r7 +; CHECK-NEXT: movwpl r4, #0 +; CHECK-NEXT: lsrpl r3, r6, r7 +; CHECK-NEXT: .LBB19_10: @ %fp-to-i-cleanup +; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: b .LBB19_12 +; CHECK-NEXT: .LBB19_11: @ %fp-to-i-if-exp.large +; CHECK-NEXT: str r3, [sp, #16] +; CHECK-NEXT: movw r3, #1075 +; CHECK-NEXT: sub r3, r7, r3 +; CHECK-NEXT: mov r7, #12 +; CHECK-NEXT: str r6, [sp, #20] +; CHECK-NEXT: mov r6, sp ; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: movwlo r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r4, r6 -; CHECK-NEXT: movne r6, r0 -; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: subs r2, r2, #1 -; CHECK-NEXT: vmov.32 d1[0], r6 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: movwlo r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: moveq r0, r5 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: vmov.32 d0[0], r0 +; CHECK-NEXT: and r7, r7, r3, lsr #3 +; CHECK-NEXT: add r6, r6, #16 +; CHECK-NEXT: str r5, [sp, #28] +; CHECK-NEXT: str r5, [sp, #24] +; CHECK-NEXT: and r3, r3, #31 +; CHECK-NEXT: str r5, [sp, #12] +; CHECK-NEXT: eor lr, r3, #31 +; CHECK-NEXT: str r5, [sp, #8] +; CHECK-NEXT: str r5, [sp, #4] +; CHECK-NEXT: str r5, [sp] +; CHECK-NEXT: ldr r5, [r6, -r7]! +; CHECK-NEXT: ldr r7, [r6, #4] +; CHECK-NEXT: ldr r0, [r6, #8] +; CHECK-NEXT: ldr r8, [r6, #12] +; CHECK-NEXT: lsr r6, r5, #1 +; CHECK-NEXT: lsl r4, r7, r3 +; CHECK-NEXT: lsrs r7, r7, #1 +; CHECK-NEXT: orr r4, r4, r6, lsr lr +; CHECK-NEXT: lsr r6, r0, #1 +; CHECK-NEXT: lsl r8, r8, r3 +; CHECK-NEXT: lsl r0, r0, r3 +; CHECK-NEXT: orr r6, r8, r6, lsr lr +; CHECK-NEXT: orr r7, r0, r7, lsr lr +; CHECK-NEXT: lsl r3, r5, r3 +; CHECK-NEXT: .LBB19_12: @ %fp-to-i-cleanup +; CHECK-NEXT: subs r0, r7, #1 +; CHECK-NEXT: sbcs r0, r6, #0 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movwlo r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: moveq r4, r0 +; CHECK-NEXT: movne r0, r3 +; CHECK-NEXT: subs r3, r12, #1 +; CHECK-NEXT: vmov.32 d1[0], r0 +; CHECK-NEXT: sbcs r3, r9, #0 +; CHECK-NEXT: movwlo r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: moveq r1, r2 +; CHECK-NEXT: movne r2, r10 +; CHECK-NEXT: vmov.32 d0[0], r1 ; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: vmov.32 d0[1], r5 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEXT: vmov.32 d0[1], r2 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptoui <2 x double> %x to <2 x i128> %0 = icmp ult <2 x i128> %conv, @@ -1753,60 +2072,267 @@ entry: define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-LABEL: ustest_f64i64: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vorr d0, d9, d9 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov r4, r1 +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #100 +; CHECK-NEXT: sub sp, sp, #100 +; CHECK-NEXT: vmov r5, r0, d0 +; CHECK-NEXT: mov r9, #1 +; CHECK-NEXT: mvn r12, #0 +; CHECK-NEXT: movw r3, #1023 +; CHECK-NEXT: ubfx r1, r0, #20, #11 +; CHECK-NEXT: cmp r1, r3 +; CHECK-NEXT: bhs .LBB20_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: mov r11, #0 +; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: mov lr, #0 +; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: b .LBB20_5 +; CHECK-NEXT: .LBB20_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: orr r8, r9, r0, asr #31 +; CHECK-NEXT: bfi r6, r9, #20, #12 +; CHECK-NEXT: asr r0, r0, #31 +; CHECK-NEXT: movw r7, #1074 +; CHECK-NEXT: cmp r1, r7 +; CHECK-NEXT: bhi .LBB20_4 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: movw r7, #1075 +; CHECK-NEXT: sub r7, r7, r1 +; CHECK-NEXT: rsb r4, r7, #32 +; CHECK-NEXT: lsr r5, r5, r7 +; CHECK-NEXT: lsr r7, r6, r7 +; CHECK-NEXT: orr r5, r5, r6, lsl r4 +; CHECK-NEXT: movw r4, #1043 +; CHECK-NEXT: subs r1, r4, r1 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: movwpl r7, #0 +; CHECK-NEXT: lsrpl r5, r6, r1 +; CHECK-NEXT: umull r2, r1, r5, r8 +; CHECK-NEXT: umlal r1, r4, r7, r8 +; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: umull lr, r6, r5, r0 +; CHECK-NEXT: adds r11, lr, r1 +; CHECK-NEXT: adcs r4, r4, r6 +; CHECK-NEXT: mla r6, r0, r7, r6 +; CHECK-NEXT: adc r2, r2, #0 +; CHECK-NEXT: umlal r4, r2, r7, r0 +; CHECK-NEXT: mla r0, r0, r5, r6 +; CHECK-NEXT: adds lr, r4, lr +; CHECK-NEXT: adc r10, r2, r0 +; CHECK-NEXT: b .LBB20_5 +; CHECK-NEXT: .LBB20_4: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: add r2, sp, #80 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: stm r2, {r5, r6, r7} +; CHECK-NEXT: movw r2, #1075 +; CHECK-NEXT: sub r1, r1, r2 +; CHECK-NEXT: mov r2, #12 +; CHECK-NEXT: add r6, sp, #64 +; CHECK-NEXT: and r2, r2, r1, lsr #3 +; CHECK-NEXT: add r6, r6, #16 +; CHECK-NEXT: str r7, [sp, #92] +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: str r7, [sp, #76] +; CHECK-NEXT: str r7, [sp, #72] +; CHECK-NEXT: str r7, [sp, #68] +; CHECK-NEXT: str r7, [sp, #64] +; CHECK-NEXT: ldr r2, [r6, -r2]! +; CHECK-NEXT: ldr r3, [r6, #4] +; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: lsr r4, r2, #1 +; CHECK-NEXT: ldr lr, [r6, #8] +; CHECK-NEXT: ldr r6, [r6, #12] +; CHECK-NEXT: str r6, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: and r6, r1, #31 +; CHECK-NEXT: eor r5, r6, #31 +; CHECK-NEXT: lsl r1, r3, r6 +; CHECK-NEXT: lsl r2, r2, r6 +; CHECK-NEXT: orr r4, r1, r4, lsr r5 +; CHECK-NEXT: umull r3, r1, r2, r8 +; CHECK-NEXT: umlal r1, r12, r4, r8 +; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: ldr r3, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: umull r9, r10, r2, r0 +; CHECK-NEXT: adds r11, r9, r1 +; CHECK-NEXT: adcs r1, r12, r10 +; CHECK-NEXT: adc r7, r7, #0 +; CHECK-NEXT: umlal r1, r7, r4, r0 +; CHECK-NEXT: mla r4, r0, r4, r10 +; CHECK-NEXT: mla r12, r0, r2, r4 +; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: lsl r4, r2, r6 +; CHECK-NEXT: lsr r2, lr, #1 +; CHECK-NEXT: orr r2, r4, r2, lsr r5 +; CHECK-NEXT: lsl r4, lr, r6 +; CHECK-NEXT: lsrs r6, r3, #1 +; CHECK-NEXT: movw r3, #1023 +; CHECK-NEXT: orr r4, r4, r6, lsr r5 +; CHECK-NEXT: umull r5, r6, r8, r4 +; CHECK-NEXT: mla r2, r8, r2, r6 +; CHECK-NEXT: mla r0, r0, r4, r2 +; CHECK-NEXT: adds r2, r9, r5 +; CHECK-NEXT: mov r9, #1 +; CHECK-NEXT: adc r0, r12, r0 +; CHECK-NEXT: adds lr, r1, r2 +; CHECK-NEXT: adc r10, r7, r0 +; CHECK-NEXT: mvn r12, #0 +; CHECK-NEXT: .LBB20_5: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r5, r7, d1 +; CHECK-NEXT: mvn r6, #0 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: cmn r7, #1 +; CHECK-NEXT: ubfx r4, r7, #20, #11 +; CHECK-NEXT: movwgt r6, #0 +; CHECK-NEXT: movwgt r12, #1 +; CHECK-NEXT: cmp r4, r3 +; CHECK-NEXT: bhs .LBB20_7 +; CHECK-NEXT: @ %bb.6: +; CHECK-NEXT: mov r9, #0 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: b .LBB20_11 +; CHECK-NEXT: .LBB20_7: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: bfi r7, r9, #20, #12 +; CHECK-NEXT: movw r1, #1074 +; CHECK-NEXT: cmp r4, r1 +; CHECK-NEXT: str lr, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: bhi .LBB20_9 +; CHECK-NEXT: @ %bb.8: @ %fp-to-i-if-exp.small +; CHECK-NEXT: movw r1, #1075 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: sub r1, r1, r4 +; CHECK-NEXT: lsr r2, r5, r1 +; CHECK-NEXT: rsb r5, r1, #32 +; CHECK-NEXT: lsr r1, r7, r1 +; CHECK-NEXT: orr r2, r2, r7, lsl r5 +; CHECK-NEXT: movw r5, #1043 +; CHECK-NEXT: subs r4, r5, r4 +; CHECK-NEXT: movwpl r1, #0 +; CHECK-NEXT: lsrpl r2, r7, r4 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: umull r9, r4, r2, r12 +; CHECK-NEXT: umlal r4, r7, r1, r12 +; CHECK-NEXT: umull lr, r5, r2, r6 +; CHECK-NEXT: adds r4, lr, r4 +; CHECK-NEXT: adcs r7, r7, r5 +; CHECK-NEXT: mla r5, r6, r1, r5 +; CHECK-NEXT: adc r3, r3, #0 +; CHECK-NEXT: umlal r7, r3, r1, r6 +; CHECK-NEXT: mla r1, r6, r2, r5 +; CHECK-NEXT: adds r2, r7, lr +; CHECK-NEXT: adc r6, r3, r1 +; CHECK-NEXT: b .LBB20_10 +; CHECK-NEXT: .LBB20_9: @ %fp-to-i-if-exp.large +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: add r2, sp, #44 +; CHECK-NEXT: stm r2, {r1, r5, r7} +; CHECK-NEXT: add r3, sp, #32 +; CHECK-NEXT: mov r2, #12 +; CHECK-NEXT: str r1, [sp, #60] +; CHECK-NEXT: add r3, r3, #16 +; CHECK-NEXT: str r1, [sp, #56] +; CHECK-NEXT: mov lr, #0 +; CHECK-NEXT: str r1, [sp, #40] +; CHECK-NEXT: str r1, [sp, #36] +; CHECK-NEXT: str r1, [sp, #32] +; CHECK-NEXT: movw r1, #1075 +; CHECK-NEXT: sub r1, r4, r1 +; CHECK-NEXT: and r2, r2, r1, lsr #3 +; CHECK-NEXT: and r1, r1, #31 +; CHECK-NEXT: ldr r2, [r3, -r2]! +; CHECK-NEXT: ldr r7, [r3, #4] +; CHECK-NEXT: str r7, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: ldr r5, [r3, #8] +; CHECK-NEXT: str r5, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: lsr r5, r2, #1 +; CHECK-NEXT: ldr r3, [r3, #12] +; CHECK-NEXT: lsl r4, r7, r1 +; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: eor r3, r1, #31 +; CHECK-NEXT: lsl r2, r2, r1 +; CHECK-NEXT: str r11, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: orr r5, r4, r5, lsr r3 +; CHECK-NEXT: umull r9, r4, r2, r12 +; CHECK-NEXT: umull r7, r8, r2, r6 +; CHECK-NEXT: umlal r4, lr, r5, r12 +; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: adds r4, r7, r4 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: adcs lr, lr, r8 +; CHECK-NEXT: adc r11, r7, #0 +; CHECK-NEXT: ldr r7, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: umlal lr, r11, r5, r6 +; CHECK-NEXT: mla r5, r6, r5, r8 +; CHECK-NEXT: mla r8, r6, r2, r5 +; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: lsl r5, r2, r1 +; CHECK-NEXT: lsr r2, r7, #1 +; CHECK-NEXT: orr r2, r5, r2, lsr r3 +; CHECK-NEXT: ldr r5, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: lsl r1, r7, r1 +; CHECK-NEXT: lsrs r5, r5, #1 +; CHECK-NEXT: orr r1, r1, r5, lsr r3 +; CHECK-NEXT: umull r3, r5, r12, r1 +; CHECK-NEXT: mla r2, r12, r2, r5 +; CHECK-NEXT: mla r1, r6, r1, r2 +; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: adds r2, r2, r3 +; CHECK-NEXT: adc r1, r8, r1 +; CHECK-NEXT: adds r2, lr, r2 +; CHECK-NEXT: adc r6, r11, r1 +; CHECK-NEXT: ldr r11, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: .LBB20_10: @ %fp-to-i-cleanup +; CHECK-NEXT: ldr lr, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: .LBB20_11: @ %fp-to-i-cleanup ; CHECK-NEXT: subs r1, r2, #1 -; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: mov r8, #1 +; CHECK-NEXT: mov r5, #1 +; CHECK-NEXT: sbcs r1, r6, #0 ; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: movge r2, r8 ; CHECK-NEXT: movwlt r1, #1 ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r3, r1 +; CHECK-NEXT: moveq r6, r1 ; CHECK-NEXT: moveq r4, r1 -; CHECK-NEXT: movne r1, r0 -; CHECK-NEXT: rsbs r0, r1, #0 -; CHECK-NEXT: rscs r0, r4, #0 -; CHECK-NEXT: vorr d0, d8, d8 -; CHECK-NEXT: rscs r0, r2, #0 -; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: rscs r0, r3, #0 -; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: movwlt r7, #1 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: moveq r4, r7 -; CHECK-NEXT: movne r7, r1 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r6, r2, #1 -; CHECK-NEXT: vmov.32 d1[0], r7 -; CHECK-NEXT: sbcs r6, r3, #0 -; CHECK-NEXT: movlt r8, r2 +; CHECK-NEXT: movne r1, r9 +; CHECK-NEXT: moveq r2, r5 +; CHECK-NEXT: rsbs r3, r1, #0 +; CHECK-NEXT: rscs r3, r4, #0 +; CHECK-NEXT: rscs r2, r2, #0 +; CHECK-NEXT: ldr r3, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: rscs r2, r6, #0 ; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: movwlt r2, #1 ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: moveq r3, r2 -; CHECK-NEXT: moveq r1, r2 -; CHECK-NEXT: movne r2, r0 -; CHECK-NEXT: rsbs r0, r2, #0 -; CHECK-NEXT: rscs r0, r1, #0 -; CHECK-NEXT: rscs r0, r8, #0 -; CHECK-NEXT: rscs r0, r3, #0 -; CHECK-NEXT: movwlt r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: moveq r2, r5 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: vmov.32 d0[0], r2 +; CHECK-NEXT: moveq r4, r2 +; CHECK-NEXT: movne r2, r1 +; CHECK-NEXT: subs r1, lr, #1 +; CHECK-NEXT: vmov.32 d1[0], r2 +; CHECK-NEXT: sbcs r1, r10, #0 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: movwlt r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: moveq r10, r1 +; CHECK-NEXT: moveq r11, r1 +; CHECK-NEXT: movne r1, r3 +; CHECK-NEXT: movne r5, lr +; CHECK-NEXT: rsbs r3, r1, #0 +; CHECK-NEXT: rscs r3, r11, #0 +; CHECK-NEXT: rscs r3, r5, #0 +; CHECK-NEXT: rscs r3, r10, #0 +; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: moveq r1, r0 +; CHECK-NEXT: movne r0, r11 +; CHECK-NEXT: vmov.32 d0[0], r1 ; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: vmov.32 d0[1], r5 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: vmov.32 d0[1], r0 +; CHECK-NEXT: add sp, sp, #100 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -1819,66 +2345,251 @@ entry: define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-LABEL: stest_f32i64: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: .vsave {d8} -; CHECK-NEXT: vpush {d8} -; CHECK-NEXT: vmov.f64 d8, d0 -; CHECK-NEXT: vmov.f32 s0, s17 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov r4, r1 +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #100 +; CHECK-NEXT: sub sp, sp, #100 +; CHECK-NEXT: vmov r6, s0 +; CHECK-NEXT: mov r1, #0 ; CHECK-NEXT: mvn r9, #0 -; CHECK-NEXT: subs r1, r0, r9 -; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r4, r5 -; CHECK-NEXT: vmov.f32 s0, s16 -; CHECK-NEXT: sbcs r1, r2, #0 +; CHECK-NEXT: ubfx r7, r6, #23, #8 +; CHECK-NEXT: cmp r7, #127 +; CHECK-NEXT: bhs .LBB21_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: mov r8, #0 +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: mov lr, #0 +; CHECK-NEXT: b .LBB21_5 +; CHECK-NEXT: .LBB21_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: mov r0, #1 +; CHECK-NEXT: orr lr, r0, r6, asr #31 +; CHECK-NEXT: asr r2, r6, #31 +; CHECK-NEXT: bfi r6, r0, #23, #9 +; CHECK-NEXT: cmp r7, #149 +; CHECK-NEXT: bhi .LBB21_4 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: rsb r0, r7, #150 +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: lsr r0, r6, r0 +; CHECK-NEXT: umull r8, r12, r0, lr +; CHECK-NEXT: umull r7, r6, r0, r2 +; CHECK-NEXT: adds r4, r7, r12 +; CHECK-NEXT: adcs r4, r6, #0 +; CHECK-NEXT: adc r3, r5, #0 +; CHECK-NEXT: adds r4, r7, r12 +; CHECK-NEXT: mla r4, r2, r0, r6 +; CHECK-NEXT: adcs r10, r7, r6 +; CHECK-NEXT: umlal r12, r5, r0, r2 +; CHECK-NEXT: adc lr, r3, r4 +; CHECK-NEXT: b .LBB21_5 +; CHECK-NEXT: .LBB21_4: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: sub r7, r7, #150 +; CHECK-NEXT: add r4, sp, #64 +; CHECK-NEXT: str r6, [sp, #80] +; CHECK-NEXT: mov r6, #12 +; CHECK-NEXT: mov r11, #0 +; CHECK-NEXT: and r6, r6, r7, lsr #3 +; CHECK-NEXT: add r4, r4, #16 +; CHECK-NEXT: str r11, [sp, #92] +; CHECK-NEXT: str r11, [sp, #88] +; CHECK-NEXT: and r7, r7, #31 +; CHECK-NEXT: str r11, [sp, #84] +; CHECK-NEXT: str r11, [sp, #76] +; CHECK-NEXT: str r11, [sp, #72] +; CHECK-NEXT: str r11, [sp, #68] +; CHECK-NEXT: str r11, [sp, #64] +; CHECK-NEXT: ldr r6, [r4, -r6]! +; CHECK-NEXT: ldr r0, [r4, #4] +; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: lsr r8, r6, #1 +; CHECK-NEXT: ldr r3, [r4, #8] +; CHECK-NEXT: lsl r6, r6, r7 +; CHECK-NEXT: ldr r5, [r4, #12] +; CHECK-NEXT: eor r4, r7, #31 +; CHECK-NEXT: lsl r12, r0, r7 +; CHECK-NEXT: str r5, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: orr r0, r12, r8, lsr r4 +; CHECK-NEXT: umull r8, r5, r6, lr +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: str lr, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: umlal r5, r12, r0, lr +; CHECK-NEXT: mov lr, r0 +; CHECK-NEXT: umull r0, r10, r6, r2 +; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: adds r0, r0, r5 +; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: adcs r5, r12, r10 +; CHECK-NEXT: adc r11, r11, #0 +; CHECK-NEXT: umlal r5, r11, lr, r2 +; CHECK-NEXT: lsl r12, r0, r7 +; CHECK-NEXT: lsr r0, r3, #1 +; CHECK-NEXT: orr r12, r12, r0, lsr r4 +; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: lsl r7, r3, r7 +; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: lsrs r0, r0, #1 +; CHECK-NEXT: orr r0, r7, r0, lsr r4 +; CHECK-NEXT: umull r4, r7, r3, r0 +; CHECK-NEXT: mla r7, r3, r12, r7 +; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr r12, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mla r0, r2, r0, r7 +; CHECK-NEXT: mla r7, r2, lr, r10 +; CHECK-NEXT: mla r2, r2, r6, r7 +; CHECK-NEXT: adds r7, r3, r4 +; CHECK-NEXT: adc r0, r2, r0 +; CHECK-NEXT: adds r10, r5, r7 +; CHECK-NEXT: adc lr, r11, r0 +; CHECK-NEXT: .LBB21_5: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r0, s1 +; CHECK-NEXT: mvn r5, #0 +; CHECK-NEXT: cmn r0, #1 +; CHECK-NEXT: ubfx r4, r0, #23, #8 +; CHECK-NEXT: movwgt r5, #0 +; CHECK-NEXT: movwgt r9, #1 +; CHECK-NEXT: cmp r4, #127 +; CHECK-NEXT: bhs .LBB21_7 +; CHECK-NEXT: @ %bb.6: +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: mov r8, #-2147483648 -; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: b .LBB21_10 +; CHECK-NEXT: .LBB21_7: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: movw r1, #65535 +; CHECK-NEXT: cmp r4, #149 +; CHECK-NEXT: movt r1, #127 +; CHECK-NEXT: and r0, r0, r1 +; CHECK-NEXT: orr r1, r0, #8388608 +; CHECK-NEXT: bhi .LBB21_9 +; CHECK-NEXT: @ %bb.8: @ %fp-to-i-if-exp.small +; CHECK-NEXT: rsb r0, r4, #150 +; CHECK-NEXT: lsr r0, r1, r0 +; CHECK-NEXT: umull r7, r6, r0, r5 +; CHECK-NEXT: umull r1, r3, r0, r9 +; CHECK-NEXT: mul r5, r5, r0 +; CHECK-NEXT: adds r4, r7, r3 +; CHECK-NEXT: adcs r0, r7, r6 +; CHECK-NEXT: adc r7, r6, r5 +; CHECK-NEXT: b .LBB21_10 +; CHECK-NEXT: .LBB21_9: @ %fp-to-i-if-exp.large +; CHECK-NEXT: str r1, [sp, #48] +; CHECK-NEXT: sub r1, r4, #150 +; CHECK-NEXT: mov r3, #12 +; CHECK-NEXT: add r7, sp, #32 +; CHECK-NEXT: mov r11, #0 +; CHECK-NEXT: and r3, r3, r1, lsr #3 +; CHECK-NEXT: add r7, r7, #16 +; CHECK-NEXT: str r11, [sp, #60] +; CHECK-NEXT: str r11, [sp, #56] +; CHECK-NEXT: str r11, [sp, #52] +; CHECK-NEXT: str r11, [sp, #44] +; CHECK-NEXT: str r11, [sp, #40] +; CHECK-NEXT: str r11, [sp, #36] +; CHECK-NEXT: str r11, [sp, #32] +; CHECK-NEXT: ldr r3, [r7, -r3]! +; CHECK-NEXT: ldr r0, [r7, #4] +; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: lsr r4, r3, #1 +; CHECK-NEXT: ldr r2, [r7, #8] +; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: ldr r2, [r7, #12] +; CHECK-NEXT: and r7, r1, #31 +; CHECK-NEXT: eor r6, r7, #31 +; CHECK-NEXT: str r12, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: lsl r1, r0, r7 +; CHECK-NEXT: lsl r0, r3, r7 +; CHECK-NEXT: orr r12, r1, r4, lsr r6 +; CHECK-NEXT: umull r1, r4, r0, r9 +; CHECK-NEXT: str r10, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: mov r10, #0 -; CHECK-NEXT: movwlt r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r3, r1 -; CHECK-NEXT: movne r1, r2 -; CHECK-NEXT: moveq r4, r5 -; CHECK-NEXT: moveq r0, r9 -; CHECK-NEXT: rsbs r2, r0, #0 -; CHECK-NEXT: rscs r2, r4, #-2147483648 -; CHECK-NEXT: sbcs r1, r9, r1 -; CHECK-NEXT: sbcs r1, r9, r3 +; CHECK-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-NEXT: umull r0, r3, r0, r5 +; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: umlal r4, r10, r12, r9 +; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: adds r4, r0, r4 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: adcs r10, r10, r3 +; CHECK-NEXT: adc r3, r11, #0 +; CHECK-NEXT: lsl r2, r0, r7 +; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: umlal r10, r3, r12, r5 +; CHECK-NEXT: lsr r11, r0, #1 +; CHECK-NEXT: orr r2, r2, r11, lsr r6 +; CHECK-NEXT: lsl r11, r0, r7 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: lsrs r7, r0, #1 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: orr r6, r11, r7, lsr r6 +; CHECK-NEXT: umull r11, r7, r9, r6 +; CHECK-NEXT: mla r7, r9, r0, r7 +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mla r7, r5, r6, r7 +; CHECK-NEXT: mla r6, r5, r12, r0 +; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload +; CHECK-NEXT: ldr r12, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mla r0, r5, r0, r6 +; CHECK-NEXT: adds r6, r2, r11 +; CHECK-NEXT: adc r7, r0, r7 +; CHECK-NEXT: adds r0, r10, r6 +; CHECK-NEXT: ldr r10, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: adc r7, r3, r7 +; CHECK-NEXT: .LBB21_10: @ %fp-to-i-cleanup +; CHECK-NEXT: mvn r5, #0 +; CHECK-NEXT: subs r6, r1, r5 +; CHECK-NEXT: mvn r3, #-2147483648 +; CHECK-NEXT: sbcs r6, r4, r3 +; CHECK-NEXT: sbcs r6, r0, #0 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: sbcs r6, r7, #0 +; CHECK-NEXT: movge r4, r3 +; CHECK-NEXT: movge r1, r5 +; CHECK-NEXT: movwlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: moveq r7, r2 +; CHECK-NEXT: movne r2, r0 +; CHECK-NEXT: rsbs r0, r1, #0 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: rscs r0, r4, #-2147483648 +; CHECK-NEXT: sbcs r0, r5, r2 +; CHECK-NEXT: sbcs r0, r5, r7 +; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: movwlt r7, #1 ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: movne r7, r0 -; CHECK-NEXT: moveq r4, r8 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r6, r0, r9 +; CHECK-NEXT: mov r0, #-2147483648 +; CHECK-NEXT: movne r7, r1 +; CHECK-NEXT: moveq r4, r0 +; CHECK-NEXT: subs r1, r8, r5 +; CHECK-NEXT: sbcs r1, r12, r3 ; CHECK-NEXT: vmov.32 d1[0], r7 -; CHECK-NEXT: sbcs r6, r1, r5 -; CHECK-NEXT: sbcs r6, r2, #0 -; CHECK-NEXT: sbcs r6, r3, #0 -; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: sbcs r1, r10, #0 +; CHECK-NEXT: sbcs r1, lr, #0 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: movlt r3, r12 +; CHECK-NEXT: movge r8, r5 +; CHECK-NEXT: movwlt r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: moveq lr, r1 +; CHECK-NEXT: movne r1, r10 +; CHECK-NEXT: rsbs r2, r8, #0 +; CHECK-NEXT: rscs r2, r3, #-2147483648 +; CHECK-NEXT: sbcs r1, r5, r1 +; CHECK-NEXT: sbcs r1, r5, lr ; CHECK-NEXT: movwlt r6, #1 ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r3, r6 -; CHECK-NEXT: movne r6, r2 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: moveq r0, r9 -; CHECK-NEXT: rsbs r1, r0, #0 -; CHECK-NEXT: rscs r1, r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r9, r6 -; CHECK-NEXT: sbcs r1, r9, r3 -; CHECK-NEXT: movwlt r10, #1 -; CHECK-NEXT: cmp r10, #0 -; CHECK-NEXT: movne r10, r0 -; CHECK-NEXT: moveq r5, r8 -; CHECK-NEXT: vmov.32 d0[0], r10 +; CHECK-NEXT: movne r6, r8 +; CHECK-NEXT: moveq r3, r0 +; CHECK-NEXT: vmov.32 d0[0], r6 ; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: vmov.32 d0[1], r5 -; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: vmov.32 d0[1], r3 +; CHECK-NEXT: add sp, sp, #100 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -1891,37 +2602,132 @@ entry: define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-LABEL: utest_f32i64: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: .vsave {d8} -; CHECK-NEXT: vpush {d8} -; CHECK-NEXT: vmov.f64 d8, d0 -; CHECK-NEXT: vmov.f32 s0, s17 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vmov.f32 s0, s16 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: subs r1, r2, #1 -; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: sbcs r1, r3, #0 +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: .pad #64 +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: ubfx r0, r1, #23, #8 +; CHECK-NEXT: cmp r0, #127 +; CHECK-NEXT: bhs .LBB22_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: b .LBB22_4 +; CHECK-NEXT: .LBB22_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: cmp r0, #149 +; CHECK-NEXT: bfi r1, r3, #23, #9 +; CHECK-NEXT: bhi .LBB22_5 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: rsb r0, r0, #150 +; CHECK-NEXT: lsr r1, r1, r0 +; CHECK-NEXT: .LBB22_4: @ %fp-to-i-cleanup1 +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: mov r8, #0 +; CHECK-NEXT: mov lr, #0 +; CHECK-NEXT: b .LBB22_6 +; CHECK-NEXT: .LBB22_5: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: sub r0, r0, #150 +; CHECK-NEXT: str r1, [sp, #48] +; CHECK-NEXT: mov r1, #12 +; CHECK-NEXT: str r3, [sp, #60] +; CHECK-NEXT: and r1, r1, r0, lsr #3 +; CHECK-NEXT: str r3, [sp, #56] +; CHECK-NEXT: and r6, r0, #31 +; CHECK-NEXT: str r3, [sp, #52] +; CHECK-NEXT: eor r7, r6, #31 +; CHECK-NEXT: str r3, [sp, #44] +; CHECK-NEXT: str r3, [sp, #40] +; CHECK-NEXT: str r3, [sp, #36] +; CHECK-NEXT: str r3, [sp, #32] +; CHECK-NEXT: add r3, sp, #32 +; CHECK-NEXT: add r3, r3, #16 +; CHECK-NEXT: ldr r1, [r3, -r1]! +; CHECK-NEXT: ldmib r3, {r4, r5, lr} +; CHECK-NEXT: lsr r3, r1, #1 +; CHECK-NEXT: lsl r1, r1, r6 +; CHECK-NEXT: lsl r0, r4, r6 +; CHECK-NEXT: orr r12, r0, r3, lsr r7 +; CHECK-NEXT: lsr r3, r5, #1 +; CHECK-NEXT: lsl r0, lr, r6 +; CHECK-NEXT: orr lr, r0, r3, lsr r7 +; CHECK-NEXT: lsrs r3, r4, #1 +; CHECK-NEXT: lsl r0, r5, r6 +; CHECK-NEXT: orr r8, r0, r3, lsr r7 +; CHECK-NEXT: .LBB22_6: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r4, s1 +; CHECK-NEXT: ubfx r3, r4, #23, #8 +; CHECK-NEXT: cmp r3, #127 +; CHECK-NEXT: blo .LBB22_9 +; CHECK-NEXT: @ %bb.7: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: movw r2, #65535 +; CHECK-NEXT: cmp r3, #149 +; CHECK-NEXT: movt r2, #127 +; CHECK-NEXT: and r2, r4, r2 +; CHECK-NEXT: orr r2, r2, #8388608 +; CHECK-NEXT: bhi .LBB22_10 +; CHECK-NEXT: @ %bb.8: @ %fp-to-i-if-exp.small +; CHECK-NEXT: rsb r3, r3, #150 +; CHECK-NEXT: lsr r2, r2, r3 +; CHECK-NEXT: .LBB22_9: +; CHECK-NEXT: mov r3, #0 ; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: movwlo r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r4, r6 -; CHECK-NEXT: movne r6, r0 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: subs r2, r2, #1 -; CHECK-NEXT: vmov.32 d1[0], r6 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: movwlo r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: moveq r0, r5 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: vmov.32 d0[0], r0 -; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: vmov.32 d0[1], r5 -; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: b .LBB22_11 +; CHECK-NEXT: .LBB22_10: @ %fp-to-i-if-exp.large +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: str r2, [sp, #16] +; CHECK-NEXT: sub r2, r3, #150 +; CHECK-NEXT: mov r3, #12 +; CHECK-NEXT: str r4, [sp, #28] +; CHECK-NEXT: str r4, [sp, #24] +; CHECK-NEXT: and r3, r3, r2, lsr #3 +; CHECK-NEXT: str r4, [sp, #20] +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: str r4, [sp, #12] +; CHECK-NEXT: eor r9, r2, #31 +; CHECK-NEXT: str r4, [sp, #8] +; CHECK-NEXT: str r4, [sp, #4] +; CHECK-NEXT: str r4, [sp] +; CHECK-NEXT: mov r4, sp +; CHECK-NEXT: add r4, r4, #16 +; CHECK-NEXT: ldr r6, [r4, -r3]! +; CHECK-NEXT: ldmib r4, {r5, r7} +; CHECK-NEXT: lsr r0, r6, #1 +; CHECK-NEXT: ldr r4, [r4, #12] +; CHECK-NEXT: lsl r3, r5, r2 +; CHECK-NEXT: lsrs r5, r5, #1 +; CHECK-NEXT: orr r3, r3, r0, lsr r9 +; CHECK-NEXT: lsl r0, r4, r2 +; CHECK-NEXT: lsr r4, r7, #1 +; CHECK-NEXT: orr r4, r0, r4, lsr r9 +; CHECK-NEXT: lsl r0, r7, r2 +; CHECK-NEXT: orr r5, r0, r5, lsr r9 +; CHECK-NEXT: lsl r2, r6, r2 +; CHECK-NEXT: .LBB22_11: @ %fp-to-i-cleanup +; CHECK-NEXT: subs r5, r5, #1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: sbcs r4, r4, #0 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: movwlo r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: moveq r3, r4 +; CHECK-NEXT: movne r4, r2 +; CHECK-NEXT: subs r2, r8, #1 +; CHECK-NEXT: vmov.32 d1[0], r4 +; CHECK-NEXT: sbcs r2, lr, #0 +; CHECK-NEXT: movwlo r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: moveq r1, r0 +; CHECK-NEXT: movne r0, r12 +; CHECK-NEXT: vmov.32 d0[0], r1 +; CHECK-NEXT: vmov.32 d1[1], r3 +; CHECK-NEXT: vmov.32 d0[1], r0 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} entry: %conv = fptoui <2 x float> %x to <2 x i128> %0 = icmp ult <2 x i128> %conv, @@ -1932,60 +2738,242 @@ entry: define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-LABEL: ustest_f32i64: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: .vsave {d8} -; CHECK-NEXT: vpush {d8} -; CHECK-NEXT: vmov.f64 d8, d0 -; CHECK-NEXT: vmov.f32 s0, s17 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: subs r1, r2, #1 -; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: mov r8, #1 +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #100 +; CHECK-NEXT: sub sp, sp, #100 +; CHECK-NEXT: vmov r7, s0 +; CHECK-NEXT: mov lr, #0 +; CHECK-NEXT: mvn r10, #0 +; CHECK-NEXT: ubfx r0, r7, #23, #8 +; CHECK-NEXT: cmp r0, #127 +; CHECK-NEXT: bhs .LBB23_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: mov r8, #0 +; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: mov r9, #0 ; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: vmov.f32 s0, s16 -; CHECK-NEXT: movge r2, r8 -; CHECK-NEXT: movwlt r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: moveq r3, r1 -; CHECK-NEXT: moveq r4, r1 -; CHECK-NEXT: movne r1, r0 -; CHECK-NEXT: rsbs r0, r1, #0 -; CHECK-NEXT: rscs r0, r4, #0 +; CHECK-NEXT: b .LBB23_5 +; CHECK-NEXT: .LBB23_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: mov r6, #1 +; CHECK-NEXT: orr r9, r6, r7, asr #31 +; CHECK-NEXT: asr r1, r7, #31 +; CHECK-NEXT: bfi r7, r6, #23, #9 +; CHECK-NEXT: cmp r0, #149 +; CHECK-NEXT: bhi .LBB23_4 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: rsb r0, r0, #150 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: lsr r4, r7, r0 +; CHECK-NEXT: umull r0, r8, r4, r9 +; CHECK-NEXT: umull r2, r5, r4, r1 +; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: adds r7, r2, r8 +; CHECK-NEXT: adcs r7, r5, #0 +; CHECK-NEXT: adc r3, r6, #0 +; CHECK-NEXT: adds r7, r2, r8 +; CHECK-NEXT: mla r7, r1, r4, r5 +; CHECK-NEXT: adcs r9, r2, r5 +; CHECK-NEXT: umlal r8, r6, r4, r1 +; CHECK-NEXT: adc r1, r3, r7 +; CHECK-NEXT: b .LBB23_5 +; CHECK-NEXT: .LBB23_4: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: sub r0, r0, #150 +; CHECK-NEXT: add r6, sp, #64 +; CHECK-NEXT: str r7, [sp, #80] +; CHECK-NEXT: mov r7, #12 +; CHECK-NEXT: mov r11, #0 +; CHECK-NEXT: and r7, r7, r0, lsr #3 +; CHECK-NEXT: add r6, r6, #16 +; CHECK-NEXT: str r11, [sp, #92] +; CHECK-NEXT: str r11, [sp, #88] +; CHECK-NEXT: str r11, [sp, #84] +; CHECK-NEXT: str r11, [sp, #76] +; CHECK-NEXT: str r11, [sp, #72] +; CHECK-NEXT: str r11, [sp, #68] +; CHECK-NEXT: str r11, [sp, #64] +; CHECK-NEXT: ldr r7, [r6, -r7]! +; CHECK-NEXT: ldr r2, [r6, #4] +; CHECK-NEXT: str r2, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: ldr r4, [r6, #8] +; CHECK-NEXT: ldr r3, [r6, #12] +; CHECK-NEXT: and r6, r0, #31 +; CHECK-NEXT: eor r5, r6, #31 +; CHECK-NEXT: lsr r0, r7, #1 +; CHECK-NEXT: lsl r7, r7, r6 +; CHECK-NEXT: lsl r12, r2, r6 +; CHECK-NEXT: orr lr, r12, r0, lsr r5 +; CHECK-NEXT: umull r2, r0, r7, r9 +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: umlal r0, r12, lr, r9 +; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: umull r2, r3, r7, r1 +; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: adds r8, r2, r0 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: adcs r2, r12, r3 +; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: adc r3, r11, #0 +; CHECK-NEXT: lsl r12, r0, r6 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: lsl r6, r0, r6 +; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: lsr r4, r4, #1 +; CHECK-NEXT: umlal r2, r3, lr, r1 +; CHECK-NEXT: orr r12, r12, r4, lsr r5 +; CHECK-NEXT: lsrs r4, r0, #1 +; CHECK-NEXT: orr r4, r6, r4, lsr r5 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: umull r5, r6, r9, r4 +; CHECK-NEXT: mla r6, r9, r12, r6 +; CHECK-NEXT: mla r6, r1, r4, r6 +; CHECK-NEXT: mla r4, r1, lr, r0 +; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov lr, #0 +; CHECK-NEXT: mla r1, r1, r7, r4 +; CHECK-NEXT: adds r7, r0, r5 +; CHECK-NEXT: adc r1, r1, r6 +; CHECK-NEXT: adds r9, r2, r7 +; CHECK-NEXT: adc r1, r3, r1 +; CHECK-NEXT: .LBB23_5: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r4, s1 +; CHECK-NEXT: mvn r5, #0 +; CHECK-NEXT: cmn r4, #1 +; CHECK-NEXT: ubfx r2, r4, #23, #8 +; CHECK-NEXT: movwgt r5, #0 +; CHECK-NEXT: movwgt r10, #1 +; CHECK-NEXT: cmp r2, #127 +; CHECK-NEXT: bhs .LBB23_7 +; CHECK-NEXT: @ %bb.6: +; CHECK-NEXT: mov r4, #0 ; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: rscs r0, r2, #0 -; CHECK-NEXT: rscs r0, r3, #0 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: b .LBB23_11 +; CHECK-NEXT: .LBB23_7: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: movw r3, #65535 +; CHECK-NEXT: cmp r2, #149 +; CHECK-NEXT: movt r3, #127 +; CHECK-NEXT: and r3, r4, r3 +; CHECK-NEXT: orr r4, r3, #8388608 +; CHECK-NEXT: bhi .LBB23_9 +; CHECK-NEXT: @ %bb.8: @ %fp-to-i-if-exp.small +; CHECK-NEXT: rsb r2, r2, #150 +; CHECK-NEXT: lsr r2, r4, r2 +; CHECK-NEXT: umull lr, r3, r2, r10 +; CHECK-NEXT: umull r7, r6, r2, r5 +; CHECK-NEXT: mul r2, r5, r2 +; CHECK-NEXT: adds r4, r7, r3 +; CHECK-NEXT: adcs r5, r7, r6 +; CHECK-NEXT: b .LBB23_10 +; CHECK-NEXT: .LBB23_9: @ %fp-to-i-if-exp.large +; CHECK-NEXT: sub r2, r2, #150 +; CHECK-NEXT: mov r3, #12 +; CHECK-NEXT: add r7, sp, #32 +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: and r3, r3, r2, lsr #3 +; CHECK-NEXT: add r7, r7, #16 +; CHECK-NEXT: str r12, [sp, #60] +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: str r12, [sp, #56] +; CHECK-NEXT: str r12, [sp, #52] +; CHECK-NEXT: str r4, [sp, #48] +; CHECK-NEXT: str r12, [sp, #44] +; CHECK-NEXT: str r12, [sp, #40] +; CHECK-NEXT: str r12, [sp, #36] +; CHECK-NEXT: str r12, [sp, #32] +; CHECK-NEXT: ldr r3, [r7, -r3]! +; CHECK-NEXT: ldr r0, [r7, #4] +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: ldr r4, [r7, #8] +; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: lsr r4, r3, #1 +; CHECK-NEXT: ldr lr, [r7, #12] +; CHECK-NEXT: eor r7, r2, #31 +; CHECK-NEXT: lsl r11, r0, r2 +; CHECK-NEXT: lsl r3, r3, r2 +; CHECK-NEXT: orr r11, r11, r4, lsr r7 +; CHECK-NEXT: umull r0, r4, r3, r10 +; CHECK-NEXT: str r9, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: mov r9, #0 +; CHECK-NEXT: umlal r4, r9, r11, r10 +; CHECK-NEXT: lsl lr, lr, r2 +; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: umull r0, r6, r3, r5 +; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: adds r4, r0, r4 +; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: adcs r9, r9, r6 +; CHECK-NEXT: str r6, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: adc r6, r12, #0 +; CHECK-NEXT: lsr r12, r0, #1 +; CHECK-NEXT: umlal r9, r6, r11, r5 +; CHECK-NEXT: orr r12, lr, r12, lsr r7 +; CHECK-NEXT: lsl lr, r0, r2 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: lsrs r2, r0, #1 +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: orr r2, lr, r2, lsr r7 +; CHECK-NEXT: umull lr, r7, r10, r2 +; CHECK-NEXT: mla r7, r10, r12, r7 +; CHECK-NEXT: mla r2, r5, r2, r7 +; CHECK-NEXT: mla r7, r5, r11, r0 +; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mla r3, r5, r3, r7 +; CHECK-NEXT: adds r7, r0, lr +; CHECK-NEXT: ldr lr, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: adc r2, r3, r2 +; CHECK-NEXT: adds r5, r9, r7 +; CHECK-NEXT: ldr r9, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: .LBB23_10: @ %fp-to-i-cleanup +; CHECK-NEXT: adc r6, r6, r2 +; CHECK-NEXT: .LBB23_11: @ %fp-to-i-cleanup +; CHECK-NEXT: subs r2, r5, #1 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: sbcs r2, r6, #0 +; CHECK-NEXT: mov r3, #0 ; CHECK-NEXT: movwlt r7, #1 ; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: mov r2, #1 +; CHECK-NEXT: moveq r6, r7 ; CHECK-NEXT: moveq r4, r7 -; CHECK-NEXT: movne r7, r1 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r6, r2, #1 -; CHECK-NEXT: vmov.32 d1[0], r7 -; CHECK-NEXT: sbcs r6, r3, #0 -; CHECK-NEXT: movlt r8, r2 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: moveq r3, r2 -; CHECK-NEXT: moveq r1, r2 -; CHECK-NEXT: movne r2, r0 -; CHECK-NEXT: rsbs r0, r2, #0 -; CHECK-NEXT: rscs r0, r1, #0 -; CHECK-NEXT: rscs r0, r8, #0 -; CHECK-NEXT: rscs r0, r3, #0 -; CHECK-NEXT: movwlt r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: moveq r2, r5 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: vmov.32 d0[0], r2 +; CHECK-NEXT: movne r7, lr +; CHECK-NEXT: moveq r5, r2 +; CHECK-NEXT: rsbs r0, r7, #0 +; CHECK-NEXT: rscs r0, r4, #0 +; CHECK-NEXT: rscs r0, r5, #0 +; CHECK-NEXT: rscs r0, r6, #0 +; CHECK-NEXT: ldr r6, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: moveq r4, r0 +; CHECK-NEXT: movne r0, r7 +; CHECK-NEXT: subs r7, r9, #1 +; CHECK-NEXT: vmov.32 d1[0], r0 +; CHECK-NEXT: sbcs r7, r1, #0 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: movwlt r7, #1 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: moveq r1, r7 +; CHECK-NEXT: moveq r8, r7 +; CHECK-NEXT: movne r7, r6 +; CHECK-NEXT: movne r2, r9 +; CHECK-NEXT: rsbs r6, r7, #0 +; CHECK-NEXT: rscs r6, r8, #0 +; CHECK-NEXT: rscs r2, r2, #0 +; CHECK-NEXT: rscs r1, r1, #0 +; CHECK-NEXT: movwlt r3, #1 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: moveq r7, r3 +; CHECK-NEXT: movne r3, r8 +; CHECK-NEXT: vmov.32 d0[0], r7 ; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: vmov.32 d0[1], r5 -; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: vmov.32 d0[1], r3 +; CHECK-NEXT: add sp, sp, #100 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -1999,130 +2987,129 @@ entry: define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NEON-LABEL: stest_f16i64: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEON-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEON-NEXT: push {r4, r5, r6, lr} ; CHECK-NEON-NEXT: .vsave {d8} ; CHECK-NEON-NEXT: vpush {d8} ; CHECK-NEON-NEXT: vmov r0, s0 ; CHECK-NEON-NEXT: vmov.f32 s16, s1 ; CHECK-NEON-NEXT: bl __aeabi_h2f -; CHECK-NEON-NEXT: mov r8, r0 +; CHECK-NEON-NEXT: mov r4, r0 ; CHECK-NEON-NEXT: vmov r0, s16 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 -; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: mov r4, r1 -; CHECK-NEON-NEXT: mvn r9, #0 -; CHECK-NEON-NEXT: subs r1, r0, r9 -; CHECK-NEON-NEXT: mvn r6, #-2147483648 -; CHECK-NEON-NEXT: sbcs r1, r4, r6 -; CHECK-NEON-NEXT: vmov s0, r8 -; CHECK-NEON-NEXT: sbcs r1, r2, #0 -; CHECK-NEON-NEXT: mov r5, #0 -; CHECK-NEON-NEXT: sbcs r1, r3, #0 -; CHECK-NEON-NEXT: mov r8, #-2147483648 -; CHECK-NEON-NEXT: mov r1, #0 -; CHECK-NEON-NEXT: mov r10, #0 -; CHECK-NEON-NEXT: movwlt r1, #1 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: moveq r3, r1 -; CHECK-NEON-NEXT: movne r1, r2 -; CHECK-NEON-NEXT: moveq r4, r6 -; CHECK-NEON-NEXT: moveq r0, r9 -; CHECK-NEON-NEXT: rsbs r2, r0, #0 -; CHECK-NEON-NEXT: rscs r2, r4, #-2147483648 -; CHECK-NEON-NEXT: sbcs r1, r9, r1 -; CHECK-NEON-NEXT: sbcs r1, r9, r3 -; CHECK-NEON-NEXT: movwlt r5, #1 -; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: movne r5, r0 -; CHECK-NEON-NEXT: moveq r4, r8 -; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: subs r7, r0, r9 -; CHECK-NEON-NEXT: vmov.32 d1[0], r5 -; CHECK-NEON-NEXT: sbcs r7, r1, r6 -; CHECK-NEON-NEXT: sbcs r7, r2, #0 -; CHECK-NEON-NEXT: sbcs r7, r3, #0 -; CHECK-NEON-NEXT: mov r7, #0 -; CHECK-NEON-NEXT: movwlt r7, #1 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: moveq r3, r7 -; CHECK-NEON-NEXT: movne r7, r2 -; CHECK-NEON-NEXT: movne r6, r1 -; CHECK-NEON-NEXT: moveq r0, r9 -; CHECK-NEON-NEXT: rsbs r1, r0, #0 -; CHECK-NEON-NEXT: rscs r1, r6, #-2147483648 -; CHECK-NEON-NEXT: sbcs r1, r9, r7 -; CHECK-NEON-NEXT: sbcs r1, r9, r3 -; CHECK-NEON-NEXT: movwlt r10, #1 -; CHECK-NEON-NEXT: cmp r10, #0 -; CHECK-NEON-NEXT: movne r10, r0 -; CHECK-NEON-NEXT: moveq r6, r8 -; CHECK-NEON-NEXT: vmov.32 d0[0], r10 -; CHECK-NEON-NEXT: vmov.32 d1[1], r4 -; CHECK-NEON-NEXT: vmov.32 d0[1], r6 +; CHECK-NEON-NEXT: mvn lr, #0 +; CHECK-NEON-NEXT: vmov s2, r4 +; CHECK-NEON-NEXT: mvn r1, #-2147483648 +; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0 +; CHECK-NEON-NEXT: mov r2, #0 +; CHECK-NEON-NEXT: mvn r3, #-2147483648 +; CHECK-NEON-NEXT: vcvt.s32.f32 s2, s2 +; CHECK-NEON-NEXT: mov r12, #-2147483648 +; CHECK-NEON-NEXT: mov r6, #0 +; CHECK-NEON-NEXT: vmov r4, s0 +; CHECK-NEON-NEXT: subs r0, r4, lr +; CHECK-NEON-NEXT: rscs r0, r1, r4, asr #31 +; CHECK-NEON-NEXT: rscs r0, r2, r4, asr #31 +; CHECK-NEON-NEXT: rscs r0, r2, r4, asr #31 +; CHECK-NEON-NEXT: mov r0, #0 +; CHECK-NEON-NEXT: asrlt r3, r4, #31 +; CHECK-NEON-NEXT: movwlt r0, #1 +; CHECK-NEON-NEXT: cmp r0, #0 +; CHECK-NEON-NEXT: asrne r0, r4, #31 +; CHECK-NEON-NEXT: moveq r4, lr +; CHECK-NEON-NEXT: rsbs r5, r4, #0 +; CHECK-NEON-NEXT: rscs r5, r3, #-2147483648 +; CHECK-NEON-NEXT: sbcs r5, lr, r0 +; CHECK-NEON-NEXT: sbcs r0, lr, r0 +; CHECK-NEON-NEXT: mov r0, #0 +; CHECK-NEON-NEXT: movge r3, r12 +; CHECK-NEON-NEXT: movwlt r0, #1 +; CHECK-NEON-NEXT: cmp r0, #0 +; CHECK-NEON-NEXT: movne r0, r4 +; CHECK-NEON-NEXT: vmov r4, s2 +; CHECK-NEON-NEXT: subs r5, r4, lr +; CHECK-NEON-NEXT: rscs r5, r1, r4, asr #31 +; CHECK-NEON-NEXT: rscs r5, r2, r4, asr #31 +; CHECK-NEON-NEXT: rscs r5, r2, r4, asr #31 +; CHECK-NEON-NEXT: mvn r5, #0 +; CHECK-NEON-NEXT: asrlt r1, r4, #31 +; CHECK-NEON-NEXT: movlt r5, r4 +; CHECK-NEON-NEXT: movwlt r6, #1 +; CHECK-NEON-NEXT: cmp r6, #0 +; CHECK-NEON-NEXT: asrne r6, r4, #31 +; CHECK-NEON-NEXT: rsbs r4, r5, #0 +; CHECK-NEON-NEXT: rscs r4, r1, #-2147483648 +; CHECK-NEON-NEXT: sbcs r4, lr, r6 +; CHECK-NEON-NEXT: sbcs r6, lr, r6 +; CHECK-NEON-NEXT: movwlt r2, #1 +; CHECK-NEON-NEXT: cmp r2, #0 +; CHECK-NEON-NEXT: movne r2, r5 +; CHECK-NEON-NEXT: moveq r1, r12 +; CHECK-NEON-NEXT: vmov.32 d0[0], r2 +; CHECK-NEON-NEXT: vmov.32 d1[0], r0 +; CHECK-NEON-NEXT: vmov.32 d0[1], r1 +; CHECK-NEON-NEXT: vmov.32 d1[1], r3 ; CHECK-NEON-NEXT: vpop {d8} -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEON-NEXT: pop {r4, r5, r6, pc} ; ; CHECK-FP16-LABEL: stest_f16i64: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-FP16-NEXT: vmov.u16 r0, d0[1] -; CHECK-FP16-NEXT: vmov.u16 r7, d0[0] -; CHECK-FP16-NEXT: vmov s0, r0 -; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: mov r4, r1 -; CHECK-FP16-NEXT: mvn r9, #0 -; CHECK-FP16-NEXT: subs r1, r0, r9 -; CHECK-FP16-NEXT: mvn r5, #-2147483648 -; CHECK-FP16-NEXT: sbcs r1, r4, r5 -; CHECK-FP16-NEXT: vmov s0, r7 -; CHECK-FP16-NEXT: sbcs r1, r2, #0 -; CHECK-FP16-NEXT: mov r7, #0 -; CHECK-FP16-NEXT: sbcs r1, r3, #0 -; CHECK-FP16-NEXT: mov r8, #-2147483648 -; CHECK-FP16-NEXT: mov r1, #0 -; CHECK-FP16-NEXT: mov r10, #0 -; CHECK-FP16-NEXT: movwlt r1, #1 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: moveq r3, r1 -; CHECK-FP16-NEXT: movne r1, r2 -; CHECK-FP16-NEXT: moveq r4, r5 -; CHECK-FP16-NEXT: moveq r0, r9 -; CHECK-FP16-NEXT: rsbs r2, r0, #0 -; CHECK-FP16-NEXT: rscs r2, r4, #-2147483648 -; CHECK-FP16-NEXT: sbcs r1, r9, r1 -; CHECK-FP16-NEXT: sbcs r1, r9, r3 -; CHECK-FP16-NEXT: movwlt r7, #1 -; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: movne r7, r0 -; CHECK-FP16-NEXT: moveq r4, r8 -; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: subs r6, r0, r9 -; CHECK-FP16-NEXT: vmov.32 d1[0], r7 -; CHECK-FP16-NEXT: sbcs r6, r1, r5 -; CHECK-FP16-NEXT: sbcs r6, r2, #0 -; CHECK-FP16-NEXT: sbcs r6, r3, #0 +; CHECK-FP16-NEXT: .save {r4, r5, r6, lr} +; CHECK-FP16-NEXT: push {r4, r5, r6, lr} +; CHECK-FP16-NEXT: vmovx.f16 s2, s0 +; CHECK-FP16-NEXT: mvn lr, #0 +; CHECK-FP16-NEXT: vcvt.s32.f16 s2, s2 +; CHECK-FP16-NEXT: mvn r1, #-2147483648 +; CHECK-FP16-NEXT: vmov r0, s2 +; CHECK-FP16-NEXT: mov r2, #0 +; CHECK-FP16-NEXT: mov r4, #0 +; CHECK-FP16-NEXT: mov r12, #-2147483648 +; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s0 ; CHECK-FP16-NEXT: mov r6, #0 +; CHECK-FP16-NEXT: subs r3, r0, lr +; CHECK-FP16-NEXT: rscs r3, r1, r0, asr #31 +; CHECK-FP16-NEXT: rscs r3, r2, r0, asr #31 +; CHECK-FP16-NEXT: rscs r3, r2, r0, asr #31 +; CHECK-FP16-NEXT: mvn r3, #-2147483648 +; CHECK-FP16-NEXT: movwlt r4, #1 +; CHECK-FP16-NEXT: asrlt r3, r0, #31 +; CHECK-FP16-NEXT: cmp r4, #0 +; CHECK-FP16-NEXT: asrne r4, r0, #31 +; CHECK-FP16-NEXT: moveq r0, lr +; CHECK-FP16-NEXT: rsbs r5, r0, #0 +; CHECK-FP16-NEXT: rscs r5, r3, #-2147483648 +; CHECK-FP16-NEXT: sbcs r5, lr, r4 +; CHECK-FP16-NEXT: sbcs r4, lr, r4 +; CHECK-FP16-NEXT: mov r4, #0 +; CHECK-FP16-NEXT: movge r3, r12 +; CHECK-FP16-NEXT: movwlt r4, #1 +; CHECK-FP16-NEXT: cmp r4, #0 +; CHECK-FP16-NEXT: movne r4, r0 +; CHECK-FP16-NEXT: vmov r0, s0 +; CHECK-FP16-NEXT: subs r5, r0, lr +; CHECK-FP16-NEXT: rscs r5, r1, r0, asr #31 +; CHECK-FP16-NEXT: rscs r5, r2, r0, asr #31 +; CHECK-FP16-NEXT: rscs r5, r2, r0, asr #31 +; CHECK-FP16-NEXT: mvn r5, #0 +; CHECK-FP16-NEXT: asrlt r1, r0, #31 +; CHECK-FP16-NEXT: movlt r5, r0 ; CHECK-FP16-NEXT: movwlt r6, #1 ; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: moveq r3, r6 -; CHECK-FP16-NEXT: movne r6, r2 -; CHECK-FP16-NEXT: movne r5, r1 -; CHECK-FP16-NEXT: moveq r0, r9 -; CHECK-FP16-NEXT: rsbs r1, r0, #0 -; CHECK-FP16-NEXT: rscs r1, r5, #-2147483648 -; CHECK-FP16-NEXT: sbcs r1, r9, r6 -; CHECK-FP16-NEXT: sbcs r1, r9, r3 -; CHECK-FP16-NEXT: movwlt r10, #1 -; CHECK-FP16-NEXT: cmp r10, #0 -; CHECK-FP16-NEXT: movne r10, r0 -; CHECK-FP16-NEXT: moveq r5, r8 -; CHECK-FP16-NEXT: vmov.32 d0[0], r10 -; CHECK-FP16-NEXT: vmov.32 d1[1], r4 -; CHECK-FP16-NEXT: vmov.32 d0[1], r5 -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-FP16-NEXT: asrne r6, r0, #31 +; CHECK-FP16-NEXT: rsbs r0, r5, #0 +; CHECK-FP16-NEXT: rscs r0, r1, #-2147483648 +; CHECK-FP16-NEXT: sbcs r0, lr, r6 +; CHECK-FP16-NEXT: sbcs r0, lr, r6 +; CHECK-FP16-NEXT: movwlt r2, #1 +; CHECK-FP16-NEXT: cmp r2, #0 +; CHECK-FP16-NEXT: movne r2, r5 +; CHECK-FP16-NEXT: moveq r1, r12 +; CHECK-FP16-NEXT: vmov.32 d0[0], r2 +; CHECK-FP16-NEXT: vmov.32 d1[0], r4 +; CHECK-FP16-NEXT: vmov.32 d0[1], r1 +; CHECK-FP16-NEXT: vmov.32 d1[1], r3 +; CHECK-FP16-NEXT: pop {r4, r5, r6, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -2136,72 +3123,42 @@ entry: define <2 x i64> @utest_f16i64(<2 x half> %x) { ; CHECK-NEON-LABEL: utest_f16i64: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, lr} +; CHECK-NEON-NEXT: .save {r4, lr} +; CHECK-NEON-NEXT: push {r4, lr} ; CHECK-NEON-NEXT: .vsave {d8} ; CHECK-NEON-NEXT: vpush {d8} ; CHECK-NEON-NEXT: vmov r0, s0 ; CHECK-NEON-NEXT: vmov.f32 s16, s1 ; CHECK-NEON-NEXT: bl __aeabi_h2f -; CHECK-NEON-NEXT: mov r5, r0 +; CHECK-NEON-NEXT: mov r4, r0 ; CHECK-NEON-NEXT: vmov r0, s16 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 -; CHECK-NEON-NEXT: bl __fixunssfti -; CHECK-NEON-NEXT: mov r4, r1 -; CHECK-NEON-NEXT: subs r1, r2, #1 -; CHECK-NEON-NEXT: vmov s0, r5 -; CHECK-NEON-NEXT: sbcs r1, r3, #0 -; CHECK-NEON-NEXT: mov r5, #0 -; CHECK-NEON-NEXT: mov r6, #0 -; CHECK-NEON-NEXT: movwlo r5, #1 -; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: moveq r4, r5 -; CHECK-NEON-NEXT: movne r5, r0 -; CHECK-NEON-NEXT: bl __fixunssfti -; CHECK-NEON-NEXT: subs r2, r2, #1 -; CHECK-NEON-NEXT: vmov.32 d1[0], r5 -; CHECK-NEON-NEXT: sbcs r2, r3, #0 -; CHECK-NEON-NEXT: movwlo r6, #1 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: moveq r0, r6 -; CHECK-NEON-NEXT: movne r6, r1 -; CHECK-NEON-NEXT: vmov.32 d0[0], r0 -; CHECK-NEON-NEXT: vmov.32 d1[1], r4 -; CHECK-NEON-NEXT: vmov.32 d0[1], r6 +; CHECK-NEON-NEXT: vmov s4, r4 +; CHECK-NEON-NEXT: vcvt.u32.f32 s2, s0 +; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s4 +; CHECK-NEON-NEXT: vldr s3, .LCPI25_0 +; CHECK-NEON-NEXT: vmov.f32 s1, s3 ; CHECK-NEON-NEXT: vpop {d8} -; CHECK-NEON-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEON-NEXT: pop {r4, pc} +; CHECK-NEON-NEXT: .p2align 2 +; CHECK-NEON-NEXT: @ %bb.1: +; CHECK-NEON-NEXT: .LCPI25_0: +; CHECK-NEON-NEXT: .long 0x00000000 @ float 0 ; ; CHECK-FP16-LABEL: utest_f16i64: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, lr} -; CHECK-FP16-NEXT: vmov.u16 r0, d0[1] -; CHECK-FP16-NEXT: vmov.u16 r6, d0[0] -; CHECK-FP16-NEXT: vmov s0, r0 -; CHECK-FP16-NEXT: bl __fixunshfti -; CHECK-FP16-NEXT: mov r4, r1 -; CHECK-FP16-NEXT: subs r1, r2, #1 -; CHECK-FP16-NEXT: vmov s0, r6 -; CHECK-FP16-NEXT: sbcs r1, r3, #0 -; CHECK-FP16-NEXT: mov r6, #0 -; CHECK-FP16-NEXT: mov r5, #0 -; CHECK-FP16-NEXT: movwlo r6, #1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: moveq r4, r6 -; CHECK-FP16-NEXT: movne r6, r0 -; CHECK-FP16-NEXT: bl __fixunshfti -; CHECK-FP16-NEXT: subs r2, r2, #1 -; CHECK-FP16-NEXT: vmov.32 d1[0], r6 -; CHECK-FP16-NEXT: sbcs r2, r3, #0 -; CHECK-FP16-NEXT: movwlo r5, #1 -; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: moveq r0, r5 -; CHECK-FP16-NEXT: movne r5, r1 -; CHECK-FP16-NEXT: vmov.32 d0[0], r0 -; CHECK-FP16-NEXT: vmov.32 d1[1], r4 -; CHECK-FP16-NEXT: vmov.32 d0[1], r5 -; CHECK-FP16-NEXT: pop {r4, r5, r6, pc} +; CHECK-FP16-NEXT: vcvt.u32.f16 s4, s0 +; CHECK-FP16-NEXT: vmovx.f16 s0, s0 +; CHECK-FP16-NEXT: vcvt.u32.f16 s6, s0 +; CHECK-FP16-NEXT: vldr s7, .LCPI25_0 +; CHECK-FP16-NEXT: vmov.f32 s5, s7 +; CHECK-FP16-NEXT: vorr q0, q1, q1 +; CHECK-FP16-NEXT: bx lr +; CHECK-FP16-NEXT: .p2align 2 +; CHECK-FP16-NEXT: @ %bb.1: +; CHECK-FP16-NEXT: .LCPI25_0: +; CHECK-FP16-NEXT: .long 0x00000000 @ float 0 entry: %conv = fptoui <2 x half> %x to <2 x i128> %0 = icmp ult <2 x i128> %conv, @@ -2213,118 +3170,119 @@ entry: define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-NEON-LABEL: ustest_f16i64: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, lr} +; CHECK-NEON-NEXT: .save {r4, lr} +; CHECK-NEON-NEXT: push {r4, lr} ; CHECK-NEON-NEXT: .vsave {d8} ; CHECK-NEON-NEXT: vpush {d8} ; CHECK-NEON-NEXT: vmov r0, s0 ; CHECK-NEON-NEXT: vmov.f32 s16, s1 ; CHECK-NEON-NEXT: bl __aeabi_h2f -; CHECK-NEON-NEXT: mov r5, r0 +; CHECK-NEON-NEXT: mov r4, r0 ; CHECK-NEON-NEXT: vmov r0, s16 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 -; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: mov r4, r1 -; CHECK-NEON-NEXT: subs r1, r2, #1 -; CHECK-NEON-NEXT: sbcs r1, r3, #0 -; CHECK-NEON-NEXT: mov r8, #1 +; CHECK-NEON-NEXT: mov r12, #1 +; CHECK-NEON-NEXT: mov r0, #0 +; CHECK-NEON-NEXT: vmov s2, r4 +; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0 +; CHECK-NEON-NEXT: mov r2, #1 +; CHECK-NEON-NEXT: vcvt.s32.f32 s2, s2 +; CHECK-NEON-NEXT: vmov r3, s0 +; CHECK-NEON-NEXT: rsbs r1, r12, r3, asr #31 +; CHECK-NEON-NEXT: rscs r1, r0, r3, asr #31 ; CHECK-NEON-NEXT: mov r1, #0 -; CHECK-NEON-NEXT: movge r2, r8 ; CHECK-NEON-NEXT: movwlt r1, #1 ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: moveq r3, r1 -; CHECK-NEON-NEXT: moveq r4, r1 -; CHECK-NEON-NEXT: movne r1, r0 -; CHECK-NEON-NEXT: rsbs r0, r1, #0 -; CHECK-NEON-NEXT: rscs r0, r4, #0 -; CHECK-NEON-NEXT: vmov s0, r5 -; CHECK-NEON-NEXT: rscs r0, r2, #0 -; CHECK-NEON-NEXT: mov r7, #0 -; CHECK-NEON-NEXT: rscs r0, r3, #0 -; CHECK-NEON-NEXT: mov r5, #0 -; CHECK-NEON-NEXT: movwlt r7, #1 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: moveq r4, r7 -; CHECK-NEON-NEXT: movne r7, r1 -; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: subs r6, r2, #1 -; CHECK-NEON-NEXT: vmov.32 d1[0], r7 -; CHECK-NEON-NEXT: sbcs r6, r3, #0 -; CHECK-NEON-NEXT: movlt r8, r2 +; CHECK-NEON-NEXT: mov lr, r1 +; CHECK-NEON-NEXT: movne r1, r3 +; CHECK-NEON-NEXT: asrne lr, r3, #31 +; CHECK-NEON-NEXT: asrne r2, r3, #31 +; CHECK-NEON-NEXT: rsbs r3, r1, #0 +; CHECK-NEON-NEXT: rscs r3, lr, #0 +; CHECK-NEON-NEXT: rscs r2, r2, #0 +; CHECK-NEON-NEXT: rscs r2, lr, #0 ; CHECK-NEON-NEXT: mov r2, #0 ; CHECK-NEON-NEXT: movwlt r2, #1 ; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: moveq r3, r2 -; CHECK-NEON-NEXT: moveq r1, r2 -; CHECK-NEON-NEXT: movne r2, r0 -; CHECK-NEON-NEXT: rsbs r0, r2, #0 -; CHECK-NEON-NEXT: rscs r0, r1, #0 -; CHECK-NEON-NEXT: rscs r0, r8, #0 -; CHECK-NEON-NEXT: rscs r0, r3, #0 -; CHECK-NEON-NEXT: movwlt r5, #1 -; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: moveq r2, r5 -; CHECK-NEON-NEXT: movne r5, r1 -; CHECK-NEON-NEXT: vmov.32 d0[0], r2 -; CHECK-NEON-NEXT: vmov.32 d1[1], r4 -; CHECK-NEON-NEXT: vmov.32 d0[1], r5 +; CHECK-NEON-NEXT: moveq lr, r2 +; CHECK-NEON-NEXT: movne r2, r1 +; CHECK-NEON-NEXT: vmov r1, s2 +; CHECK-NEON-NEXT: vmov.32 d1[0], r2 +; CHECK-NEON-NEXT: rsbs r3, r12, r1, asr #31 +; CHECK-NEON-NEXT: rscs r3, r0, r1, asr #31 +; CHECK-NEON-NEXT: mov r3, #0 +; CHECK-NEON-NEXT: movwlt r3, #1 +; CHECK-NEON-NEXT: cmp r3, #0 +; CHECK-NEON-NEXT: mov r4, r3 +; CHECK-NEON-NEXT: movne r3, r1 +; CHECK-NEON-NEXT: asrne r4, r1, #31 +; CHECK-NEON-NEXT: asrne r12, r1, #31 +; CHECK-NEON-NEXT: rsbs r1, r3, #0 +; CHECK-NEON-NEXT: rscs r1, r4, #0 +; CHECK-NEON-NEXT: rscs r1, r12, #0 +; CHECK-NEON-NEXT: rscs r1, r4, #0 +; CHECK-NEON-NEXT: movwlt r0, #1 +; CHECK-NEON-NEXT: cmp r0, #0 +; CHECK-NEON-NEXT: moveq r3, r0 +; CHECK-NEON-NEXT: movne r0, r4 +; CHECK-NEON-NEXT: vmov.32 d0[0], r3 +; CHECK-NEON-NEXT: vmov.32 d1[1], lr +; CHECK-NEON-NEXT: vmov.32 d0[1], r0 ; CHECK-NEON-NEXT: vpop {d8} -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc} +; CHECK-NEON-NEXT: pop {r4, pc} ; ; CHECK-FP16-LABEL: ustest_f16i64: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, lr} -; CHECK-FP16-NEXT: vmov.u16 r0, d0[1] -; CHECK-FP16-NEXT: vmov.u16 r5, d0[0] -; CHECK-FP16-NEXT: vmov s0, r0 -; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: mov r4, r1 -; CHECK-FP16-NEXT: subs r1, r2, #1 -; CHECK-FP16-NEXT: sbcs r1, r3, #0 -; CHECK-FP16-NEXT: mov r8, #1 +; CHECK-FP16-NEXT: .save {r4, lr} +; CHECK-FP16-NEXT: push {r4, lr} +; CHECK-FP16-NEXT: vcvt.s32.f16 s2, s0 +; CHECK-FP16-NEXT: vmovx.f16 s0, s0 +; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-FP16-NEXT: mov r12, #1 +; CHECK-FP16-NEXT: vmov r3, s0 +; CHECK-FP16-NEXT: mov r0, #0 +; CHECK-FP16-NEXT: mov r2, #1 +; CHECK-FP16-NEXT: rsbs r1, r12, r3, asr #31 +; CHECK-FP16-NEXT: rscs r1, r0, r3, asr #31 ; CHECK-FP16-NEXT: mov r1, #0 -; CHECK-FP16-NEXT: movge r2, r8 ; CHECK-FP16-NEXT: movwlt r1, #1 ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: moveq r3, r1 -; CHECK-FP16-NEXT: moveq r4, r1 -; CHECK-FP16-NEXT: movne r1, r0 -; CHECK-FP16-NEXT: rsbs r0, r1, #0 -; CHECK-FP16-NEXT: rscs r0, r4, #0 -; CHECK-FP16-NEXT: vmov s0, r5 -; CHECK-FP16-NEXT: rscs r0, r2, #0 -; CHECK-FP16-NEXT: mov r7, #0 -; CHECK-FP16-NEXT: rscs r0, r3, #0 -; CHECK-FP16-NEXT: mov r5, #0 -; CHECK-FP16-NEXT: movwlt r7, #1 -; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: moveq r4, r7 -; CHECK-FP16-NEXT: movne r7, r1 -; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: subs r6, r2, #1 -; CHECK-FP16-NEXT: vmov.32 d1[0], r7 -; CHECK-FP16-NEXT: sbcs r6, r3, #0 -; CHECK-FP16-NEXT: movlt r8, r2 +; CHECK-FP16-NEXT: mov lr, r1 +; CHECK-FP16-NEXT: movne r1, r3 +; CHECK-FP16-NEXT: asrne lr, r3, #31 +; CHECK-FP16-NEXT: asrne r2, r3, #31 +; CHECK-FP16-NEXT: rsbs r3, r1, #0 +; CHECK-FP16-NEXT: rscs r3, lr, #0 +; CHECK-FP16-NEXT: rscs r2, r2, #0 +; CHECK-FP16-NEXT: rscs r2, lr, #0 ; CHECK-FP16-NEXT: mov r2, #0 ; CHECK-FP16-NEXT: movwlt r2, #1 ; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: moveq r3, r2 -; CHECK-FP16-NEXT: moveq r1, r2 -; CHECK-FP16-NEXT: movne r2, r0 -; CHECK-FP16-NEXT: rsbs r0, r2, #0 -; CHECK-FP16-NEXT: rscs r0, r1, #0 -; CHECK-FP16-NEXT: rscs r0, r8, #0 -; CHECK-FP16-NEXT: rscs r0, r3, #0 -; CHECK-FP16-NEXT: movwlt r5, #1 -; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: moveq r2, r5 -; CHECK-FP16-NEXT: movne r5, r1 -; CHECK-FP16-NEXT: vmov.32 d0[0], r2 -; CHECK-FP16-NEXT: vmov.32 d1[1], r4 -; CHECK-FP16-NEXT: vmov.32 d0[1], r5 -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, pc} +; CHECK-FP16-NEXT: moveq lr, r2 +; CHECK-FP16-NEXT: movne r2, r1 +; CHECK-FP16-NEXT: vmov r1, s2 +; CHECK-FP16-NEXT: vmov.32 d1[0], r2 +; CHECK-FP16-NEXT: rsbs r3, r12, r1, asr #31 +; CHECK-FP16-NEXT: rscs r3, r0, r1, asr #31 +; CHECK-FP16-NEXT: mov r3, #0 +; CHECK-FP16-NEXT: movwlt r3, #1 +; CHECK-FP16-NEXT: cmp r3, #0 +; CHECK-FP16-NEXT: mov r4, r3 +; CHECK-FP16-NEXT: movne r3, r1 +; CHECK-FP16-NEXT: asrne r4, r1, #31 +; CHECK-FP16-NEXT: asrne r12, r1, #31 +; CHECK-FP16-NEXT: rsbs r1, r3, #0 +; CHECK-FP16-NEXT: rscs r1, r4, #0 +; CHECK-FP16-NEXT: rscs r1, r12, #0 +; CHECK-FP16-NEXT: rscs r1, r4, #0 +; CHECK-FP16-NEXT: movwlt r0, #1 +; CHECK-FP16-NEXT: cmp r0, #0 +; CHECK-FP16-NEXT: moveq r3, r0 +; CHECK-FP16-NEXT: movne r0, r4 +; CHECK-FP16-NEXT: vmov.32 d0[0], r3 +; CHECK-FP16-NEXT: vmov.32 d1[1], lr +; CHECK-FP16-NEXT: vmov.32 d0[1], r0 +; CHECK-FP16-NEXT: pop {r4, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -3615,66 +4573,272 @@ entry: define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: stest_f64i64_mm: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vorr d0, d9, d9 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: mvn r9, #0 -; CHECK-NEXT: subs r1, r0, r9 -; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r4, r5 -; CHECK-NEXT: vorr d0, d8, d8 -; CHECK-NEXT: sbcs r1, r2, #0 -; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: mov r8, #-2147483648 +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #100 +; CHECK-NEXT: sub sp, sp, #100 +; CHECK-NEXT: vmov r4, r1, d0 +; CHECK-NEXT: mov r11, #1 +; CHECK-NEXT: mvn lr, #0 +; CHECK-NEXT: movw r2, #1023 +; CHECK-NEXT: ubfx r0, r1, #20, #11 +; CHECK-NEXT: cmp r0, r2 +; CHECK-NEXT: bhs .LBB45_2 +; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: mov r8, #0 ; CHECK-NEXT: mov r10, #0 -; CHECK-NEXT: movwlt r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r3, r1 -; CHECK-NEXT: movne r1, r2 -; CHECK-NEXT: moveq r4, r5 -; CHECK-NEXT: moveq r0, r9 -; CHECK-NEXT: rsbs r2, r0, #0 -; CHECK-NEXT: rscs r2, r4, #-2147483648 -; CHECK-NEXT: sbcs r1, r9, r1 -; CHECK-NEXT: sbcs r1, r9, r3 +; CHECK-NEXT: b .LBB45_5 +; CHECK-NEXT: .LBB45_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: orr r9, r11, r1, asr #31 +; CHECK-NEXT: bfi r6, r11, #20, #12 +; CHECK-NEXT: asr r3, r1, #31 +; CHECK-NEXT: movw r7, #1074 +; CHECK-NEXT: cmp r0, r7 +; CHECK-NEXT: bhi .LBB45_4 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: movw r7, #1075 +; CHECK-NEXT: mov r8, #0 +; CHECK-NEXT: sub r7, r7, r0 +; CHECK-NEXT: rsb r5, r7, #32 +; CHECK-NEXT: lsr r4, r4, r7 +; CHECK-NEXT: lsr r7, r6, r7 +; CHECK-NEXT: orr r5, r4, r6, lsl r5 +; CHECK-NEXT: movw r4, #1043 +; CHECK-NEXT: subs r0, r4, r0 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: movwpl r7, #0 +; CHECK-NEXT: lsrpl r5, r6, r0 +; CHECK-NEXT: umull r1, r0, r5, r9 +; CHECK-NEXT: umlal r0, r4, r7, r9 +; CHECK-NEXT: umull r9, r6, r5, r3 +; CHECK-NEXT: adds r12, r9, r0 +; CHECK-NEXT: adcs r0, r4, r6 +; CHECK-NEXT: mla r6, r3, r7, r6 +; CHECK-NEXT: adc r4, r8, #0 +; CHECK-NEXT: umlal r0, r4, r7, r3 +; CHECK-NEXT: mla r3, r3, r5, r6 +; CHECK-NEXT: adds r8, r0, r9 +; CHECK-NEXT: adc r10, r4, r3 +; CHECK-NEXT: b .LBB45_5 +; CHECK-NEXT: .LBB45_4: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: add r1, sp, #80 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: stm r1, {r4, r6, r7} +; CHECK-NEXT: movw r6, #1075 +; CHECK-NEXT: sub r0, r0, r6 +; CHECK-NEXT: mov r6, #12 +; CHECK-NEXT: add r5, sp, #64 +; CHECK-NEXT: and r6, r6, r0, lsr #3 +; CHECK-NEXT: add r5, r5, #16 +; CHECK-NEXT: str r7, [sp, #92] +; CHECK-NEXT: and r0, r0, #31 +; CHECK-NEXT: str r7, [sp, #76] +; CHECK-NEXT: eor r8, r0, #31 +; CHECK-NEXT: str r7, [sp, #72] +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: str r7, [sp, #68] +; CHECK-NEXT: str r7, [sp, #64] +; CHECK-NEXT: ldr r6, [r5, -r6]! +; CHECK-NEXT: ldr r1, [r5, #4] +; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: lsr r4, r6, #1 +; CHECK-NEXT: ldr r10, [r5, #8] +; CHECK-NEXT: ldr r2, [r5, #12] +; CHECK-NEXT: lsl r5, r1, r0 +; CHECK-NEXT: orr r4, r5, r4, lsr r8 +; CHECK-NEXT: lsl r5, r6, r0 +; CHECK-NEXT: umull r1, r6, r5, r9 +; CHECK-NEXT: str r2, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: umlal r6, r12, r4, r9 +; CHECK-NEXT: umull lr, r2, r5, r3 +; CHECK-NEXT: adds r11, lr, r6 +; CHECK-NEXT: adcs r6, r12, r2 +; CHECK-NEXT: mla r2, r3, r4, r2 +; CHECK-NEXT: adc r7, r7, #0 +; CHECK-NEXT: umlal r6, r7, r4, r3 +; CHECK-NEXT: lsr r4, r10, #1 +; CHECK-NEXT: mla r12, r3, r5, r2 +; CHECK-NEXT: ldr r2, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: lsl r5, r2, r0 +; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: orr r5, r5, r4, lsr r8 +; CHECK-NEXT: lsl r0, r10, r0 +; CHECK-NEXT: lsrs r4, r2, #1 +; CHECK-NEXT: orr r0, r0, r4, lsr r8 +; CHECK-NEXT: umull r4, r2, r9, r0 +; CHECK-NEXT: mla r2, r9, r5, r2 +; CHECK-NEXT: mla r0, r3, r0, r2 +; CHECK-NEXT: adds r3, lr, r4 +; CHECK-NEXT: mvn lr, #0 +; CHECK-NEXT: movw r2, #1023 +; CHECK-NEXT: adc r0, r12, r0 +; CHECK-NEXT: adds r8, r6, r3 +; CHECK-NEXT: adc r10, r7, r0 +; CHECK-NEXT: mov r12, r11 +; CHECK-NEXT: mov r11, #1 +; CHECK-NEXT: .LBB45_5: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r4, r7, d1 +; CHECK-NEXT: mvn r6, #0 +; CHECK-NEXT: mov r9, #0 +; CHECK-NEXT: cmn r7, #1 +; CHECK-NEXT: ubfx r0, r7, #20, #11 +; CHECK-NEXT: movwgt r6, #0 +; CHECK-NEXT: movwgt lr, #1 +; CHECK-NEXT: cmp r0, r2 +; CHECK-NEXT: bhs .LBB45_7 +; CHECK-NEXT: @ %bb.6: +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: b .LBB45_11 +; CHECK-NEXT: .LBB45_7: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: bfi r7, r11, #20, #12 +; CHECK-NEXT: movw r2, #1074 +; CHECK-NEXT: cmp r0, r2 +; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: str r8, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: bhi .LBB45_9 +; CHECK-NEXT: @ %bb.8: @ %fp-to-i-if-exp.small +; CHECK-NEXT: movw r2, #1075 +; CHECK-NEXT: mov r8, #0 +; CHECK-NEXT: sub r2, r2, r0 +; CHECK-NEXT: rsb r5, r2, #32 +; CHECK-NEXT: lsr r4, r4, r2 +; CHECK-NEXT: lsr r2, r7, r2 +; CHECK-NEXT: orr r3, r4, r7, lsl r5 +; CHECK-NEXT: movw r4, #1043 +; CHECK-NEXT: subs r0, r4, r0 +; CHECK-NEXT: movwpl r2, #0 +; CHECK-NEXT: lsrpl r3, r7, r0 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: umull r5, r0, r3, lr +; CHECK-NEXT: umlal r0, r7, r2, lr +; CHECK-NEXT: umull lr, r11, r3, r6 +; CHECK-NEXT: adds r4, lr, r0 +; CHECK-NEXT: adcs r0, r7, r11 +; CHECK-NEXT: adc r7, r8, #0 +; CHECK-NEXT: mla r8, r6, r2, r11 +; CHECK-NEXT: umlal r0, r7, r2, r6 +; CHECK-NEXT: mla r2, r6, r3, r8 +; CHECK-NEXT: adds r0, r0, lr +; CHECK-NEXT: adc r7, r7, r2 +; CHECK-NEXT: b .LBB45_10 +; CHECK-NEXT: .LBB45_9: @ %fp-to-i-if-exp.large +; CHECK-NEXT: movw r2, #1075 +; CHECK-NEXT: sub r0, r0, r2 +; CHECK-NEXT: mov r2, #12 +; CHECK-NEXT: add r3, sp, #32 +; CHECK-NEXT: mov r8, #0 +; CHECK-NEXT: add r1, sp, #48 +; CHECK-NEXT: and r2, r2, r0, lsr #3 +; CHECK-NEXT: add r3, r3, #16 +; CHECK-NEXT: str r8, [sp, #60] +; CHECK-NEXT: and r0, r0, #31 +; CHECK-NEXT: str r8, [sp, #44] +; CHECK-NEXT: str r8, [sp, #40] +; CHECK-NEXT: str r8, [sp, #36] +; CHECK-NEXT: str r8, [sp, #32] +; CHECK-NEXT: stm r1, {r4, r7, r8} +; CHECK-NEXT: ldr r2, [r3, -r2]! +; CHECK-NEXT: ldr r1, [r3, #4] +; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: lsr r5, r2, #1 +; CHECK-NEXT: ldr r4, [r3, #8] +; CHECK-NEXT: lsl r2, r2, r0 +; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: lsl r4, r1, r0 +; CHECK-NEXT: eor r1, r0, #31 +; CHECK-NEXT: ldr r3, [r3, #12] +; CHECK-NEXT: str r12, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: umull r7, r11, r2, r6 +; CHECK-NEXT: orr r12, r4, r5, lsr r1 +; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: umull r5, r4, r2, lr +; CHECK-NEXT: mov r3, lr +; CHECK-NEXT: mov lr, #0 +; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: umlal r4, lr, r12, r3 +; CHECK-NEXT: adds r4, r7, r4 +; CHECK-NEXT: mla r7, r6, r12, r11 +; CHECK-NEXT: adcs lr, lr, r11 +; CHECK-NEXT: adc r8, r8, #0 +; CHECK-NEXT: umlal lr, r8, r12, r6 +; CHECK-NEXT: mla r12, r6, r2, r7 +; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldr r7, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: lsl r11, r2, r0 +; CHECK-NEXT: lsr r2, r7, #1 +; CHECK-NEXT: lsl r0, r7, r0 +; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: orr r2, r11, r2, lsr r1 +; CHECK-NEXT: lsrs r7, r7, #1 +; CHECK-NEXT: orr r0, r0, r7, lsr r1 +; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: umull r11, r7, r3, r0 +; CHECK-NEXT: mla r2, r3, r2, r7 +; CHECK-NEXT: mla r0, r6, r0, r2 +; CHECK-NEXT: adds r2, r1, r11 +; CHECK-NEXT: adc r3, r12, r0 +; CHECK-NEXT: adds r0, lr, r2 +; CHECK-NEXT: ldr r12, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: adc r7, r8, r3 +; CHECK-NEXT: .LBB45_10: @ %fp-to-i-cleanup +; CHECK-NEXT: ldr r8, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: .LBB45_11: @ %fp-to-i-cleanup +; CHECK-NEXT: mvn r6, #0 +; CHECK-NEXT: subs r3, r5, r6 +; CHECK-NEXT: mvn r2, #-2147483648 +; CHECK-NEXT: sbcs r3, r4, r2 +; CHECK-NEXT: sbcs r3, r0, #0 +; CHECK-NEXT: sbcs r3, r7, #0 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: movwlt r3, #1 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: moveq r7, r3 +; CHECK-NEXT: movne r3, r0 +; CHECK-NEXT: moveq r4, r2 +; CHECK-NEXT: moveq r5, r6 +; CHECK-NEXT: rsbs r0, r5, #0 +; CHECK-NEXT: rscs r0, r4, #-2147483648 +; CHECK-NEXT: sbcs r0, r6, r3 +; CHECK-NEXT: sbcs r0, r6, r7 +; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: movwlt r7, #1 ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: movne r7, r0 -; CHECK-NEXT: moveq r4, r8 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r6, r0, r9 +; CHECK-NEXT: mov r0, #-2147483648 +; CHECK-NEXT: movne r7, r5 +; CHECK-NEXT: moveq r4, r0 +; CHECK-NEXT: subs r3, r1, r6 +; CHECK-NEXT: sbcs r3, r12, r2 ; CHECK-NEXT: vmov.32 d1[0], r7 -; CHECK-NEXT: sbcs r6, r1, r5 -; CHECK-NEXT: sbcs r6, r2, #0 -; CHECK-NEXT: sbcs r6, r3, #0 -; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: movwlt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r3, r6 -; CHECK-NEXT: movne r6, r2 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: moveq r0, r9 -; CHECK-NEXT: rsbs r1, r0, #0 -; CHECK-NEXT: rscs r1, r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r9, r6 -; CHECK-NEXT: sbcs r1, r9, r3 -; CHECK-NEXT: movwlt r10, #1 -; CHECK-NEXT: cmp r10, #0 -; CHECK-NEXT: movne r10, r0 -; CHECK-NEXT: moveq r5, r8 -; CHECK-NEXT: vmov.32 d0[0], r10 +; CHECK-NEXT: sbcs r3, r8, #0 +; CHECK-NEXT: sbcs r3, r10, #0 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: movwlt r3, #1 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: moveq r10, r3 +; CHECK-NEXT: movne r3, r8 +; CHECK-NEXT: movne r2, r12 +; CHECK-NEXT: moveq r1, r6 +; CHECK-NEXT: rsbs r5, r1, #0 +; CHECK-NEXT: rscs r5, r2, #-2147483648 +; CHECK-NEXT: sbcs r3, r6, r3 +; CHECK-NEXT: sbcs r3, r6, r10 +; CHECK-NEXT: movwlt r9, #1 +; CHECK-NEXT: cmp r9, #0 +; CHECK-NEXT: movne r9, r1 +; CHECK-NEXT: moveq r2, r0 +; CHECK-NEXT: vmov.32 d0[0], r9 ; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: vmov.32 d0[1], r5 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: vmov.32 d0[1], r2 +; CHECK-NEXT: add sp, sp, #100 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -3685,37 +4849,154 @@ entry: define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: utest_f64i64_mm: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vorr d0, d9, d9 -; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: subs r1, r2, #1 -; CHECK-NEXT: vorr d0, d8, d8 -; CHECK-NEXT: sbcs r1, r3, #0 +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .pad #64 +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: vmov r1, r0, d0 +; CHECK-NEXT: mov lr, #1 +; CHECK-NEXT: movw r8, #1023 +; CHECK-NEXT: ubfx r2, r0, #20, #11 +; CHECK-NEXT: cmp r2, r8 +; CHECK-NEXT: bhs .LBB46_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: b .LBB46_4 +; CHECK-NEXT: .LBB46_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: bfi r0, lr, #20, #12 +; CHECK-NEXT: movw r3, #1074 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: bhi .LBB46_5 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: movw r3, #1075 +; CHECK-NEXT: sub r3, r3, r2 +; CHECK-NEXT: rsb r7, r3, #32 +; CHECK-NEXT: lsr r1, r1, r3 +; CHECK-NEXT: lsr r10, r0, r3 +; CHECK-NEXT: orr r1, r1, r0, lsl r7 +; CHECK-NEXT: movw r7, #1043 +; CHECK-NEXT: subs r2, r7, r2 +; CHECK-NEXT: movwpl r10, #0 +; CHECK-NEXT: lsrpl r1, r0, r2 +; CHECK-NEXT: .LBB46_4: @ %fp-to-i-cleanup1 +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: mov r9, #0 +; CHECK-NEXT: b .LBB46_6 +; CHECK-NEXT: .LBB46_5: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: str r0, [sp, #52] +; CHECK-NEXT: movw r0, #1075 +; CHECK-NEXT: sub r0, r2, r0 +; CHECK-NEXT: add r2, sp, #32 +; CHECK-NEXT: str r1, [sp, #48] +; CHECK-NEXT: mov r1, #12 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: and r1, r1, r0, lsr #3 +; CHECK-NEXT: add r2, r2, #16 +; CHECK-NEXT: str r3, [sp, #60] +; CHECK-NEXT: str r3, [sp, #56] +; CHECK-NEXT: and r6, r0, #31 +; CHECK-NEXT: str r3, [sp, #44] +; CHECK-NEXT: eor r4, r6, #31 +; CHECK-NEXT: str r3, [sp, #40] +; CHECK-NEXT: str r3, [sp, #36] +; CHECK-NEXT: str r3, [sp, #32] +; CHECK-NEXT: ldr r1, [r2, -r1]! +; CHECK-NEXT: ldmib r2, {r3, r7} +; CHECK-NEXT: lsr r5, r1, #1 +; CHECK-NEXT: ldr r2, [r2, #12] +; CHECK-NEXT: lsl r1, r1, r6 +; CHECK-NEXT: lsl r0, r3, r6 +; CHECK-NEXT: orr r10, r0, r5, lsr r4 +; CHECK-NEXT: lsr r5, r7, #1 +; CHECK-NEXT: lsrs r3, r3, #1 +; CHECK-NEXT: lsl r2, r2, r6 +; CHECK-NEXT: orr r9, r2, r5, lsr r4 +; CHECK-NEXT: lsl r2, r7, r6 +; CHECK-NEXT: orr r12, r2, r3, lsr r4 +; CHECK-NEXT: .LBB46_6: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r3, r6, d1 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: ubfx r7, r6, #20, #11 +; CHECK-NEXT: cmp r7, r8 +; CHECK-NEXT: bhs .LBB46_8 +; CHECK-NEXT: @ %bb.7: +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: b .LBB46_10 +; CHECK-NEXT: .LBB46_8: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: bfi r6, lr, #20, #12 +; CHECK-NEXT: movw r4, #1074 +; CHECK-NEXT: cmp r7, r4 +; CHECK-NEXT: bhi .LBB46_11 +; CHECK-NEXT: @ %bb.9: @ %fp-to-i-if-exp.small +; CHECK-NEXT: movw r4, #1075 +; CHECK-NEXT: sub r4, r4, r7 +; CHECK-NEXT: rsb r5, r4, #32 +; CHECK-NEXT: lsr r3, r3, r4 +; CHECK-NEXT: lsr r4, r6, r4 +; CHECK-NEXT: orr r3, r3, r6, lsl r5 +; CHECK-NEXT: movw r5, #1043 +; CHECK-NEXT: subs r7, r5, r7 +; CHECK-NEXT: movwpl r4, #0 +; CHECK-NEXT: lsrpl r3, r6, r7 +; CHECK-NEXT: .LBB46_10: @ %fp-to-i-cleanup +; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: b .LBB46_12 +; CHECK-NEXT: .LBB46_11: @ %fp-to-i-if-exp.large +; CHECK-NEXT: str r3, [sp, #16] +; CHECK-NEXT: movw r3, #1075 +; CHECK-NEXT: sub r3, r7, r3 +; CHECK-NEXT: mov r7, #12 +; CHECK-NEXT: str r6, [sp, #20] +; CHECK-NEXT: mov r6, sp ; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: movwlo r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r4, r6 -; CHECK-NEXT: movne r6, r0 -; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: subs r2, r2, #1 -; CHECK-NEXT: vmov.32 d1[0], r6 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: movwlo r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: moveq r0, r5 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: vmov.32 d0[0], r0 +; CHECK-NEXT: and r7, r7, r3, lsr #3 +; CHECK-NEXT: add r6, r6, #16 +; CHECK-NEXT: str r5, [sp, #28] +; CHECK-NEXT: str r5, [sp, #24] +; CHECK-NEXT: and r3, r3, #31 +; CHECK-NEXT: str r5, [sp, #12] +; CHECK-NEXT: eor lr, r3, #31 +; CHECK-NEXT: str r5, [sp, #8] +; CHECK-NEXT: str r5, [sp, #4] +; CHECK-NEXT: str r5, [sp] +; CHECK-NEXT: ldr r5, [r6, -r7]! +; CHECK-NEXT: ldr r7, [r6, #4] +; CHECK-NEXT: ldr r0, [r6, #8] +; CHECK-NEXT: ldr r8, [r6, #12] +; CHECK-NEXT: lsr r6, r5, #1 +; CHECK-NEXT: lsl r4, r7, r3 +; CHECK-NEXT: lsrs r7, r7, #1 +; CHECK-NEXT: orr r4, r4, r6, lsr lr +; CHECK-NEXT: lsr r6, r0, #1 +; CHECK-NEXT: lsl r8, r8, r3 +; CHECK-NEXT: lsl r0, r0, r3 +; CHECK-NEXT: orr r6, r8, r6, lsr lr +; CHECK-NEXT: orr r7, r0, r7, lsr lr +; CHECK-NEXT: lsl r3, r5, r3 +; CHECK-NEXT: .LBB46_12: @ %fp-to-i-cleanup +; CHECK-NEXT: subs r0, r7, #1 +; CHECK-NEXT: sbcs r0, r6, #0 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movwlo r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: moveq r4, r0 +; CHECK-NEXT: movne r0, r3 +; CHECK-NEXT: subs r3, r12, #1 +; CHECK-NEXT: vmov.32 d1[0], r0 +; CHECK-NEXT: sbcs r3, r9, #0 +; CHECK-NEXT: movwlo r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: moveq r1, r2 +; CHECK-NEXT: movne r2, r10 +; CHECK-NEXT: vmov.32 d0[0], r1 ; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: vmov.32 d0[1], r5 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEXT: vmov.32 d0[1], r2 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptoui <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -3725,46 +5006,252 @@ entry: define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: ustest_f64i64_mm: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vorr d0, d9, d9 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: subs r0, r2, #1 -; CHECK-NEXT: sbcs r0, r3, #0 -; CHECK-NEXT: vorr d0, d8, d8 +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #100 +; CHECK-NEXT: sub sp, sp, #100 +; CHECK-NEXT: vmov r5, r3, d0 +; CHECK-NEXT: mov r8, #1 +; CHECK-NEXT: mvn r10, #0 +; CHECK-NEXT: movw r9, #1023 +; CHECK-NEXT: ubfx r0, r3, #20, #11 +; CHECK-NEXT: cmp r0, r9 +; CHECK-NEXT: bhs .LBB47_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: mov r11, #0 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: mov lr, #0 +; CHECK-NEXT: b .LBB47_5 +; CHECK-NEXT: .LBB47_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: orr lr, r8, r3, asr #31 +; CHECK-NEXT: bfi r1, r8, #20, #12 +; CHECK-NEXT: asr r3, r3, #31 +; CHECK-NEXT: movw r7, #1074 +; CHECK-NEXT: cmp r0, r7 +; CHECK-NEXT: bhi .LBB47_4 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: movw r7, #1075 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: sub r7, r7, r0 +; CHECK-NEXT: rsb r4, r7, #32 +; CHECK-NEXT: lsr r5, r5, r7 +; CHECK-NEXT: lsr r7, r1, r7 +; CHECK-NEXT: orr r5, r5, r1, lsl r4 +; CHECK-NEXT: movw r4, #1043 +; CHECK-NEXT: subs r0, r4, r0 +; CHECK-NEXT: movwpl r7, #0 +; CHECK-NEXT: lsrpl r5, r1, r0 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: umull r12, r4, r5, lr +; CHECK-NEXT: umlal r4, r2, r7, lr +; CHECK-NEXT: umull lr, r6, r5, r3 +; CHECK-NEXT: adds r11, lr, r4 +; CHECK-NEXT: adcs r2, r2, r6 +; CHECK-NEXT: mla r6, r3, r7, r6 +; CHECK-NEXT: adc r4, r0, #0 +; CHECK-NEXT: umlal r2, r4, r7, r3 +; CHECK-NEXT: mla r3, r3, r5, r6 +; CHECK-NEXT: adds r1, r2, lr +; CHECK-NEXT: adc lr, r4, r3 +; CHECK-NEXT: b .LBB47_5 +; CHECK-NEXT: .LBB47_4: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: str r1, [sp, #84] +; CHECK-NEXT: movw r1, #1075 +; CHECK-NEXT: sub r0, r0, r1 +; CHECK-NEXT: mov r1, #12 +; CHECK-NEXT: add r2, sp, #64 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: and r1, r1, r0, lsr #3 +; CHECK-NEXT: add r2, r2, #16 +; CHECK-NEXT: str r7, [sp, #92] +; CHECK-NEXT: mov r9, #0 +; CHECK-NEXT: str r7, [sp, #88] +; CHECK-NEXT: str r5, [sp, #80] +; CHECK-NEXT: str r7, [sp, #76] +; CHECK-NEXT: str r7, [sp, #72] +; CHECK-NEXT: str r7, [sp, #68] +; CHECK-NEXT: str r7, [sp, #64] +; CHECK-NEXT: ldr r1, [r2, -r1]! +; CHECK-NEXT: ldr r6, [r2, #4] +; CHECK-NEXT: str r6, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: lsr r4, r1, #1 +; CHECK-NEXT: ldr r5, [r2, #8] +; CHECK-NEXT: str r5, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: ldr r2, [r2, #12] +; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: and r2, r0, #31 +; CHECK-NEXT: eor r5, r2, #31 +; CHECK-NEXT: lsl r8, r1, r2 +; CHECK-NEXT: lsl r0, r6, r2 +; CHECK-NEXT: umull r12, r1, r8, lr +; CHECK-NEXT: orr r4, r0, r4, lsr r5 +; CHECK-NEXT: umlal r1, r9, r4, lr +; CHECK-NEXT: umull r10, r0, r8, r3 +; CHECK-NEXT: adds r11, r10, r1 +; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: adcs r6, r9, r0 +; CHECK-NEXT: movw r9, #1023 +; CHECK-NEXT: adc r7, r7, #0 +; CHECK-NEXT: umlal r6, r7, r4, r3 +; CHECK-NEXT: mla r4, r3, r4, r0 +; CHECK-NEXT: mla r0, r3, r8, r4 +; CHECK-NEXT: lsl r8, r1, r2 +; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: lsr r4, r1, #1 +; CHECK-NEXT: lsl r2, r1, r2 +; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: orr r8, r8, r4, lsr r5 +; CHECK-NEXT: lsrs r4, r1, #1 +; CHECK-NEXT: orr r2, r2, r4, lsr r5 +; CHECK-NEXT: umull r4, r5, lr, r2 +; CHECK-NEXT: mla r5, lr, r8, r5 +; CHECK-NEXT: mov r8, #1 +; CHECK-NEXT: mla r2, r3, r2, r5 +; CHECK-NEXT: adds r3, r10, r4 +; CHECK-NEXT: mvn r10, #0 +; CHECK-NEXT: adc r2, r0, r2 +; CHECK-NEXT: adds r1, r6, r3 +; CHECK-NEXT: adc lr, r7, r2 +; CHECK-NEXT: .LBB47_5: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r5, r7, d1 +; CHECK-NEXT: mvn r6, #0 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: cmn r7, #1 +; CHECK-NEXT: ubfx r4, r7, #20, #11 +; CHECK-NEXT: movwgt r6, #0 +; CHECK-NEXT: movwgt r10, #1 +; CHECK-NEXT: cmp r4, r9 +; CHECK-NEXT: bhs .LBB47_7 +; CHECK-NEXT: @ %bb.6: +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: b .LBB47_11 +; CHECK-NEXT: .LBB47_7: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: bfi r7, r8, #20, #12 +; CHECK-NEXT: movw r2, #1074 +; CHECK-NEXT: cmp r4, r2 +; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: bhi .LBB47_9 +; CHECK-NEXT: @ %bb.8: @ %fp-to-i-if-exp.small +; CHECK-NEXT: movw r2, #1075 +; CHECK-NEXT: sub r2, r2, r4 +; CHECK-NEXT: lsr r8, r5, r2 +; CHECK-NEXT: rsb r5, r2, #32 +; CHECK-NEXT: lsr r2, r7, r2 +; CHECK-NEXT: orr r0, r8, r7, lsl r5 +; CHECK-NEXT: movw r5, #1043 +; CHECK-NEXT: subs r4, r5, r4 +; CHECK-NEXT: mov r8, #0 +; CHECK-NEXT: movwpl r2, #0 +; CHECK-NEXT: lsrpl r0, r7, r4 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: umull r4, r5, r0, r10 +; CHECK-NEXT: umlal r5, r7, r2, r10 +; CHECK-NEXT: umull r9, r1, r0, r6 +; CHECK-NEXT: adds r5, r9, r5 +; CHECK-NEXT: adcs r7, r7, r1 +; CHECK-NEXT: mla r1, r6, r2, r1 +; CHECK-NEXT: adc r8, r8, #0 +; CHECK-NEXT: umlal r7, r8, r2, r6 +; CHECK-NEXT: mla r0, r6, r0, r1 +; CHECK-NEXT: adds r6, r7, r9 +; CHECK-NEXT: adc r2, r8, r0 +; CHECK-NEXT: b .LBB47_10 +; CHECK-NEXT: .LBB47_9: @ %fp-to-i-if-exp.large +; CHECK-NEXT: add r0, sp, #48 +; CHECK-NEXT: mov r9, #0 +; CHECK-NEXT: stm r0, {r5, r7, r9} +; CHECK-NEXT: movw r0, #1075 +; CHECK-NEXT: sub r0, r4, r0 +; CHECK-NEXT: mov r1, #12 +; CHECK-NEXT: add r2, sp, #32 +; CHECK-NEXT: and r1, r1, r0, lsr #3 +; CHECK-NEXT: add r2, r2, #16 +; CHECK-NEXT: str r9, [sp, #60] +; CHECK-NEXT: and r0, r0, #31 +; CHECK-NEXT: str r9, [sp, #44] +; CHECK-NEXT: eor r7, r0, #31 +; CHECK-NEXT: str r9, [sp, #40] +; CHECK-NEXT: mov r8, r10 +; CHECK-NEXT: str r9, [sp, #36] +; CHECK-NEXT: str r9, [sp, #32] +; CHECK-NEXT: ldr r1, [r2, -r1]! +; CHECK-NEXT: ldr r4, [r2, #4] +; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: ldr r5, [r2, #8] +; CHECK-NEXT: str r5, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: lsr r5, r1, #1 +; CHECK-NEXT: lsl r4, r4, r0 +; CHECK-NEXT: lsl r1, r1, r0 +; CHECK-NEXT: ldr r2, [r2, #12] +; CHECK-NEXT: str lr, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: orr lr, r4, r5, lsr r7 +; CHECK-NEXT: umull r4, r5, r1, r10 +; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: str r12, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: umlal r5, r10, lr, r8 +; CHECK-NEXT: umull r12, r2, r1, r6 +; CHECK-NEXT: adds r5, r12, r5 +; CHECK-NEXT: adcs r10, r10, r2 +; CHECK-NEXT: mla r2, r6, lr, r2 +; CHECK-NEXT: adc r9, r9, #0 +; CHECK-NEXT: umlal r10, r9, lr, r6 +; CHECK-NEXT: mla r1, r6, r1, r2 +; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: lsl lr, r1, r0 +; CHECK-NEXT: lsr r1, r2, #1 +; CHECK-NEXT: lsl r0, r2, r0 +; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: orr r1, lr, r1, lsr r7 +; CHECK-NEXT: ldr lr, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: lsrs r2, r2, #1 +; CHECK-NEXT: orr r0, r0, r2, lsr r7 +; CHECK-NEXT: umull r2, r7, r8, r0 +; CHECK-NEXT: mla r1, r8, r1, r7 +; CHECK-NEXT: mla r0, r6, r0, r1 +; CHECK-NEXT: adds r1, r12, r2 +; CHECK-NEXT: ldmib sp, {r2, r12} @ 8-byte Folded Reload +; CHECK-NEXT: adc r0, r2, r0 +; CHECK-NEXT: adds r6, r10, r1 +; CHECK-NEXT: adc r2, r9, r0 +; CHECK-NEXT: .LBB47_10: @ %fp-to-i-cleanup +; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: .LBB47_11: @ %fp-to-i-cleanup +; CHECK-NEXT: subs r0, r6, #1 +; CHECK-NEXT: sbcs r0, r2, #0 ; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: movwlt r0, #1 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r5, r0 ; CHECK-NEXT: moveq r4, r0 -; CHECK-NEXT: movne r0, r3 +; CHECK-NEXT: moveq r5, r0 +; CHECK-NEXT: movne r0, r2 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: movwmi r4, #0 ; CHECK-NEXT: movwmi r5, #0 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r2, r2, #1 -; CHECK-NEXT: vmov.32 d1[0], r5 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: movwlt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r1, r6 -; CHECK-NEXT: moveq r0, r6 -; CHECK-NEXT: movne r6, r3 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: movwmi r0, #0 -; CHECK-NEXT: movwmi r1, #0 -; CHECK-NEXT: vmov.32 d0[0], r0 -; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: vmov.32 d0[1], r1 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEXT: subs r0, r1, #1 +; CHECK-NEXT: vmov.32 d1[0], r4 +; CHECK-NEXT: sbcs r0, lr, #0 +; CHECK-NEXT: movwlt r3, #1 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: moveq r11, r3 +; CHECK-NEXT: moveq r12, r3 +; CHECK-NEXT: movne r3, lr +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: movwmi r12, #0 +; CHECK-NEXT: movwmi r11, #0 +; CHECK-NEXT: vmov.32 d0[0], r12 +; CHECK-NEXT: vmov.32 d1[1], r5 +; CHECK-NEXT: vmov.32 d0[1], r11 +; CHECK-NEXT: add sp, sp, #100 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -3775,66 +5262,251 @@ entry: define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-LABEL: stest_f32i64_mm: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: .vsave {d8} -; CHECK-NEXT: vpush {d8} -; CHECK-NEXT: vmov.f64 d8, d0 -; CHECK-NEXT: vmov.f32 s0, s17 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov r4, r1 +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #100 +; CHECK-NEXT: sub sp, sp, #100 +; CHECK-NEXT: vmov r6, s0 +; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: mvn r9, #0 -; CHECK-NEXT: subs r1, r0, r9 -; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r4, r5 -; CHECK-NEXT: vmov.f32 s0, s16 -; CHECK-NEXT: sbcs r1, r2, #0 +; CHECK-NEXT: ubfx r7, r6, #23, #8 +; CHECK-NEXT: cmp r7, #127 +; CHECK-NEXT: bhs .LBB48_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: mov r8, #0 +; CHECK-NEXT: mov lr, #0 +; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: b .LBB48_5 +; CHECK-NEXT: .LBB48_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: mov r0, #1 +; CHECK-NEXT: orr lr, r0, r6, asr #31 +; CHECK-NEXT: asr r1, r6, #31 +; CHECK-NEXT: bfi r6, r0, #23, #9 +; CHECK-NEXT: cmp r7, #149 +; CHECK-NEXT: bhi .LBB48_4 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: rsb r0, r7, #150 +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: lsr r0, r6, r0 +; CHECK-NEXT: umull r8, lr, r0, lr +; CHECK-NEXT: umull r7, r6, r0, r1 +; CHECK-NEXT: adds r4, r7, lr +; CHECK-NEXT: adcs r4, r6, #0 +; CHECK-NEXT: adc r3, r5, #0 +; CHECK-NEXT: adds r4, r7, lr +; CHECK-NEXT: mla r4, r1, r0, r6 +; CHECK-NEXT: adcs r10, r7, r6 +; CHECK-NEXT: umlal lr, r5, r0, r1 +; CHECK-NEXT: adc r12, r3, r4 +; CHECK-NEXT: b .LBB48_5 +; CHECK-NEXT: .LBB48_4: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: sub r7, r7, #150 +; CHECK-NEXT: add r4, sp, #64 +; CHECK-NEXT: str r6, [sp, #80] +; CHECK-NEXT: mov r6, #12 +; CHECK-NEXT: mov r11, #0 +; CHECK-NEXT: and r6, r6, r7, lsr #3 +; CHECK-NEXT: add r4, r4, #16 +; CHECK-NEXT: str r11, [sp, #92] +; CHECK-NEXT: str r11, [sp, #88] +; CHECK-NEXT: and r7, r7, #31 +; CHECK-NEXT: str r11, [sp, #84] +; CHECK-NEXT: str r11, [sp, #76] +; CHECK-NEXT: str r11, [sp, #72] +; CHECK-NEXT: str r11, [sp, #68] +; CHECK-NEXT: str r11, [sp, #64] +; CHECK-NEXT: ldr r6, [r4, -r6]! +; CHECK-NEXT: ldr r0, [r4, #4] +; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: lsr r8, r6, #1 +; CHECK-NEXT: ldr r3, [r4, #8] +; CHECK-NEXT: lsl r6, r6, r7 +; CHECK-NEXT: ldr r5, [r4, #12] +; CHECK-NEXT: eor r4, r7, #31 +; CHECK-NEXT: lsl r12, r0, r7 +; CHECK-NEXT: str r5, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: orr r0, r12, r8, lsr r4 +; CHECK-NEXT: umull r8, r5, r6, lr +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: str lr, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: umlal r5, r12, r0, lr +; CHECK-NEXT: mov lr, r0 +; CHECK-NEXT: umull r0, r10, r6, r1 +; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: adds r0, r0, r5 +; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: adcs r5, r12, r10 +; CHECK-NEXT: adc r11, r11, #0 +; CHECK-NEXT: umlal r5, r11, lr, r1 +; CHECK-NEXT: lsl r12, r0, r7 +; CHECK-NEXT: lsr r0, r3, #1 +; CHECK-NEXT: orr r12, r12, r0, lsr r4 +; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: lsl r7, r3, r7 +; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: lsrs r0, r0, #1 +; CHECK-NEXT: orr r0, r7, r0, lsr r4 +; CHECK-NEXT: umull r4, r7, r3, r0 +; CHECK-NEXT: mla r7, r3, r12, r7 +; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mla r0, r1, r0, r7 +; CHECK-NEXT: mla r7, r1, lr, r10 +; CHECK-NEXT: ldr lr, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mla r1, r1, r6, r7 +; CHECK-NEXT: adds r7, r3, r4 +; CHECK-NEXT: adc r0, r1, r0 +; CHECK-NEXT: adds r10, r5, r7 +; CHECK-NEXT: adc r12, r11, r0 +; CHECK-NEXT: .LBB48_5: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r0, s1 +; CHECK-NEXT: mvn r5, #0 +; CHECK-NEXT: cmn r0, #1 +; CHECK-NEXT: ubfx r4, r0, #23, #8 +; CHECK-NEXT: movwgt r5, #0 +; CHECK-NEXT: movwgt r9, #1 +; CHECK-NEXT: cmp r4, #127 +; CHECK-NEXT: bhs .LBB48_7 +; CHECK-NEXT: @ %bb.6: +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: mov r8, #-2147483648 -; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: b .LBB48_10 +; CHECK-NEXT: .LBB48_7: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: movw r2, #65535 +; CHECK-NEXT: cmp r4, #149 +; CHECK-NEXT: movt r2, #127 +; CHECK-NEXT: and r0, r0, r2 +; CHECK-NEXT: orr r2, r0, #8388608 +; CHECK-NEXT: bhi .LBB48_9 +; CHECK-NEXT: @ %bb.8: @ %fp-to-i-if-exp.small +; CHECK-NEXT: rsb r0, r4, #150 +; CHECK-NEXT: lsr r0, r2, r0 +; CHECK-NEXT: umull r7, r6, r0, r5 +; CHECK-NEXT: umull r2, r3, r0, r9 +; CHECK-NEXT: mul r5, r5, r0 +; CHECK-NEXT: adds r4, r7, r3 +; CHECK-NEXT: adcs r0, r7, r6 +; CHECK-NEXT: adc r7, r6, r5 +; CHECK-NEXT: b .LBB48_10 +; CHECK-NEXT: .LBB48_9: @ %fp-to-i-if-exp.large +; CHECK-NEXT: str r2, [sp, #48] +; CHECK-NEXT: sub r2, r4, #150 +; CHECK-NEXT: mov r3, #12 +; CHECK-NEXT: add r7, sp, #32 +; CHECK-NEXT: mov r11, #0 +; CHECK-NEXT: and r3, r3, r2, lsr #3 +; CHECK-NEXT: add r7, r7, #16 +; CHECK-NEXT: str r11, [sp, #60] +; CHECK-NEXT: str r11, [sp, #56] +; CHECK-NEXT: str r11, [sp, #52] +; CHECK-NEXT: str r11, [sp, #44] +; CHECK-NEXT: str r11, [sp, #40] +; CHECK-NEXT: str r11, [sp, #36] +; CHECK-NEXT: str r11, [sp, #32] +; CHECK-NEXT: ldr r3, [r7, -r3]! +; CHECK-NEXT: ldr r0, [r7, #4] +; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: lsr r4, r3, #1 +; CHECK-NEXT: ldr r1, [r7, #8] +; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: ldr r1, [r7, #12] +; CHECK-NEXT: and r7, r2, #31 +; CHECK-NEXT: eor r6, r7, #31 +; CHECK-NEXT: str lr, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: lsl r2, r0, r7 +; CHECK-NEXT: lsl r0, r3, r7 +; CHECK-NEXT: orr lr, r2, r4, lsr r6 +; CHECK-NEXT: umull r2, r4, r0, r9 +; CHECK-NEXT: str r10, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-NEXT: umull r0, r3, r0, r5 +; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: umlal r4, r10, lr, r9 +; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: adds r4, r0, r4 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: adcs r10, r10, r3 +; CHECK-NEXT: adc r3, r11, #0 +; CHECK-NEXT: lsl r1, r0, r7 +; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: umlal r10, r3, lr, r5 +; CHECK-NEXT: lsr r11, r0, #1 +; CHECK-NEXT: orr r1, r1, r11, lsr r6 +; CHECK-NEXT: lsl r11, r0, r7 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: lsrs r7, r0, #1 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: orr r6, r11, r7, lsr r6 +; CHECK-NEXT: umull r11, r7, r9, r6 +; CHECK-NEXT: mla r7, r9, r0, r7 +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mla r7, r5, r6, r7 +; CHECK-NEXT: mla r6, r5, lr, r0 +; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload +; CHECK-NEXT: ldr lr, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mla r0, r5, r0, r6 +; CHECK-NEXT: adds r6, r1, r11 +; CHECK-NEXT: adc r7, r0, r7 +; CHECK-NEXT: adds r0, r10, r6 +; CHECK-NEXT: ldr r10, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: adc r7, r3, r7 +; CHECK-NEXT: .LBB48_10: @ %fp-to-i-cleanup +; CHECK-NEXT: mvn r6, #0 +; CHECK-NEXT: subs r1, r2, r6 +; CHECK-NEXT: mvn r3, #-2147483648 +; CHECK-NEXT: sbcs r1, r4, r3 +; CHECK-NEXT: sbcs r1, r0, #0 +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: sbcs r1, r7, #0 +; CHECK-NEXT: mov r1, #0 ; CHECK-NEXT: movwlt r1, #1 ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r3, r1 -; CHECK-NEXT: movne r1, r2 -; CHECK-NEXT: moveq r4, r5 -; CHECK-NEXT: moveq r0, r9 -; CHECK-NEXT: rsbs r2, r0, #0 -; CHECK-NEXT: rscs r2, r4, #-2147483648 -; CHECK-NEXT: sbcs r1, r9, r1 -; CHECK-NEXT: sbcs r1, r9, r3 +; CHECK-NEXT: moveq r7, r1 +; CHECK-NEXT: movne r1, r0 +; CHECK-NEXT: moveq r4, r3 +; CHECK-NEXT: moveq r2, r6 +; CHECK-NEXT: rsbs r0, r2, #0 +; CHECK-NEXT: rscs r0, r4, #-2147483648 +; CHECK-NEXT: sbcs r0, r6, r1 +; CHECK-NEXT: sbcs r0, r6, r7 +; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: movwlt r7, #1 ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: movne r7, r0 -; CHECK-NEXT: moveq r4, r8 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r6, r0, r9 +; CHECK-NEXT: mov r0, #-2147483648 +; CHECK-NEXT: movne r7, r2 +; CHECK-NEXT: moveq r4, r0 +; CHECK-NEXT: subs r1, r8, r6 +; CHECK-NEXT: sbcs r1, lr, r3 ; CHECK-NEXT: vmov.32 d1[0], r7 -; CHECK-NEXT: sbcs r6, r1, r5 -; CHECK-NEXT: sbcs r6, r2, #0 -; CHECK-NEXT: sbcs r6, r3, #0 -; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: movwlt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r3, r6 -; CHECK-NEXT: movne r6, r2 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: moveq r0, r9 -; CHECK-NEXT: rsbs r1, r0, #0 -; CHECK-NEXT: rscs r1, r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r9, r6 -; CHECK-NEXT: sbcs r1, r9, r3 -; CHECK-NEXT: movwlt r10, #1 -; CHECK-NEXT: cmp r10, #0 -; CHECK-NEXT: movne r10, r0 -; CHECK-NEXT: moveq r5, r8 -; CHECK-NEXT: vmov.32 d0[0], r10 +; CHECK-NEXT: sbcs r1, r10, #0 +; CHECK-NEXT: sbcs r1, r12, #0 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: movwlt r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: moveq r12, r1 +; CHECK-NEXT: movne r1, r10 +; CHECK-NEXT: movne r3, lr +; CHECK-NEXT: moveq r8, r6 +; CHECK-NEXT: rsbs r2, r8, #0 +; CHECK-NEXT: rscs r2, r3, #-2147483648 +; CHECK-NEXT: sbcs r1, r6, r1 +; CHECK-NEXT: sbcs r1, r6, r12 +; CHECK-NEXT: movwlt r5, #1 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: movne r5, r8 +; CHECK-NEXT: moveq r3, r0 +; CHECK-NEXT: vmov.32 d0[0], r5 ; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: vmov.32 d0[1], r5 -; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: vmov.32 d0[1], r3 +; CHECK-NEXT: add sp, sp, #100 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -3845,37 +5517,132 @@ entry: define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-LABEL: utest_f32i64_mm: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: .vsave {d8} -; CHECK-NEXT: vpush {d8} -; CHECK-NEXT: vmov.f64 d8, d0 -; CHECK-NEXT: vmov.f32 s0, s17 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vmov.f32 s0, s16 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: subs r1, r2, #1 -; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: sbcs r1, r3, #0 +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: .pad #64 +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: ubfx r0, r1, #23, #8 +; CHECK-NEXT: cmp r0, #127 +; CHECK-NEXT: bhs .LBB49_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: b .LBB49_4 +; CHECK-NEXT: .LBB49_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: cmp r0, #149 +; CHECK-NEXT: bfi r1, r3, #23, #9 +; CHECK-NEXT: bhi .LBB49_5 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: rsb r0, r0, #150 +; CHECK-NEXT: lsr r1, r1, r0 +; CHECK-NEXT: .LBB49_4: @ %fp-to-i-cleanup1 +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: mov r8, #0 +; CHECK-NEXT: mov lr, #0 +; CHECK-NEXT: b .LBB49_6 +; CHECK-NEXT: .LBB49_5: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: sub r0, r0, #150 +; CHECK-NEXT: str r1, [sp, #48] +; CHECK-NEXT: mov r1, #12 +; CHECK-NEXT: str r3, [sp, #60] +; CHECK-NEXT: and r1, r1, r0, lsr #3 +; CHECK-NEXT: str r3, [sp, #56] +; CHECK-NEXT: and r6, r0, #31 +; CHECK-NEXT: str r3, [sp, #52] +; CHECK-NEXT: eor r7, r6, #31 +; CHECK-NEXT: str r3, [sp, #44] +; CHECK-NEXT: str r3, [sp, #40] +; CHECK-NEXT: str r3, [sp, #36] +; CHECK-NEXT: str r3, [sp, #32] +; CHECK-NEXT: add r3, sp, #32 +; CHECK-NEXT: add r3, r3, #16 +; CHECK-NEXT: ldr r1, [r3, -r1]! +; CHECK-NEXT: ldmib r3, {r4, r5, lr} +; CHECK-NEXT: lsr r3, r1, #1 +; CHECK-NEXT: lsl r1, r1, r6 +; CHECK-NEXT: lsl r0, r4, r6 +; CHECK-NEXT: orr r12, r0, r3, lsr r7 +; CHECK-NEXT: lsr r3, r5, #1 +; CHECK-NEXT: lsl r0, lr, r6 +; CHECK-NEXT: orr lr, r0, r3, lsr r7 +; CHECK-NEXT: lsrs r3, r4, #1 +; CHECK-NEXT: lsl r0, r5, r6 +; CHECK-NEXT: orr r8, r0, r3, lsr r7 +; CHECK-NEXT: .LBB49_6: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r4, s1 +; CHECK-NEXT: ubfx r3, r4, #23, #8 +; CHECK-NEXT: cmp r3, #127 +; CHECK-NEXT: blo .LBB49_9 +; CHECK-NEXT: @ %bb.7: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: movw r2, #65535 +; CHECK-NEXT: cmp r3, #149 +; CHECK-NEXT: movt r2, #127 +; CHECK-NEXT: and r2, r4, r2 +; CHECK-NEXT: orr r2, r2, #8388608 +; CHECK-NEXT: bhi .LBB49_10 +; CHECK-NEXT: @ %bb.8: @ %fp-to-i-if-exp.small +; CHECK-NEXT: rsb r3, r3, #150 +; CHECK-NEXT: lsr r2, r2, r3 +; CHECK-NEXT: .LBB49_9: +; CHECK-NEXT: mov r3, #0 ; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: movwlo r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r4, r6 -; CHECK-NEXT: movne r6, r0 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: subs r2, r2, #1 -; CHECK-NEXT: vmov.32 d1[0], r6 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: movwlo r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: moveq r0, r5 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: vmov.32 d0[0], r0 -; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: vmov.32 d0[1], r5 -; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: b .LBB49_11 +; CHECK-NEXT: .LBB49_10: @ %fp-to-i-if-exp.large +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: str r2, [sp, #16] +; CHECK-NEXT: sub r2, r3, #150 +; CHECK-NEXT: mov r3, #12 +; CHECK-NEXT: str r4, [sp, #28] +; CHECK-NEXT: str r4, [sp, #24] +; CHECK-NEXT: and r3, r3, r2, lsr #3 +; CHECK-NEXT: str r4, [sp, #20] +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: str r4, [sp, #12] +; CHECK-NEXT: eor r9, r2, #31 +; CHECK-NEXT: str r4, [sp, #8] +; CHECK-NEXT: str r4, [sp, #4] +; CHECK-NEXT: str r4, [sp] +; CHECK-NEXT: mov r4, sp +; CHECK-NEXT: add r4, r4, #16 +; CHECK-NEXT: ldr r6, [r4, -r3]! +; CHECK-NEXT: ldmib r4, {r5, r7} +; CHECK-NEXT: lsr r0, r6, #1 +; CHECK-NEXT: ldr r4, [r4, #12] +; CHECK-NEXT: lsl r3, r5, r2 +; CHECK-NEXT: lsrs r5, r5, #1 +; CHECK-NEXT: orr r3, r3, r0, lsr r9 +; CHECK-NEXT: lsl r0, r4, r2 +; CHECK-NEXT: lsr r4, r7, #1 +; CHECK-NEXT: orr r4, r0, r4, lsr r9 +; CHECK-NEXT: lsl r0, r7, r2 +; CHECK-NEXT: orr r5, r0, r5, lsr r9 +; CHECK-NEXT: lsl r2, r6, r2 +; CHECK-NEXT: .LBB49_11: @ %fp-to-i-cleanup +; CHECK-NEXT: subs r5, r5, #1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: sbcs r4, r4, #0 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: movwlo r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: moveq r3, r4 +; CHECK-NEXT: movne r4, r2 +; CHECK-NEXT: subs r2, r8, #1 +; CHECK-NEXT: vmov.32 d1[0], r4 +; CHECK-NEXT: sbcs r2, lr, #0 +; CHECK-NEXT: movwlo r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: moveq r1, r0 +; CHECK-NEXT: movne r0, r12 +; CHECK-NEXT: vmov.32 d0[0], r1 +; CHECK-NEXT: vmov.32 d1[1], r3 +; CHECK-NEXT: vmov.32 d0[1], r0 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} entry: %conv = fptoui <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -3885,46 +5652,231 @@ entry: define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-LABEL: ustest_f32i64_mm: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: .vsave {d8} -; CHECK-NEXT: vpush {d8} -; CHECK-NEXT: vmov.f64 d8, d0 -; CHECK-NEXT: vmov.f32 s0, s17 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vmov.f32 s0, s16 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: subs r0, r2, #1 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: sbcs r0, r3, #0 -; CHECK-NEXT: mov r6, #0 +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #100 +; CHECK-NEXT: sub sp, sp, #100 +; CHECK-NEXT: vmov r2, s0 ; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r5, r0 -; CHECK-NEXT: moveq r4, r0 -; CHECK-NEXT: movne r0, r3 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movwmi r4, #0 -; CHECK-NEXT: movwmi r5, #0 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r2, r2, #1 -; CHECK-NEXT: vmov.32 d1[0], r5 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: movwlt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r1, r6 -; CHECK-NEXT: moveq r0, r6 -; CHECK-NEXT: movne r6, r3 -; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: mvn r8, #0 +; CHECK-NEXT: ubfx r1, r2, #23, #8 +; CHECK-NEXT: cmp r1, #127 +; CHECK-NEXT: bhs .LBB50_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: mov lr, #0 +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: b .LBB50_5 +; CHECK-NEXT: .LBB50_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: mov r6, #1 +; CHECK-NEXT: orr r9, r6, r2, asr #31 +; CHECK-NEXT: asr lr, r2, #31 +; CHECK-NEXT: bfi r2, r6, #23, #9 +; CHECK-NEXT: cmp r1, #149 +; CHECK-NEXT: bhi .LBB50_4 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: rsb r1, r1, #150 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: lsr r6, r2, r1 +; CHECK-NEXT: umull r1, r2, r6, r9 +; CHECK-NEXT: umull r5, r3, r6, lr +; CHECK-NEXT: adds r4, r5, r2 +; CHECK-NEXT: adcs r4, r3, #0 +; CHECK-NEXT: adc r12, r7, #0 +; CHECK-NEXT: adds r4, r5, r2 +; CHECK-NEXT: mla r4, lr, r6, r3 +; CHECK-NEXT: umlal r2, r7, r6, lr +; CHECK-NEXT: adcs lr, r5, r3 +; CHECK-NEXT: adc r12, r12, r4 +; CHECK-NEXT: b .LBB50_5 +; CHECK-NEXT: .LBB50_4: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: sub r1, r1, #150 +; CHECK-NEXT: add r3, sp, #64 +; CHECK-NEXT: str r2, [sp, #80] +; CHECK-NEXT: mov r2, #12 +; CHECK-NEXT: mov r11, #0 +; CHECK-NEXT: and r2, r2, r1, lsr #3 +; CHECK-NEXT: add r3, r3, #16 +; CHECK-NEXT: str r11, [sp, #92] +; CHECK-NEXT: str r11, [sp, #88] +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: str r11, [sp, #84] +; CHECK-NEXT: str r11, [sp, #76] +; CHECK-NEXT: str r11, [sp, #72] +; CHECK-NEXT: str r11, [sp, #68] +; CHECK-NEXT: str r11, [sp, #64] +; CHECK-NEXT: ldr r2, [r3, -r2]! +; CHECK-NEXT: ldr r4, [r3, #4] +; CHECK-NEXT: str r4, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: ldr r7, [r3, #8] +; CHECK-NEXT: str r7, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: and r7, r1, #31 +; CHECK-NEXT: ldr r3, [r3, #12] +; CHECK-NEXT: lsl r1, r4, r7 +; CHECK-NEXT: eor r4, r7, #31 +; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: lsr r3, r2, #1 +; CHECK-NEXT: orr r10, r1, r3, lsr r4 +; CHECK-NEXT: lsl r3, r2, r7 +; CHECK-NEXT: umull r1, r2, r3, r9 +; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: umull r3, r12, r3, lr +; CHECK-NEXT: umlal r2, r5, r10, r9 +; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: str r12, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: adds r2, r3, r2 +; CHECK-NEXT: ldr r3, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: adcs r5, r5, r12 +; CHECK-NEXT: adc r12, r11, #0 +; CHECK-NEXT: lsl r6, r3, r7 +; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: umlal r5, r12, r10, lr +; CHECK-NEXT: lsr r11, r3, #1 +; CHECK-NEXT: orr r6, r6, r11, lsr r4 +; CHECK-NEXT: lsl r11, r3, r7 +; CHECK-NEXT: ldr r3, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: str r6, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: lsrs r7, r3, #1 +; CHECK-NEXT: ldr r3, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: orr r4, r11, r7, lsr r4 +; CHECK-NEXT: umull r11, r7, r9, r4 +; CHECK-NEXT: mla r7, r9, r3, r7 +; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mla r3, lr, r10, r3 +; CHECK-NEXT: mla r4, lr, r4, r7 +; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mla r3, lr, r6, r3 +; CHECK-NEXT: adds r7, r7, r11 +; CHECK-NEXT: adc r3, r3, r4 +; CHECK-NEXT: adds lr, r5, r7 +; CHECK-NEXT: adc r12, r12, r3 +; CHECK-NEXT: .LBB50_5: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r6, s1 +; CHECK-NEXT: mvn r5, #0 +; CHECK-NEXT: cmn r6, #1 +; CHECK-NEXT: ubfx r4, r6, #23, #8 +; CHECK-NEXT: movwgt r5, #0 +; CHECK-NEXT: movwgt r8, #1 +; CHECK-NEXT: cmp r4, #127 +; CHECK-NEXT: bhs .LBB50_7 +; CHECK-NEXT: @ %bb.6: +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: b .LBB50_10 +; CHECK-NEXT: .LBB50_7: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: movw r0, #65535 +; CHECK-NEXT: cmp r4, #149 +; CHECK-NEXT: movt r0, #127 +; CHECK-NEXT: and r0, r6, r0 +; CHECK-NEXT: orr r0, r0, #8388608 +; CHECK-NEXT: bhi .LBB50_9 +; CHECK-NEXT: @ %bb.8: @ %fp-to-i-if-exp.small +; CHECK-NEXT: rsb r3, r4, #150 +; CHECK-NEXT: lsr r3, r0, r3 +; CHECK-NEXT: umull r0, r4, r3, r8 +; CHECK-NEXT: umull r6, r7, r3, r5 +; CHECK-NEXT: mul r3, r5, r3 +; CHECK-NEXT: adds r4, r6, r4 +; CHECK-NEXT: adcs r6, r6, r7 +; CHECK-NEXT: adc r5, r7, r3 +; CHECK-NEXT: b .LBB50_10 +; CHECK-NEXT: .LBB50_9: @ %fp-to-i-if-exp.large +; CHECK-NEXT: str r0, [sp, #48] +; CHECK-NEXT: sub r0, r4, #150 +; CHECK-NEXT: mov r3, #12 +; CHECK-NEXT: add r4, sp, #32 +; CHECK-NEXT: mov r9, #0 +; CHECK-NEXT: and r3, r3, r0, lsr #3 +; CHECK-NEXT: add r4, r4, #16 +; CHECK-NEXT: str r9, [sp, #60] +; CHECK-NEXT: str r9, [sp, #56] +; CHECK-NEXT: str r9, [sp, #52] +; CHECK-NEXT: str r9, [sp, #44] +; CHECK-NEXT: str r9, [sp, #40] +; CHECK-NEXT: str r9, [sp, #36] +; CHECK-NEXT: str r9, [sp, #32] +; CHECK-NEXT: ldr r3, [r4, -r3]! +; CHECK-NEXT: ldr r6, [r4, #4] +; CHECK-NEXT: str r6, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: ldr r7, [r4, #8] +; CHECK-NEXT: str r7, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: and r7, r0, #31 +; CHECK-NEXT: ldr r4, [r4, #12] +; CHECK-NEXT: str lr, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: eor lr, r7, #31 +; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: lsr r4, r3, #1 +; CHECK-NEXT: lsl r0, r6, r7 +; CHECK-NEXT: lsl r3, r3, r7 +; CHECK-NEXT: orr r10, r0, r4, lsr lr +; CHECK-NEXT: umull r0, r4, r3, r8 +; CHECK-NEXT: str r12, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: umull r3, r6, r3, r5 +; CHECK-NEXT: umlal r4, r12, r10, r8 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: str r6, [sp] @ 4-byte Spill +; CHECK-NEXT: adds r4, r3, r4 +; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: adcs r12, r12, r6 +; CHECK-NEXT: adc r11, r9, #0 +; CHECK-NEXT: lsl r6, r3, r7 +; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: umlal r12, r11, r10, r5 +; CHECK-NEXT: lsr r9, r3, #1 +; CHECK-NEXT: lsl r7, r3, r7 +; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: orr r9, r6, r9, lsr lr +; CHECK-NEXT: lsrs r6, r3, #1 +; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload +; CHECK-NEXT: orr r7, r7, r6, lsr lr +; CHECK-NEXT: umull lr, r6, r8, r7 +; CHECK-NEXT: mla r6, r8, r9, r6 +; CHECK-NEXT: mla r6, r5, r7, r6 +; CHECK-NEXT: mla r7, r5, r10, r3 +; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mla r3, r5, r3, r7 +; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: adds r7, r5, lr +; CHECK-NEXT: ldr lr, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: adc r3, r3, r6 +; CHECK-NEXT: adds r6, r12, r7 +; CHECK-NEXT: ldr r12, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: adc r5, r11, r3 +; CHECK-NEXT: .LBB50_10: @ %fp-to-i-cleanup +; CHECK-NEXT: subs r7, r6, #1 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: sbcs r7, r5, #0 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: movwlt r7, #1 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: moveq r0, r7 +; CHECK-NEXT: moveq r4, r7 +; CHECK-NEXT: movne r7, r5 +; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: movwmi r0, #0 +; CHECK-NEXT: movwmi r4, #0 +; CHECK-NEXT: subs r7, lr, #1 +; CHECK-NEXT: vmov.32 d1[0], r0 +; CHECK-NEXT: sbcs r7, r12, #0 +; CHECK-NEXT: movwlt r3, #1 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: moveq r2, r3 +; CHECK-NEXT: moveq r1, r3 +; CHECK-NEXT: movne r3, r12 +; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: movwmi r1, #0 -; CHECK-NEXT: vmov.32 d0[0], r0 +; CHECK-NEXT: movwmi r2, #0 +; CHECK-NEXT: vmov.32 d0[0], r1 ; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: vmov.32 d0[1], r1 -; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEXT: vmov.32 d0[1], r2 +; CHECK-NEXT: add sp, sp, #100 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -3936,130 +5888,45 @@ entry: define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-LABEL: stest_f16i64_mm: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEON-NEXT: .save {r4, lr} +; CHECK-NEON-NEXT: push {r4, lr} ; CHECK-NEON-NEXT: .vsave {d8} ; CHECK-NEON-NEXT: vpush {d8} ; CHECK-NEON-NEXT: vmov r0, s0 ; CHECK-NEON-NEXT: vmov.f32 s16, s1 ; CHECK-NEON-NEXT: bl __aeabi_h2f -; CHECK-NEON-NEXT: mov r8, r0 +; CHECK-NEON-NEXT: mov r4, r0 ; CHECK-NEON-NEXT: vmov r0, s16 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 -; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: mov r4, r1 -; CHECK-NEON-NEXT: mvn r9, #0 -; CHECK-NEON-NEXT: subs r1, r0, r9 -; CHECK-NEON-NEXT: mvn r6, #-2147483648 -; CHECK-NEON-NEXT: sbcs r1, r4, r6 -; CHECK-NEON-NEXT: vmov s0, r8 -; CHECK-NEON-NEXT: sbcs r1, r2, #0 -; CHECK-NEON-NEXT: mov r5, #0 -; CHECK-NEON-NEXT: sbcs r1, r3, #0 -; CHECK-NEON-NEXT: mov r8, #-2147483648 -; CHECK-NEON-NEXT: mov r1, #0 -; CHECK-NEON-NEXT: mov r10, #0 -; CHECK-NEON-NEXT: movwlt r1, #1 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: moveq r3, r1 -; CHECK-NEON-NEXT: movne r1, r2 -; CHECK-NEON-NEXT: moveq r4, r6 -; CHECK-NEON-NEXT: moveq r0, r9 -; CHECK-NEON-NEXT: rsbs r2, r0, #0 -; CHECK-NEON-NEXT: rscs r2, r4, #-2147483648 -; CHECK-NEON-NEXT: sbcs r1, r9, r1 -; CHECK-NEON-NEXT: sbcs r1, r9, r3 -; CHECK-NEON-NEXT: movwlt r5, #1 -; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: movne r5, r0 -; CHECK-NEON-NEXT: moveq r4, r8 -; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: subs r7, r0, r9 -; CHECK-NEON-NEXT: vmov.32 d1[0], r5 -; CHECK-NEON-NEXT: sbcs r7, r1, r6 -; CHECK-NEON-NEXT: sbcs r7, r2, #0 -; CHECK-NEON-NEXT: sbcs r7, r3, #0 -; CHECK-NEON-NEXT: mov r7, #0 -; CHECK-NEON-NEXT: movwlt r7, #1 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: moveq r3, r7 -; CHECK-NEON-NEXT: movne r7, r2 -; CHECK-NEON-NEXT: movne r6, r1 -; CHECK-NEON-NEXT: moveq r0, r9 -; CHECK-NEON-NEXT: rsbs r1, r0, #0 -; CHECK-NEON-NEXT: rscs r1, r6, #-2147483648 -; CHECK-NEON-NEXT: sbcs r1, r9, r7 -; CHECK-NEON-NEXT: sbcs r1, r9, r3 -; CHECK-NEON-NEXT: movwlt r10, #1 -; CHECK-NEON-NEXT: cmp r10, #0 -; CHECK-NEON-NEXT: movne r10, r0 -; CHECK-NEON-NEXT: moveq r6, r8 -; CHECK-NEON-NEXT: vmov.32 d0[0], r10 -; CHECK-NEON-NEXT: vmov.32 d1[1], r4 -; CHECK-NEON-NEXT: vmov.32 d0[1], r6 +; CHECK-NEON-NEXT: vmov s2, r4 +; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0 +; CHECK-NEON-NEXT: vcvt.s32.f32 s4, s2 +; CHECK-NEON-NEXT: vmov r0, s0 +; CHECK-NEON-NEXT: vmov r1, s4 +; CHECK-NEON-NEXT: vmov.32 d1[0], r0 +; CHECK-NEON-NEXT: asr r0, r0, #31 +; CHECK-NEON-NEXT: vmov.32 d0[0], r1 +; CHECK-NEON-NEXT: vmov.32 d1[1], r0 +; CHECK-NEON-NEXT: asr r0, r1, #31 +; CHECK-NEON-NEXT: vmov.32 d0[1], r0 ; CHECK-NEON-NEXT: vpop {d8} -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEON-NEXT: pop {r4, pc} ; ; CHECK-FP16-LABEL: stest_f16i64_mm: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-FP16-NEXT: vmov.u16 r0, d0[1] -; CHECK-FP16-NEXT: vmov.u16 r7, d0[0] -; CHECK-FP16-NEXT: vmov s0, r0 -; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: mov r4, r1 -; CHECK-FP16-NEXT: mvn r9, #0 -; CHECK-FP16-NEXT: subs r1, r0, r9 -; CHECK-FP16-NEXT: mvn r5, #-2147483648 -; CHECK-FP16-NEXT: sbcs r1, r4, r5 -; CHECK-FP16-NEXT: vmov s0, r7 -; CHECK-FP16-NEXT: sbcs r1, r2, #0 -; CHECK-FP16-NEXT: mov r7, #0 -; CHECK-FP16-NEXT: sbcs r1, r3, #0 -; CHECK-FP16-NEXT: mov r8, #-2147483648 -; CHECK-FP16-NEXT: mov r1, #0 -; CHECK-FP16-NEXT: mov r10, #0 -; CHECK-FP16-NEXT: movwlt r1, #1 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: moveq r3, r1 -; CHECK-FP16-NEXT: movne r1, r2 -; CHECK-FP16-NEXT: moveq r4, r5 -; CHECK-FP16-NEXT: moveq r0, r9 -; CHECK-FP16-NEXT: rsbs r2, r0, #0 -; CHECK-FP16-NEXT: rscs r2, r4, #-2147483648 -; CHECK-FP16-NEXT: sbcs r1, r9, r1 -; CHECK-FP16-NEXT: sbcs r1, r9, r3 -; CHECK-FP16-NEXT: movwlt r7, #1 -; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: movne r7, r0 -; CHECK-FP16-NEXT: moveq r4, r8 -; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: subs r6, r0, r9 -; CHECK-FP16-NEXT: vmov.32 d1[0], r7 -; CHECK-FP16-NEXT: sbcs r6, r1, r5 -; CHECK-FP16-NEXT: sbcs r6, r2, #0 -; CHECK-FP16-NEXT: sbcs r6, r3, #0 -; CHECK-FP16-NEXT: mov r6, #0 -; CHECK-FP16-NEXT: movwlt r6, #1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: moveq r3, r6 -; CHECK-FP16-NEXT: movne r6, r2 -; CHECK-FP16-NEXT: movne r5, r1 -; CHECK-FP16-NEXT: moveq r0, r9 -; CHECK-FP16-NEXT: rsbs r1, r0, #0 -; CHECK-FP16-NEXT: rscs r1, r5, #-2147483648 -; CHECK-FP16-NEXT: sbcs r1, r9, r6 -; CHECK-FP16-NEXT: sbcs r1, r9, r3 -; CHECK-FP16-NEXT: movwlt r10, #1 -; CHECK-FP16-NEXT: cmp r10, #0 -; CHECK-FP16-NEXT: movne r10, r0 -; CHECK-FP16-NEXT: moveq r5, r8 -; CHECK-FP16-NEXT: vmov.32 d0[0], r10 -; CHECK-FP16-NEXT: vmov.32 d1[1], r4 -; CHECK-FP16-NEXT: vmov.32 d0[1], r5 -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-FP16-NEXT: vcvt.s32.f16 s4, s0 +; CHECK-FP16-NEXT: vmovx.f16 s0, s0 +; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-FP16-NEXT: vmov r1, s4 +; CHECK-FP16-NEXT: vmov r0, s0 +; CHECK-FP16-NEXT: vmov.32 d1[0], r0 +; CHECK-FP16-NEXT: asr r0, r0, #31 +; CHECK-FP16-NEXT: vmov.32 d0[0], r1 +; CHECK-FP16-NEXT: vmov.32 d1[1], r0 +; CHECK-FP16-NEXT: asr r0, r1, #31 +; CHECK-FP16-NEXT: vmov.32 d0[1], r0 +; CHECK-FP16-NEXT: bx lr entry: %conv = fptosi <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -4071,72 +5938,42 @@ entry: define <2 x i64> @utest_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-LABEL: utest_f16i64_mm: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, lr} +; CHECK-NEON-NEXT: .save {r4, lr} +; CHECK-NEON-NEXT: push {r4, lr} ; CHECK-NEON-NEXT: .vsave {d8} ; CHECK-NEON-NEXT: vpush {d8} ; CHECK-NEON-NEXT: vmov r0, s0 ; CHECK-NEON-NEXT: vmov.f32 s16, s1 ; CHECK-NEON-NEXT: bl __aeabi_h2f -; CHECK-NEON-NEXT: mov r5, r0 +; CHECK-NEON-NEXT: mov r4, r0 ; CHECK-NEON-NEXT: vmov r0, s16 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 -; CHECK-NEON-NEXT: bl __fixunssfti -; CHECK-NEON-NEXT: mov r4, r1 -; CHECK-NEON-NEXT: subs r1, r2, #1 -; CHECK-NEON-NEXT: vmov s0, r5 -; CHECK-NEON-NEXT: sbcs r1, r3, #0 -; CHECK-NEON-NEXT: mov r5, #0 -; CHECK-NEON-NEXT: mov r6, #0 -; CHECK-NEON-NEXT: movwlo r5, #1 -; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: moveq r4, r5 -; CHECK-NEON-NEXT: movne r5, r0 -; CHECK-NEON-NEXT: bl __fixunssfti -; CHECK-NEON-NEXT: subs r2, r2, #1 -; CHECK-NEON-NEXT: vmov.32 d1[0], r5 -; CHECK-NEON-NEXT: sbcs r2, r3, #0 -; CHECK-NEON-NEXT: movwlo r6, #1 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: moveq r0, r6 -; CHECK-NEON-NEXT: movne r6, r1 -; CHECK-NEON-NEXT: vmov.32 d0[0], r0 -; CHECK-NEON-NEXT: vmov.32 d1[1], r4 -; CHECK-NEON-NEXT: vmov.32 d0[1], r6 +; CHECK-NEON-NEXT: vmov s4, r4 +; CHECK-NEON-NEXT: vcvt.u32.f32 s2, s0 +; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s4 +; CHECK-NEON-NEXT: vldr s3, .LCPI52_0 +; CHECK-NEON-NEXT: vmov.f32 s1, s3 ; CHECK-NEON-NEXT: vpop {d8} -; CHECK-NEON-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEON-NEXT: pop {r4, pc} +; CHECK-NEON-NEXT: .p2align 2 +; CHECK-NEON-NEXT: @ %bb.1: +; CHECK-NEON-NEXT: .LCPI52_0: +; CHECK-NEON-NEXT: .long 0x00000000 @ float 0 ; ; CHECK-FP16-LABEL: utest_f16i64_mm: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, lr} -; CHECK-FP16-NEXT: vmov.u16 r0, d0[1] -; CHECK-FP16-NEXT: vmov.u16 r6, d0[0] -; CHECK-FP16-NEXT: vmov s0, r0 -; CHECK-FP16-NEXT: bl __fixunshfti -; CHECK-FP16-NEXT: mov r4, r1 -; CHECK-FP16-NEXT: subs r1, r2, #1 -; CHECK-FP16-NEXT: vmov s0, r6 -; CHECK-FP16-NEXT: sbcs r1, r3, #0 -; CHECK-FP16-NEXT: mov r6, #0 -; CHECK-FP16-NEXT: mov r5, #0 -; CHECK-FP16-NEXT: movwlo r6, #1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: moveq r4, r6 -; CHECK-FP16-NEXT: movne r6, r0 -; CHECK-FP16-NEXT: bl __fixunshfti -; CHECK-FP16-NEXT: subs r2, r2, #1 -; CHECK-FP16-NEXT: vmov.32 d1[0], r6 -; CHECK-FP16-NEXT: sbcs r2, r3, #0 -; CHECK-FP16-NEXT: movwlo r5, #1 -; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: moveq r0, r5 -; CHECK-FP16-NEXT: movne r5, r1 -; CHECK-FP16-NEXT: vmov.32 d0[0], r0 -; CHECK-FP16-NEXT: vmov.32 d1[1], r4 -; CHECK-FP16-NEXT: vmov.32 d0[1], r5 -; CHECK-FP16-NEXT: pop {r4, r5, r6, pc} +; CHECK-FP16-NEXT: vcvt.u32.f16 s4, s0 +; CHECK-FP16-NEXT: vmovx.f16 s0, s0 +; CHECK-FP16-NEXT: vcvt.u32.f16 s6, s0 +; CHECK-FP16-NEXT: vldr s7, .LCPI52_0 +; CHECK-FP16-NEXT: vmov.f32 s5, s7 +; CHECK-FP16-NEXT: vorr q0, q1, q1 +; CHECK-FP16-NEXT: bx lr +; CHECK-FP16-NEXT: .p2align 2 +; CHECK-FP16-NEXT: @ %bb.1: +; CHECK-FP16-NEXT: .LCPI52_0: +; CHECK-FP16-NEXT: .long 0x00000000 @ float 0 entry: %conv = fptoui <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -4147,90 +5984,89 @@ entry: define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-LABEL: ustest_f16i64_mm: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-NEON-NEXT: .save {r4, lr} +; CHECK-NEON-NEXT: push {r4, lr} ; CHECK-NEON-NEXT: .vsave {d8} ; CHECK-NEON-NEXT: vpush {d8} ; CHECK-NEON-NEXT: vmov r0, s0 ; CHECK-NEON-NEXT: vmov.f32 s16, s1 ; CHECK-NEON-NEXT: bl __aeabi_h2f -; CHECK-NEON-NEXT: mov r6, r0 +; CHECK-NEON-NEXT: mov r4, r0 ; CHECK-NEON-NEXT: vmov r0, s16 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 -; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: mov r5, r0 -; CHECK-NEON-NEXT: subs r0, r2, #1 -; CHECK-NEON-NEXT: sbcs r0, r3, #0 -; CHECK-NEON-NEXT: vmov s0, r6 +; CHECK-NEON-NEXT: mov r1, #1 +; CHECK-NEON-NEXT: vmov s2, r4 ; CHECK-NEON-NEXT: mov r0, #0 -; CHECK-NEON-NEXT: mov r4, r1 +; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0 +; CHECK-NEON-NEXT: vcvt.s32.f32 s2, s2 +; CHECK-NEON-NEXT: vmov r2, s0 +; CHECK-NEON-NEXT: rsbs r3, r1, r2, asr #31 +; CHECK-NEON-NEXT: rscs r3, r0, r2, asr #31 +; CHECK-NEON-NEXT: mov r3, #0 +; CHECK-NEON-NEXT: movwlt r3, #1 +; CHECK-NEON-NEXT: cmp r3, #0 +; CHECK-NEON-NEXT: mov r4, r3 +; CHECK-NEON-NEXT: asrne r3, r2, #31 +; CHECK-NEON-NEXT: movne r4, r2 +; CHECK-NEON-NEXT: vmov r2, s2 +; CHECK-NEON-NEXT: cmp r3, #0 +; CHECK-NEON-NEXT: movwmi r4, #0 +; CHECK-NEON-NEXT: vmov.32 d1[0], r4 +; CHECK-NEON-NEXT: rsbs r1, r1, r2, asr #31 +; CHECK-NEON-NEXT: rscs r1, r0, r2, asr #31 ; CHECK-NEON-NEXT: movwlt r0, #1 ; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: moveq r5, r0 -; CHECK-NEON-NEXT: moveq r4, r0 -; CHECK-NEON-NEXT: movne r0, r3 +; CHECK-NEON-NEXT: mov r1, r0 +; CHECK-NEON-NEXT: asrne r0, r2, #31 +; CHECK-NEON-NEXT: movne r1, r2 ; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: mov r7, #0 -; CHECK-NEON-NEXT: movwmi r4, #0 -; CHECK-NEON-NEXT: movwmi r5, #0 -; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: subs r2, r2, #1 -; CHECK-NEON-NEXT: vmov.32 d1[0], r5 -; CHECK-NEON-NEXT: sbcs r2, r3, #0 -; CHECK-NEON-NEXT: movwlt r7, #1 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: moveq r1, r7 -; CHECK-NEON-NEXT: moveq r0, r7 -; CHECK-NEON-NEXT: movne r7, r3 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: movwmi r0, #0 ; CHECK-NEON-NEXT: movwmi r1, #0 -; CHECK-NEON-NEXT: vmov.32 d0[0], r0 -; CHECK-NEON-NEXT: vmov.32 d1[1], r4 -; CHECK-NEON-NEXT: vmov.32 d0[1], r1 +; CHECK-NEON-NEXT: bic r0, r0, r0, asr #31 +; CHECK-NEON-NEXT: vmov.32 d0[0], r1 +; CHECK-NEON-NEXT: bic r1, r3, r3, asr #31 +; CHECK-NEON-NEXT: vmov.32 d1[1], r1 +; CHECK-NEON-NEXT: vmov.32 d0[1], r0 ; CHECK-NEON-NEXT: vpop {d8} -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} +; CHECK-NEON-NEXT: pop {r4, pc} ; ; CHECK-FP16-LABEL: ustest_f16i64_mm: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r11, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r11, lr} -; CHECK-FP16-NEXT: vmov.u16 r0, d0[1] -; CHECK-FP16-NEXT: vmov.u16 r7, d0[0] -; CHECK-FP16-NEXT: vmov s0, r0 -; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: mov r5, r0 -; CHECK-FP16-NEXT: subs r0, r2, #1 -; CHECK-FP16-NEXT: sbcs r0, r3, #0 -; CHECK-FP16-NEXT: vmov s0, r7 +; CHECK-FP16-NEXT: .save {r11, lr} +; CHECK-FP16-NEXT: push {r11, lr} +; CHECK-FP16-NEXT: vcvt.s32.f16 s2, s0 +; CHECK-FP16-NEXT: vmovx.f16 s0, s0 +; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-FP16-NEXT: mov r12, #1 +; CHECK-FP16-NEXT: vmov r1, s0 ; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: mov r4, r1 +; CHECK-FP16-NEXT: rsbs r3, r12, r1, asr #31 +; CHECK-FP16-NEXT: rscs r3, r0, r1, asr #31 +; CHECK-FP16-NEXT: mov r3, #0 +; CHECK-FP16-NEXT: movwlt r3, #1 +; CHECK-FP16-NEXT: cmp r3, #0 +; CHECK-FP16-NEXT: mov lr, r3 +; CHECK-FP16-NEXT: asrne r3, r1, #31 +; CHECK-FP16-NEXT: movne lr, r1 +; CHECK-FP16-NEXT: vmov r1, s2 +; CHECK-FP16-NEXT: cmp r3, #0 +; CHECK-FP16-NEXT: movwmi lr, #0 +; CHECK-FP16-NEXT: vmov.32 d1[0], lr +; CHECK-FP16-NEXT: rsbs r2, r12, r1, asr #31 +; CHECK-FP16-NEXT: rscs r2, r0, r1, asr #31 ; CHECK-FP16-NEXT: movwlt r0, #1 ; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: moveq r5, r0 -; CHECK-FP16-NEXT: moveq r4, r0 -; CHECK-FP16-NEXT: movne r0, r3 +; CHECK-FP16-NEXT: mov r2, r0 +; CHECK-FP16-NEXT: asrne r0, r1, #31 +; CHECK-FP16-NEXT: movne r2, r1 ; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: mov r6, #0 -; CHECK-FP16-NEXT: movwmi r4, #0 -; CHECK-FP16-NEXT: movwmi r5, #0 -; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: subs r2, r2, #1 -; CHECK-FP16-NEXT: vmov.32 d1[0], r5 -; CHECK-FP16-NEXT: sbcs r2, r3, #0 -; CHECK-FP16-NEXT: movwlt r6, #1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: moveq r1, r6 -; CHECK-FP16-NEXT: moveq r0, r6 -; CHECK-FP16-NEXT: movne r6, r3 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: movwmi r0, #0 -; CHECK-FP16-NEXT: movwmi r1, #0 -; CHECK-FP16-NEXT: vmov.32 d0[0], r0 -; CHECK-FP16-NEXT: vmov.32 d1[1], r4 -; CHECK-FP16-NEXT: vmov.32 d0[1], r1 -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r11, pc} +; CHECK-FP16-NEXT: movwmi r2, #0 +; CHECK-FP16-NEXT: bic r1, r3, r3, asr #31 +; CHECK-FP16-NEXT: vmov.32 d0[0], r2 +; CHECK-FP16-NEXT: bic r0, r0, r0, asr #31 +; CHECK-FP16-NEXT: vmov.32 d1[1], r1 +; CHECK-FP16-NEXT: vmov.32 d0[1], r0 +; CHECK-FP16-NEXT: pop {r11, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) diff --git a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll index bccb0ad150509..7e8656acfb8e9 100644 --- a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll @@ -743,304 +743,906 @@ define i64 @test_signed_i64_f32(float %f) nounwind { define i100 @test_signed_i100_f32(float %f) nounwind { ; SOFT-LABEL: test_signed_i100_f32: -; SOFT: @ %bb.0: +; SOFT: @ %bb.0: @ %fp-to-i-entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #20 -; SOFT-NEXT: sub sp, #20 +; SOFT-NEXT: .pad #60 +; SOFT-NEXT: sub sp, #60 ; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: movs r0, #241 -; SOFT-NEXT: lsls r1, r0, #24 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: lsrs r0, r0, #23 +; SOFT-NEXT: uxtb r7, r0 +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: mov r5, r6 +; SOFT-NEXT: cmp r7, #127 +; SOFT-NEXT: blo .LBB8_4 +; SOFT-NEXT: @ %bb.1: @ %fp-to-i-entry ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: movs r5, #7 -; SOFT-NEXT: str r5, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: mvns r7, r5 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB8_17 -; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: beq .LBB8_18 -; SOFT-NEXT: .LBB8_2: -; SOFT-NEXT: bne .LBB8_4 -; SOFT-NEXT: .LBB8_3: -; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: bl __aeabi_fcmpun +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: bne .LBB8_5 +; SOFT-NEXT: @ %bb.2: @ %fp-to-i-if-check.saturate +; SOFT-NEXT: asrs r0, r4, #31 +; SOFT-NEXT: cmp r7, #226 +; SOFT-NEXT: blo .LBB8_6 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-saturate +; SOFT-NEXT: movs r6, #7 +; SOFT-NEXT: eors r6, r0 +; SOFT-NEXT: mvns r5, r0 ; SOFT-NEXT: .LBB8_4: -; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: .LBB8_5: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: add sp, #60 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB8_6: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: movs r6, #1 +; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: orrs r6, r0 +; SOFT-NEXT: ldr r0, .LCPI8_0 +; SOFT-NEXT: ands r4, r0 +; SOFT-NEXT: adds r0, r4, r0 +; SOFT-NEXT: adds r0, r0, #1 +; SOFT-NEXT: cmp r7, #149 +; SOFT-NEXT: bhi .LBB8_8 +; SOFT-NEXT: @ %bb.7: @ %fp-to-i-if-exp.small +; SOFT-NEXT: movs r1, #150 +; SOFT-NEXT: subs r1, r1, r7 +; SOFT-NEXT: lsrs r0, r1 ; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: beq .LBB8_6 -; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r7, r3 -; SOFT-NEXT: .LBB8_6: -; SOFT-NEXT: ldr r1, .LCPI8_0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: movs r7, #0 +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: str r1, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: adds r6, r0, r1 +; SOFT-NEXT: adcs r4, r7 +; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: adds r0, r0, r6 +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: adcs r1, r7 +; SOFT-NEXT: adds r0, r4, r1 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r6, r7 +; SOFT-NEXT: adcs r6, r7 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: adcs r4, r6 +; SOFT-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r6, r1 +; SOFT-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adds r2, r2, r0 +; SOFT-NEXT: adcs r6, r4 +; SOFT-NEXT: b .LBB8_5 +; SOFT-NEXT: .LBB8_8: @ %fp-to-i-if-exp.large +; SOFT-NEXT: str r6, [sp, #16] @ 4-byte Spill ; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: mvns r5, r6 -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: beq .LBB8_19 -; SOFT-NEXT: @ %bb.7: +; SOFT-NEXT: str r6, [sp, #52] +; SOFT-NEXT: str r6, [sp, #48] +; SOFT-NEXT: str r6, [sp, #44] +; SOFT-NEXT: str r0, [sp, #40] +; SOFT-NEXT: str r6, [sp, #36] +; SOFT-NEXT: str r6, [sp, #32] +; SOFT-NEXT: str r6, [sp, #28] +; SOFT-NEXT: str r6, [sp, #24] +; SOFT-NEXT: subs r7, #150 +; SOFT-NEXT: movs r5, #31 +; SOFT-NEXT: lsrs r0, r7, #3 +; SOFT-NEXT: ands r7, r5 +; SOFT-NEXT: movs r1, #12 +; SOFT-NEXT: ands r1, r0 +; SOFT-NEXT: add r0, sp, #24 +; SOFT-NEXT: adds r0, #16 +; SOFT-NEXT: subs r4, r0, r1 +; SOFT-NEXT: ldr r0, [r4, #4] +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [r4, #8] +; SOFT-NEXT: ldr r1, [r4, #12] +; SOFT-NEXT: lsrs r3, r0, #1 +; SOFT-NEXT: lsls r0, r7 +; SOFT-NEXT: eors r5, r7 +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: lsrs r2, r2, #1 +; SOFT-NEXT: lsrs r2, r5 +; SOFT-NEXT: orrs r2, r0 +; SOFT-NEXT: lsls r1, r7 +; SOFT-NEXT: lsrs r3, r5 +; SOFT-NEXT: orrs r3, r1 +; SOFT-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: lsls r1, r7 +; SOFT-NEXT: ldr r4, [r4] +; SOFT-NEXT: lsrs r0, r4, #1 +; SOFT-NEXT: lsrs r0, r5 +; SOFT-NEXT: orrs r0, r1 +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: lsls r4, r7 +; SOFT-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: str r5, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r0 +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill ; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: beq .LBB8_20 -; SOFT-NEXT: .LBB8_8: -; SOFT-NEXT: mov r7, r5 -; SOFT-NEXT: beq .LBB8_21 -; SOFT-NEXT: .LBB8_9: -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB8_11 -; SOFT-NEXT: .LBB8_10: -; SOFT-NEXT: ldr r5, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB8_11: +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: ldr r5, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: mov r7, r1 ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: bl __aeabi_fcmpun -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: beq .LBB8_22 -; SOFT-NEXT: @ %bb.12: ; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: beq .LBB8_23 -; SOFT-NEXT: .LBB8_13: -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: beq .LBB8_24 -; SOFT-NEXT: .LBB8_14: -; SOFT-NEXT: bne .LBB8_16 -; SOFT-NEXT: .LBB8_15: -; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: .LBB8_16: +; SOFT-NEXT: mov r2, r5 ; SOFT-NEXT: mov r3, r6 -; SOFT-NEXT: add sp, #20 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB8_17: -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: bne .LBB8_2 -; SOFT-NEXT: .LBB8_18: +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: adcs r7, r6 +; SOFT-NEXT: str r7, [sp] @ 4-byte Spill +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: beq .LBB8_3 -; SOFT-NEXT: b .LBB8_4 -; SOFT-NEXT: .LBB8_19: -; SOFT-NEXT: str r7, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: bne .LBB8_8 -; SOFT-NEXT: .LBB8_20: +; SOFT-NEXT: ldr r7, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: adcs r1, r6 +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: adcs r4, r6 ; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: mov r7, r5 -; SOFT-NEXT: bne .LBB8_9 -; SOFT-NEXT: .LBB8_21: -; SOFT-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: beq .LBB8_10 -; SOFT-NEXT: b .LBB8_11 -; SOFT-NEXT: .LBB8_22: -; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: bne .LBB8_13 -; SOFT-NEXT: .LBB8_23: -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: bne .LBB8_14 -; SOFT-NEXT: .LBB8_24: -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: beq .LBB8_15 -; SOFT-NEXT: b .LBB8_16 +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r6, r1 +; SOFT-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: adcs r6, r4 +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r2, r0, r2 +; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adcs r6, r0 +; SOFT-NEXT: b .LBB8_5 ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.25: +; SOFT-NEXT: @ %bb.9: ; SOFT-NEXT: .LCPI8_0: -; SOFT-NEXT: .long 1895825407 @ 0x70ffffff +; SOFT-NEXT: .long 8388607 @ 0x7fffff ; -; VFP-LABEL: test_signed_i100_f32: -; VFP: @ %bb.0: -; VFP-NEXT: .save {r4, lr} -; VFP-NEXT: push {r4, lr} -; VFP-NEXT: mov r4, r0 -; VFP-NEXT: bl __fixsfti -; VFP-NEXT: vldr s0, .LCPI8_0 -; VFP-NEXT: vmov s2, r4 -; VFP-NEXT: vldr s4, .LCPI8_1 -; VFP-NEXT: vcmp.f32 s2, s0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt lt -; VFP-NEXT: movlt r0, #0 -; VFP-NEXT: movlt r1, #0 -; VFP-NEXT: movlt r2, #0 -; VFP-NEXT: mvnlt r3, #7 -; VFP-NEXT: vcmp.f32 s2, s4 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt gt -; VFP-NEXT: movgt r3, #7 -; VFP-NEXT: movgt.w r2, #-1 -; VFP-NEXT: movgt.w r1, #-1 -; VFP-NEXT: movgt.w r0, #-1 -; VFP-NEXT: vcmp.f32 s2, s2 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt vs -; VFP-NEXT: movvs r0, #0 -; VFP-NEXT: movvs r1, #0 -; VFP-NEXT: movvs r2, #0 -; VFP-NEXT: movvs r3, #0 -; VFP-NEXT: pop {r4, pc} -; VFP-NEXT: .p2align 2 -; VFP-NEXT: @ %bb.1: -; VFP-NEXT: .LCPI8_0: -; VFP-NEXT: .long 0xf1000000 @ float -6.338253E+29 -; VFP-NEXT: .LCPI8_1: -; VFP-NEXT: .long 0x70ffffff @ float 6.33825262E+29 +; VFP2-LABEL: test_signed_i100_f32: +; VFP2: @ %bb.0: @ %fp-to-i-entry +; VFP2-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: .pad #52 +; VFP2-NEXT: sub sp, #52 +; VFP2-NEXT: ubfx lr, r0, #23, #8 +; VFP2-NEXT: mov r12, r0 +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: cmp.w lr, #127 +; VFP2-NEXT: blo .LBB8_4 +; VFP2-NEXT: @ %bb.1: @ %fp-to-i-entry +; VFP2-NEXT: vmov s0, r12 +; VFP2-NEXT: movs r1, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: vcmp.f32 s0, s0 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: bvs.w .LBB8_8 +; VFP2-NEXT: @ %bb.2: @ %fp-to-i-if-check.saturate +; VFP2-NEXT: cmp.w lr, #226 +; VFP2-NEXT: blo .LBB8_5 +; VFP2-NEXT: @ %bb.3: @ %fp-to-i-if-saturate +; VFP2-NEXT: movs r0, #7 +; VFP2-NEXT: eor.w r3, r0, r12, asr #31 +; VFP2-NEXT: mvn.w r0, r12, asr #31 +; VFP2-NEXT: mov r1, r0 +; VFP2-NEXT: mov r2, r0 +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; VFP2-NEXT: .LBB8_4: +; VFP2-NEXT: movs r1, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; VFP2-NEXT: .LBB8_5: @ %fp-to-i-if-check.exp.size +; VFP2-NEXT: movs r0, #1 +; VFP2-NEXT: orr.w r8, r0, r12, asr #31 +; VFP2-NEXT: asr.w r11, r12, #31 +; VFP2-NEXT: bfi r12, r0, #23, #9 +; VFP2-NEXT: cmp.w lr, #149 +; VFP2-NEXT: bhi .LBB8_7 +; VFP2-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small +; VFP2-NEXT: rsb.w r0, lr, #150 +; VFP2-NEXT: movs r5, #0 +; VFP2-NEXT: lsr.w r4, r12, r0 +; VFP2-NEXT: umull r0, r1, r4, r8 +; VFP2-NEXT: umull r3, r2, r4, r11 +; VFP2-NEXT: adds r6, r3, r1 +; VFP2-NEXT: adcs r6, r2, #0 +; VFP2-NEXT: adc r12, r5, #0 +; VFP2-NEXT: adds r6, r3, r1 +; VFP2-NEXT: mla r6, r11, r4, r2 +; VFP2-NEXT: adcs r2, r3 +; VFP2-NEXT: umlal r1, r5, r4, r11 +; VFP2-NEXT: adc.w r3, r12, r6 +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; VFP2-NEXT: .LBB8_7: @ %fp-to-i-if-exp.large +; VFP2-NEXT: sub.w r0, lr, #150 +; VFP2-NEXT: movs r1, #12 +; VFP2-NEXT: add r2, sp, #16 +; VFP2-NEXT: mov.w r9, #0 +; VFP2-NEXT: and.w r1, r1, r0, lsr #3 +; VFP2-NEXT: adds r2, #16 +; VFP2-NEXT: strd r9, r9, [sp, #40] +; VFP2-NEXT: subs r1, r2, r1 +; VFP2-NEXT: strd r12, r9, [sp, #32] +; VFP2-NEXT: and r6, r0, #31 +; VFP2-NEXT: strd r9, r9, [sp, #24] +; VFP2-NEXT: eor r5, r6, #31 +; VFP2-NEXT: strd r9, r9, [sp, #16] +; VFP2-NEXT: ldrd r2, r3, [r1] +; VFP2-NEXT: str r3, [sp, #12] @ 4-byte Spill +; VFP2-NEXT: ldrd r4, lr, [r1, #8] +; VFP2-NEXT: lsrs r1, r2, #1 +; VFP2-NEXT: lsr.w r0, r1, r5 +; VFP2-NEXT: lsl.w r1, r3, r6 +; VFP2-NEXT: lsl.w r10, r2, r6 +; VFP2-NEXT: orr.w r12, r1, r0 +; VFP2-NEXT: umull r0, r1, r10, r8 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: umull r3, r7, r10, r11 +; VFP2-NEXT: umlal r1, r2, r12, r8 +; VFP2-NEXT: strd r7, r3, [sp, #4] @ 8-byte Folded Spill +; VFP2-NEXT: adds r1, r1, r3 +; VFP2-NEXT: adcs r2, r7 +; VFP2-NEXT: lsl.w r7, lr, r6 +; VFP2-NEXT: lsr.w lr, r4, #1 +; VFP2-NEXT: adc r3, r9, #0 +; VFP2-NEXT: mov r9, r4 +; VFP2-NEXT: lsr.w r4, lr, r5 +; VFP2-NEXT: orr.w lr, r7, r4 +; VFP2-NEXT: ldr r4, [sp, #12] @ 4-byte Reload +; VFP2-NEXT: lsl.w r6, r9, r6 +; VFP2-NEXT: ldr r7, [sp, #4] @ 4-byte Reload +; VFP2-NEXT: umlal r2, r3, r12, r11 +; VFP2-NEXT: lsrs.w r4, r4, #1 +; VFP2-NEXT: lsrs r4, r5 +; VFP2-NEXT: orrs r4, r6 +; VFP2-NEXT: umull r5, r6, r8, r4 +; VFP2-NEXT: mla r6, r8, lr, r6 +; VFP2-NEXT: mla r6, r11, r4, r6 +; VFP2-NEXT: mla r4, r11, r12, r7 +; VFP2-NEXT: mla r7, r11, r10, r4 +; VFP2-NEXT: ldr r4, [sp, #8] @ 4-byte Reload +; VFP2-NEXT: adds r5, r5, r4 +; VFP2-NEXT: adcs r7, r6 +; VFP2-NEXT: adds r2, r2, r5 +; VFP2-NEXT: adcs r3, r7 +; VFP2-NEXT: .LBB8_8: @ %fp-to-i-cleanup +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; FP16-LABEL: test_signed_i100_f32: +; FP16: @ %bb.0: @ %fp-to-i-entry +; FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FP16-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FP16-NEXT: .pad #52 +; FP16-NEXT: sub sp, #52 +; FP16-NEXT: ubfx lr, r0, #23, #8 +; FP16-NEXT: mov r12, r0 +; FP16-NEXT: movs r0, #0 +; FP16-NEXT: cmp.w lr, #127 +; FP16-NEXT: blo .LBB8_4 +; FP16-NEXT: @ %bb.1: @ %fp-to-i-entry +; FP16-NEXT: vmov s0, r12 +; FP16-NEXT: movs r1, #0 +; FP16-NEXT: movs r2, #0 +; FP16-NEXT: movs r3, #0 +; FP16-NEXT: vcmp.f32 s0, s0 +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: bvs.w .LBB8_8 +; FP16-NEXT: @ %bb.2: @ %fp-to-i-if-check.saturate +; FP16-NEXT: cmp.w lr, #226 +; FP16-NEXT: blo .LBB8_5 +; FP16-NEXT: @ %bb.3: @ %fp-to-i-if-saturate +; FP16-NEXT: movs r0, #7 +; FP16-NEXT: eor.w r3, r0, r12, asr #31 +; FP16-NEXT: mvn.w r0, r12, asr #31 +; FP16-NEXT: mov r1, r0 +; FP16-NEXT: mov r2, r0 +; FP16-NEXT: add sp, #52 +; FP16-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; FP16-NEXT: .LBB8_4: +; FP16-NEXT: movs r1, #0 +; FP16-NEXT: movs r2, #0 +; FP16-NEXT: movs r3, #0 +; FP16-NEXT: add sp, #52 +; FP16-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; FP16-NEXT: .LBB8_5: @ %fp-to-i-if-check.exp.size +; FP16-NEXT: movs r0, #1 +; FP16-NEXT: orr.w r11, r0, r12, asr #31 +; FP16-NEXT: asr.w r7, r12, #31 +; FP16-NEXT: bfi r12, r0, #23, #9 +; FP16-NEXT: cmp.w lr, #149 +; FP16-NEXT: bhi .LBB8_7 +; FP16-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small +; FP16-NEXT: rsb.w r0, lr, #150 +; FP16-NEXT: movs r5, #0 +; FP16-NEXT: lsr.w r4, r12, r0 +; FP16-NEXT: umull r0, r1, r4, r11 +; FP16-NEXT: umull r3, r2, r4, r7 +; FP16-NEXT: adds r6, r3, r1 +; FP16-NEXT: adcs r6, r2, #0 +; FP16-NEXT: adc r12, r5, #0 +; FP16-NEXT: adds r6, r3, r1 +; FP16-NEXT: mla r6, r7, r4, r2 +; FP16-NEXT: adcs r2, r3 +; FP16-NEXT: umlal r1, r5, r4, r7 +; FP16-NEXT: adc.w r3, r12, r6 +; FP16-NEXT: add sp, #52 +; FP16-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; FP16-NEXT: .LBB8_7: @ %fp-to-i-if-exp.large +; FP16-NEXT: sub.w r0, lr, #150 +; FP16-NEXT: movs r1, #12 +; FP16-NEXT: add r2, sp, #16 +; FP16-NEXT: mov.w r10, #0 +; FP16-NEXT: and.w r1, r1, r0, lsr #3 +; FP16-NEXT: adds r2, #16 +; FP16-NEXT: strd r10, r10, [sp, #40] +; FP16-NEXT: subs r1, r2, r1 +; FP16-NEXT: strd r12, r10, [sp, #32] +; FP16-NEXT: and r5, r0, #31 +; FP16-NEXT: strd r10, r10, [sp, #24] +; FP16-NEXT: eor r12, r5, #31 +; FP16-NEXT: strd r10, r10, [sp, #16] +; FP16-NEXT: movs r4, #0 +; FP16-NEXT: ldrd r2, r3, [r1] +; FP16-NEXT: str r3, [sp, #4] @ 4-byte Spill +; FP16-NEXT: ldr r6, [r1, #8] +; FP16-NEXT: str r6, [sp, #12] @ 4-byte Spill +; FP16-NEXT: ldr r1, [r1, #12] +; FP16-NEXT: str r1, [sp, #8] @ 4-byte Spill +; FP16-NEXT: lsrs r1, r2, #1 +; FP16-NEXT: lsr.w r0, r1, r12 +; FP16-NEXT: lsl.w r1, r3, r5 +; FP16-NEXT: lsls r2, r5 +; FP16-NEXT: orr.w r9, r1, r0 +; FP16-NEXT: umull r0, r1, r2, r11 +; FP16-NEXT: umull r3, r8, r2, r7 +; FP16-NEXT: umlal r1, r4, r9, r11 +; FP16-NEXT: str r3, [sp] @ 4-byte Spill +; FP16-NEXT: adds.w lr, r3, r1 +; FP16-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; FP16-NEXT: adcs.w r6, r4, r8 +; FP16-NEXT: umlal r1, r4, r2, r7 +; FP16-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; FP16-NEXT: lsl.w lr, r3, r5 +; FP16-NEXT: adc r10, r10, #0 +; FP16-NEXT: lsrs r3, r6, #1 +; FP16-NEXT: lsl.w r5, r6, r5 +; FP16-NEXT: lsr.w r3, r3, r12 +; FP16-NEXT: orr.w lr, lr, r3 +; FP16-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; FP16-NEXT: umlal r4, r10, r9, r7 +; FP16-NEXT: lsrs.w r3, r3, #1 +; FP16-NEXT: lsr.w r3, r3, r12 +; FP16-NEXT: orrs r3, r5 +; FP16-NEXT: umull r12, r5, r11, r3 +; FP16-NEXT: mla r5, r11, lr, r5 +; FP16-NEXT: mla r3, r7, r3, r5 +; FP16-NEXT: mla r5, r7, r9, r8 +; FP16-NEXT: mla r2, r7, r2, r5 +; FP16-NEXT: ldr r7, [sp] @ 4-byte Reload +; FP16-NEXT: adds.w r7, r7, r12 +; FP16-NEXT: adcs r3, r2 +; FP16-NEXT: adds r2, r4, r7 +; FP16-NEXT: adc.w r3, r3, r10 +; FP16-NEXT: .LBB8_8: @ %fp-to-i-cleanup +; FP16-NEXT: add sp, #52 +; FP16-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} %x = call i100 @llvm.fptosi.sat.i100.f32(float %f) ret i100 %x } define i128 @test_signed_i128_f32(float %f) nounwind { ; SOFT-LABEL: test_signed_i128_f32: -; SOFT: @ %bb.0: +; SOFT: @ %bb.0: @ %fp-to-i-entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #20 -; SOFT-NEXT: sub sp, #20 +; SOFT-NEXT: .pad #60 +; SOFT-NEXT: sub sp, #60 ; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: movs r0, #255 -; SOFT-NEXT: lsls r1, r0, #24 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: lsrs r0, r0, #23 +; SOFT-NEXT: uxtb r7, r0 +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: mov r5, r6 +; SOFT-NEXT: cmp r7, #127 +; SOFT-NEXT: blo .LBB9_4 +; SOFT-NEXT: @ %bb.1: @ %fp-to-i-entry ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: movs r5, #1 -; SOFT-NEXT: lsls r7, r5, #31 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB9_18 -; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: beq .LBB9_19 -; SOFT-NEXT: .LBB9_2: -; SOFT-NEXT: bne .LBB9_4 -; SOFT-NEXT: .LBB9_3: -; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: bl __aeabi_fcmpun +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: bne .LBB9_5 +; SOFT-NEXT: @ %bb.2: @ %fp-to-i-if-check.saturate +; SOFT-NEXT: asrs r0, r4, #31 +; SOFT-NEXT: cmp r7, #254 +; SOFT-NEXT: blo .LBB9_6 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-saturate +; SOFT-NEXT: ldr r6, .LCPI9_1 +; SOFT-NEXT: eors r6, r0 +; SOFT-NEXT: mvns r5, r0 ; SOFT-NEXT: .LBB9_4: -; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: .LBB9_5: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: add sp, #60 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB9_6: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: movs r6, #1 +; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: orrs r6, r0 +; SOFT-NEXT: ldr r0, .LCPI9_0 +; SOFT-NEXT: ands r4, r0 +; SOFT-NEXT: adds r0, r4, r0 +; SOFT-NEXT: adds r0, r0, #1 +; SOFT-NEXT: cmp r7, #149 +; SOFT-NEXT: bhi .LBB9_8 +; SOFT-NEXT: @ %bb.7: @ %fp-to-i-if-exp.small +; SOFT-NEXT: movs r1, #150 +; SOFT-NEXT: subs r1, r1, r7 +; SOFT-NEXT: lsrs r0, r1 ; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: beq .LBB9_6 -; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r7, r3 -; SOFT-NEXT: .LBB9_6: -; SOFT-NEXT: ldr r1, .LCPI9_0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: movs r7, #0 +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: str r1, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: adds r6, r0, r1 +; SOFT-NEXT: adcs r4, r7 +; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: adds r0, r0, r6 +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: adcs r1, r7 +; SOFT-NEXT: adds r0, r4, r1 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r6, r7 +; SOFT-NEXT: adcs r6, r7 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: adcs r4, r6 +; SOFT-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r6, r1 +; SOFT-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adds r2, r2, r0 +; SOFT-NEXT: adcs r6, r4 +; SOFT-NEXT: b .LBB9_5 +; SOFT-NEXT: .LBB9_8: @ %fp-to-i-if-exp.large +; SOFT-NEXT: str r6, [sp, #16] @ 4-byte Spill ; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: mvns r5, r6 -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: beq .LBB9_8 -; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: ldr r7, .LCPI9_1 -; SOFT-NEXT: .LBB9_8: -; SOFT-NEXT: str r7, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: str r6, [sp, #52] +; SOFT-NEXT: str r6, [sp, #48] +; SOFT-NEXT: str r6, [sp, #44] +; SOFT-NEXT: str r0, [sp, #40] +; SOFT-NEXT: str r6, [sp, #36] +; SOFT-NEXT: str r6, [sp, #32] +; SOFT-NEXT: str r6, [sp, #28] +; SOFT-NEXT: str r6, [sp, #24] +; SOFT-NEXT: subs r7, #150 +; SOFT-NEXT: movs r5, #31 +; SOFT-NEXT: lsrs r0, r7, #3 +; SOFT-NEXT: ands r7, r5 +; SOFT-NEXT: movs r1, #12 +; SOFT-NEXT: ands r1, r0 +; SOFT-NEXT: add r0, sp, #24 +; SOFT-NEXT: adds r0, #16 +; SOFT-NEXT: subs r4, r0, r1 +; SOFT-NEXT: ldr r0, [r4, #4] +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [r4, #8] +; SOFT-NEXT: ldr r1, [r4, #12] +; SOFT-NEXT: lsrs r3, r0, #1 +; SOFT-NEXT: lsls r0, r7 +; SOFT-NEXT: eors r5, r7 +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: lsrs r2, r2, #1 +; SOFT-NEXT: lsrs r2, r5 +; SOFT-NEXT: orrs r2, r0 +; SOFT-NEXT: lsls r1, r7 +; SOFT-NEXT: lsrs r3, r5 +; SOFT-NEXT: orrs r3, r1 +; SOFT-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: lsls r1, r7 +; SOFT-NEXT: ldr r4, [r4] +; SOFT-NEXT: lsrs r0, r4, #1 +; SOFT-NEXT: lsrs r0, r5 +; SOFT-NEXT: orrs r0, r1 +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: lsls r4, r7 +; SOFT-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: str r5, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r0 +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill ; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: beq .LBB9_20 -; SOFT-NEXT: @ %bb.9: -; SOFT-NEXT: mov r7, r5 -; SOFT-NEXT: beq .LBB9_21 -; SOFT-NEXT: .LBB9_10: -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB9_12 -; SOFT-NEXT: .LBB9_11: -; SOFT-NEXT: ldr r5, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB9_12: +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: ldr r5, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: mov r7, r1 ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: bl __aeabi_fcmpun -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: beq .LBB9_22 -; SOFT-NEXT: @ %bb.13: ; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: beq .LBB9_23 -; SOFT-NEXT: .LBB9_14: -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: beq .LBB9_24 -; SOFT-NEXT: .LBB9_15: -; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: bne .LBB9_17 -; SOFT-NEXT: .LBB9_16: -; SOFT-NEXT: mov r6, r3 -; SOFT-NEXT: .LBB9_17: +; SOFT-NEXT: mov r2, r5 ; SOFT-NEXT: mov r3, r6 -; SOFT-NEXT: add sp, #20 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB9_18: -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: bne .LBB9_2 -; SOFT-NEXT: .LBB9_19: +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: adcs r7, r6 +; SOFT-NEXT: str r7, [sp] @ 4-byte Spill +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: beq .LBB9_3 -; SOFT-NEXT: b .LBB9_4 -; SOFT-NEXT: .LBB9_20: +; SOFT-NEXT: ldr r7, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: adcs r1, r6 +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: adcs r4, r6 ; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: mov r7, r5 -; SOFT-NEXT: bne .LBB9_10 -; SOFT-NEXT: .LBB9_21: -; SOFT-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: beq .LBB9_11 -; SOFT-NEXT: b .LBB9_12 -; SOFT-NEXT: .LBB9_22: -; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: bne .LBB9_14 -; SOFT-NEXT: .LBB9_23: -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: bne .LBB9_15 -; SOFT-NEXT: .LBB9_24: -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: beq .LBB9_16 -; SOFT-NEXT: b .LBB9_17 +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r6, r1 +; SOFT-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: adcs r6, r4 +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r2, r0, r2 +; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adcs r6, r0 +; SOFT-NEXT: b .LBB9_5 ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.25: +; SOFT-NEXT: @ %bb.9: ; SOFT-NEXT: .LCPI9_0: -; SOFT-NEXT: .long 2130706431 @ 0x7effffff +; SOFT-NEXT: .long 8388607 @ 0x7fffff ; SOFT-NEXT: .LCPI9_1: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; -; VFP-LABEL: test_signed_i128_f32: -; VFP: @ %bb.0: -; VFP-NEXT: .save {r4, lr} -; VFP-NEXT: push {r4, lr} -; VFP-NEXT: mov r4, r0 -; VFP-NEXT: bl __fixsfti -; VFP-NEXT: vldr s0, .LCPI9_0 -; VFP-NEXT: vmov s2, r4 -; VFP-NEXT: vldr s4, .LCPI9_1 -; VFP-NEXT: vcmp.f32 s2, s0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt lt -; VFP-NEXT: movlt r0, #0 -; VFP-NEXT: movlt r1, #0 -; VFP-NEXT: movlt r2, #0 -; VFP-NEXT: movlt.w r3, #-2147483648 -; VFP-NEXT: vcmp.f32 s2, s4 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt gt -; VFP-NEXT: mvngt r3, #-2147483648 -; VFP-NEXT: movgt.w r2, #-1 -; VFP-NEXT: movgt.w r1, #-1 -; VFP-NEXT: movgt.w r0, #-1 -; VFP-NEXT: vcmp.f32 s2, s2 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt vs -; VFP-NEXT: movvs r0, #0 -; VFP-NEXT: movvs r1, #0 -; VFP-NEXT: movvs r2, #0 -; VFP-NEXT: movvs r3, #0 -; VFP-NEXT: pop {r4, pc} -; VFP-NEXT: .p2align 2 -; VFP-NEXT: @ %bb.1: -; VFP-NEXT: .LCPI9_0: -; VFP-NEXT: .long 0xff000000 @ float -1.70141183E+38 -; VFP-NEXT: .LCPI9_1: -; VFP-NEXT: .long 0x7effffff @ float 1.70141173E+38 +; VFP2-LABEL: test_signed_i128_f32: +; VFP2: @ %bb.0: @ %fp-to-i-entry +; VFP2-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: .pad #52 +; VFP2-NEXT: sub sp, #52 +; VFP2-NEXT: ubfx lr, r0, #23, #8 +; VFP2-NEXT: mov r12, r0 +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: cmp.w lr, #127 +; VFP2-NEXT: blo .LBB9_4 +; VFP2-NEXT: @ %bb.1: @ %fp-to-i-entry +; VFP2-NEXT: vmov s0, r12 +; VFP2-NEXT: movs r1, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: vcmp.f32 s0, s0 +; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: bvs.w .LBB9_8 +; VFP2-NEXT: @ %bb.2: @ %fp-to-i-if-check.saturate +; VFP2-NEXT: cmp.w lr, #254 +; VFP2-NEXT: blo .LBB9_5 +; VFP2-NEXT: @ %bb.3: @ %fp-to-i-if-saturate +; VFP2-NEXT: mvn r0, #-2147483648 +; VFP2-NEXT: eor.w r3, r0, r12, asr #31 +; VFP2-NEXT: mvn.w r0, r12, asr #31 +; VFP2-NEXT: mov r1, r0 +; VFP2-NEXT: mov r2, r0 +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; VFP2-NEXT: .LBB9_4: +; VFP2-NEXT: movs r1, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; VFP2-NEXT: .LBB9_5: @ %fp-to-i-if-check.exp.size +; VFP2-NEXT: movs r0, #1 +; VFP2-NEXT: orr.w r8, r0, r12, asr #31 +; VFP2-NEXT: asr.w r11, r12, #31 +; VFP2-NEXT: bfi r12, r0, #23, #9 +; VFP2-NEXT: cmp.w lr, #149 +; VFP2-NEXT: bhi .LBB9_7 +; VFP2-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small +; VFP2-NEXT: rsb.w r0, lr, #150 +; VFP2-NEXT: movs r5, #0 +; VFP2-NEXT: lsr.w r4, r12, r0 +; VFP2-NEXT: umull r0, r1, r4, r8 +; VFP2-NEXT: umull r3, r2, r4, r11 +; VFP2-NEXT: adds r6, r3, r1 +; VFP2-NEXT: adcs r6, r2, #0 +; VFP2-NEXT: adc r12, r5, #0 +; VFP2-NEXT: adds r6, r3, r1 +; VFP2-NEXT: mla r6, r11, r4, r2 +; VFP2-NEXT: adcs r2, r3 +; VFP2-NEXT: umlal r1, r5, r4, r11 +; VFP2-NEXT: adc.w r3, r12, r6 +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; VFP2-NEXT: .LBB9_7: @ %fp-to-i-if-exp.large +; VFP2-NEXT: sub.w r0, lr, #150 +; VFP2-NEXT: movs r1, #12 +; VFP2-NEXT: add r2, sp, #16 +; VFP2-NEXT: mov.w r9, #0 +; VFP2-NEXT: and.w r1, r1, r0, lsr #3 +; VFP2-NEXT: adds r2, #16 +; VFP2-NEXT: strd r9, r9, [sp, #40] +; VFP2-NEXT: subs r1, r2, r1 +; VFP2-NEXT: strd r12, r9, [sp, #32] +; VFP2-NEXT: and r6, r0, #31 +; VFP2-NEXT: strd r9, r9, [sp, #24] +; VFP2-NEXT: eor r5, r6, #31 +; VFP2-NEXT: strd r9, r9, [sp, #16] +; VFP2-NEXT: ldrd r2, r3, [r1] +; VFP2-NEXT: str r3, [sp, #12] @ 4-byte Spill +; VFP2-NEXT: ldrd r4, lr, [r1, #8] +; VFP2-NEXT: lsrs r1, r2, #1 +; VFP2-NEXT: lsr.w r0, r1, r5 +; VFP2-NEXT: lsl.w r1, r3, r6 +; VFP2-NEXT: lsl.w r10, r2, r6 +; VFP2-NEXT: orr.w r12, r1, r0 +; VFP2-NEXT: umull r0, r1, r10, r8 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: umull r3, r7, r10, r11 +; VFP2-NEXT: umlal r1, r2, r12, r8 +; VFP2-NEXT: strd r7, r3, [sp, #4] @ 8-byte Folded Spill +; VFP2-NEXT: adds r1, r1, r3 +; VFP2-NEXT: adcs r2, r7 +; VFP2-NEXT: lsl.w r7, lr, r6 +; VFP2-NEXT: lsr.w lr, r4, #1 +; VFP2-NEXT: adc r3, r9, #0 +; VFP2-NEXT: mov r9, r4 +; VFP2-NEXT: lsr.w r4, lr, r5 +; VFP2-NEXT: orr.w lr, r7, r4 +; VFP2-NEXT: ldr r4, [sp, #12] @ 4-byte Reload +; VFP2-NEXT: lsl.w r6, r9, r6 +; VFP2-NEXT: ldr r7, [sp, #4] @ 4-byte Reload +; VFP2-NEXT: umlal r2, r3, r12, r11 +; VFP2-NEXT: lsrs.w r4, r4, #1 +; VFP2-NEXT: lsrs r4, r5 +; VFP2-NEXT: orrs r4, r6 +; VFP2-NEXT: umull r5, r6, r8, r4 +; VFP2-NEXT: mla r6, r8, lr, r6 +; VFP2-NEXT: mla r6, r11, r4, r6 +; VFP2-NEXT: mla r4, r11, r12, r7 +; VFP2-NEXT: mla r7, r11, r10, r4 +; VFP2-NEXT: ldr r4, [sp, #8] @ 4-byte Reload +; VFP2-NEXT: adds r5, r5, r4 +; VFP2-NEXT: adcs r7, r6 +; VFP2-NEXT: adds r2, r2, r5 +; VFP2-NEXT: adcs r3, r7 +; VFP2-NEXT: .LBB9_8: @ %fp-to-i-cleanup +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; FP16-LABEL: test_signed_i128_f32: +; FP16: @ %bb.0: @ %fp-to-i-entry +; FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FP16-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FP16-NEXT: .pad #52 +; FP16-NEXT: sub sp, #52 +; FP16-NEXT: ubfx lr, r0, #23, #8 +; FP16-NEXT: mov r12, r0 +; FP16-NEXT: movs r0, #0 +; FP16-NEXT: cmp.w lr, #127 +; FP16-NEXT: blo .LBB9_4 +; FP16-NEXT: @ %bb.1: @ %fp-to-i-entry +; FP16-NEXT: vmov s0, r12 +; FP16-NEXT: movs r1, #0 +; FP16-NEXT: movs r2, #0 +; FP16-NEXT: movs r3, #0 +; FP16-NEXT: vcmp.f32 s0, s0 +; FP16-NEXT: vmrs APSR_nzcv, fpscr +; FP16-NEXT: bvs.w .LBB9_8 +; FP16-NEXT: @ %bb.2: @ %fp-to-i-if-check.saturate +; FP16-NEXT: cmp.w lr, #254 +; FP16-NEXT: blo .LBB9_5 +; FP16-NEXT: @ %bb.3: @ %fp-to-i-if-saturate +; FP16-NEXT: mvn r0, #-2147483648 +; FP16-NEXT: eor.w r3, r0, r12, asr #31 +; FP16-NEXT: mvn.w r0, r12, asr #31 +; FP16-NEXT: mov r1, r0 +; FP16-NEXT: mov r2, r0 +; FP16-NEXT: add sp, #52 +; FP16-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; FP16-NEXT: .LBB9_4: +; FP16-NEXT: movs r1, #0 +; FP16-NEXT: movs r2, #0 +; FP16-NEXT: movs r3, #0 +; FP16-NEXT: add sp, #52 +; FP16-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; FP16-NEXT: .LBB9_5: @ %fp-to-i-if-check.exp.size +; FP16-NEXT: movs r0, #1 +; FP16-NEXT: orr.w r11, r0, r12, asr #31 +; FP16-NEXT: asr.w r7, r12, #31 +; FP16-NEXT: bfi r12, r0, #23, #9 +; FP16-NEXT: cmp.w lr, #149 +; FP16-NEXT: bhi .LBB9_7 +; FP16-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small +; FP16-NEXT: rsb.w r0, lr, #150 +; FP16-NEXT: movs r5, #0 +; FP16-NEXT: lsr.w r4, r12, r0 +; FP16-NEXT: umull r0, r1, r4, r11 +; FP16-NEXT: umull r3, r2, r4, r7 +; FP16-NEXT: adds r6, r3, r1 +; FP16-NEXT: adcs r6, r2, #0 +; FP16-NEXT: adc r12, r5, #0 +; FP16-NEXT: adds r6, r3, r1 +; FP16-NEXT: mla r6, r7, r4, r2 +; FP16-NEXT: adcs r2, r3 +; FP16-NEXT: umlal r1, r5, r4, r7 +; FP16-NEXT: adc.w r3, r12, r6 +; FP16-NEXT: add sp, #52 +; FP16-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; FP16-NEXT: .LBB9_7: @ %fp-to-i-if-exp.large +; FP16-NEXT: sub.w r0, lr, #150 +; FP16-NEXT: movs r1, #12 +; FP16-NEXT: add r2, sp, #16 +; FP16-NEXT: mov.w r10, #0 +; FP16-NEXT: and.w r1, r1, r0, lsr #3 +; FP16-NEXT: adds r2, #16 +; FP16-NEXT: strd r10, r10, [sp, #40] +; FP16-NEXT: subs r1, r2, r1 +; FP16-NEXT: strd r12, r10, [sp, #32] +; FP16-NEXT: and r5, r0, #31 +; FP16-NEXT: strd r10, r10, [sp, #24] +; FP16-NEXT: eor r12, r5, #31 +; FP16-NEXT: strd r10, r10, [sp, #16] +; FP16-NEXT: movs r4, #0 +; FP16-NEXT: ldrd r2, r3, [r1] +; FP16-NEXT: str r3, [sp, #4] @ 4-byte Spill +; FP16-NEXT: ldr r6, [r1, #8] +; FP16-NEXT: str r6, [sp, #12] @ 4-byte Spill +; FP16-NEXT: ldr r1, [r1, #12] +; FP16-NEXT: str r1, [sp, #8] @ 4-byte Spill +; FP16-NEXT: lsrs r1, r2, #1 +; FP16-NEXT: lsr.w r0, r1, r12 +; FP16-NEXT: lsl.w r1, r3, r5 +; FP16-NEXT: lsls r2, r5 +; FP16-NEXT: orr.w r9, r1, r0 +; FP16-NEXT: umull r0, r1, r2, r11 +; FP16-NEXT: umull r3, r8, r2, r7 +; FP16-NEXT: umlal r1, r4, r9, r11 +; FP16-NEXT: str r3, [sp] @ 4-byte Spill +; FP16-NEXT: adds.w lr, r3, r1 +; FP16-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; FP16-NEXT: adcs.w r6, r4, r8 +; FP16-NEXT: umlal r1, r4, r2, r7 +; FP16-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; FP16-NEXT: lsl.w lr, r3, r5 +; FP16-NEXT: adc r10, r10, #0 +; FP16-NEXT: lsrs r3, r6, #1 +; FP16-NEXT: lsl.w r5, r6, r5 +; FP16-NEXT: lsr.w r3, r3, r12 +; FP16-NEXT: orr.w lr, lr, r3 +; FP16-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; FP16-NEXT: umlal r4, r10, r9, r7 +; FP16-NEXT: lsrs.w r3, r3, #1 +; FP16-NEXT: lsr.w r3, r3, r12 +; FP16-NEXT: orrs r3, r5 +; FP16-NEXT: umull r12, r5, r11, r3 +; FP16-NEXT: mla r5, r11, lr, r5 +; FP16-NEXT: mla r3, r7, r3, r5 +; FP16-NEXT: mla r5, r7, r9, r8 +; FP16-NEXT: mla r2, r7, r2, r5 +; FP16-NEXT: ldr r7, [sp] @ 4-byte Reload +; FP16-NEXT: adds.w r7, r7, r12 +; FP16-NEXT: adcs r3, r2 +; FP16-NEXT: adds r2, r4, r7 +; FP16-NEXT: adc.w r3, r3, r10 +; FP16-NEXT: .LBB9_8: @ %fp-to-i-cleanup +; FP16-NEXT: add sp, #52 +; FP16-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} %x = call i128 @llvm.fptosi.sat.i128.f32(float %f) ret i128 %x } @@ -1987,409 +2589,1057 @@ define i64 @test_signed_i64_f64(double %f) nounwind { define i100 @test_signed_i100_f64(double %f) nounwind { ; SOFT-LABEL: test_signed_i100_f64: -; SOFT: @ %bb.0: +; SOFT: @ %bb.0: @ %fp-to-i-entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #20 -; SOFT-NEXT: sub sp, #20 +; SOFT-NEXT: .pad #68 +; SOFT-NEXT: sub sp, #68 ; SOFT-NEXT: mov r5, r1 -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: movs r4, #0 -; SOFT-NEXT: ldr r3, .LCPI18_0 -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: bl __aeabi_dcmpge ; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: bl __fixdfti -; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: movs r0, #7 -; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: mvns r0, r0 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB18_17 -; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: beq .LBB18_18 -; SOFT-NEXT: .LBB18_2: -; SOFT-NEXT: beq .LBB18_19 -; SOFT-NEXT: .LBB18_3: -; SOFT-NEXT: beq .LBB18_5 -; SOFT-NEXT: .LBB18_4: -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: .LBB18_5: -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: mvns r7, r4 -; SOFT-NEXT: ldr r3, .LCPI18_1 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: bl __aeabi_dcmpgt -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: beq .LBB18_20 -; SOFT-NEXT: @ %bb.6: +; SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: ldr r0, .LCPI18_1 +; SOFT-NEXT: subs r0, #52 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: sbcs r1, r4 +; SOFT-NEXT: lsls r2, r5, #1 +; SOFT-NEXT: lsrs r6, r2, #21 +; SOFT-NEXT: subs r0, r6, r0 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: sbcs r0, r1 +; SOFT-NEXT: blo .LBB18_8 +; SOFT-NEXT: @ %bb.1: @ %fp-to-i-entry ; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: beq .LBB18_21 -; SOFT-NEXT: .LBB18_7: -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: bne .LBB18_9 -; SOFT-NEXT: .LBB18_8: -; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: .LBB18_9: -; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB18_11 -; SOFT-NEXT: @ %bb.10: -; SOFT-NEXT: ldr r7, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB18_11: -; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r2, r7 ; SOFT-NEXT: mov r3, r5 ; SOFT-NEXT: bl __aeabi_dcmpun ; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: beq .LBB18_22 -; SOFT-NEXT: @ %bb.12: ; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: beq .LBB18_23 -; SOFT-NEXT: .LBB18_13: ; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: beq .LBB18_24 -; SOFT-NEXT: .LBB18_14: -; SOFT-NEXT: bne .LBB18_16 -; SOFT-NEXT: .LBB18_15: -; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: .LBB18_16: ; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: add sp, #20 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB18_17: -; SOFT-NEXT: str r7, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: bne .LBB18_2 -; SOFT-NEXT: .LBB18_18: -; SOFT-NEXT: str r7, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB18_3 -; SOFT-NEXT: .LBB18_19: -; SOFT-NEXT: str r7, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: bne .LBB18_4 -; SOFT-NEXT: b .LBB18_5 -; SOFT-NEXT: .LBB18_20: -; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload -; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: beq .LBB18_2 +; SOFT-NEXT: b .LBB18_12 +; SOFT-NEXT: .LBB18_2: @ %fp-to-i-if-check.saturate +; SOFT-NEXT: mvns r0, r5 +; SOFT-NEXT: ldr r3, .LCPI18_1 +; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: adds r1, #47 +; SOFT-NEXT: subs r1, r6, r1 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: sbcs r1, r4 +; SOFT-NEXT: blo .LBB18_9 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-saturate +; SOFT-NEXT: lsrs r0, r0, #31 +; SOFT-NEXT: mvns r4, r4 +; SOFT-NEXT: movs r3, #7 +; SOFT-NEXT: mvns r1, r3 +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: bne .LBB18_5 +; SOFT-NEXT: @ %bb.4: @ %fp-to-i-if-saturate +; SOFT-NEXT: mov r3, r1 +; SOFT-NEXT: .LBB18_5: @ %fp-to-i-if-saturate ; SOFT-NEXT: bne .LBB18_7 -; SOFT-NEXT: .LBB18_21: -; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: beq .LBB18_8 -; SOFT-NEXT: b .LBB18_9 -; SOFT-NEXT: .LBB18_22: -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: @ %bb.6: @ %fp-to-i-if-saturate +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: .LBB18_7: @ %fp-to-i-if-saturate ; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: bne .LBB18_13 -; SOFT-NEXT: .LBB18_23: -; SOFT-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: bne .LBB18_14 -; SOFT-NEXT: .LBB18_24: +; SOFT-NEXT: b .LBB18_12 +; SOFT-NEXT: .LBB18_8: +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: b .LBB18_12 +; SOFT-NEXT: .LBB18_9: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: asrs r1, r5, #31 +; SOFT-NEXT: movs r5, #1 +; SOFT-NEXT: str r1, [sp, #28] @ 4-byte Spill +; SOFT-NEXT: orrs r5, r1 +; SOFT-NEXT: ldr r1, .LCPI18_0 +; SOFT-NEXT: mvns r2, r1 +; SOFT-NEXT: orrs r0, r2 +; SOFT-NEXT: subs r1, r1, r0 +; SOFT-NEXT: mvns r0, r7 +; SOFT-NEXT: mvns r0, r0 +; SOFT-NEXT: cmp r3, r6 +; SOFT-NEXT: bls .LBB18_11 +; SOFT-NEXT: @ %bb.10: @ %fp-to-i-if-exp.small +; SOFT-NEXT: subs r2, r3, r6 +; SOFT-NEXT: bl __aeabi_llsr +; SOFT-NEXT: mov r6, r1 +; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r6, r1 +; SOFT-NEXT: adds r5, r0, r5 +; SOFT-NEXT: adcs r6, r4 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r7, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: adds r0, r0, r5 +; SOFT-NEXT: str r0, [sp, #24] @ 4-byte Spill +; SOFT-NEXT: adcs r1, r4 +; SOFT-NEXT: adds r0, r6, r1 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r5, r4 +; SOFT-NEXT: adcs r5, r4 +; SOFT-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: adcs r4, r5 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r3, r1 +; SOFT-NEXT: ldr r1, [sp, #24] @ 4-byte Reload ; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: beq .LBB18_15 -; SOFT-NEXT: b .LBB18_16 +; SOFT-NEXT: adds r2, r2, r0 +; SOFT-NEXT: adcs r3, r4 +; SOFT-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: b .LBB18_12 +; SOFT-NEXT: .LBB18_11: @ %fp-to-i-if-exp.large +; SOFT-NEXT: str r4, [sp, #60] +; SOFT-NEXT: str r4, [sp, #44] +; SOFT-NEXT: str r4, [sp, #40] +; SOFT-NEXT: str r4, [sp, #36] +; SOFT-NEXT: str r4, [sp, #32] +; SOFT-NEXT: add r2, sp, #48 +; SOFT-NEXT: stm r2!, {r0, r1, r4} +; SOFT-NEXT: ldr r0, .LCPI18_2 +; SOFT-NEXT: adds r6, r6, r0 +; SOFT-NEXT: movs r7, #31 +; SOFT-NEXT: lsrs r0, r6, #3 +; SOFT-NEXT: ands r6, r7 +; SOFT-NEXT: movs r1, #12 +; SOFT-NEXT: ands r1, r0 +; SOFT-NEXT: add r0, sp, #32 +; SOFT-NEXT: adds r0, #16 +; SOFT-NEXT: str r5, [sp, #24] @ 4-byte Spill +; SOFT-NEXT: subs r5, r0, r1 +; SOFT-NEXT: ldr r0, [r5, #4] +; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [r5, #8] +; SOFT-NEXT: ldr r1, [r5, #12] +; SOFT-NEXT: lsrs r3, r0, #1 +; SOFT-NEXT: lsls r0, r6 +; SOFT-NEXT: eors r7, r6 +; SOFT-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: lsrs r2, r2, #1 +; SOFT-NEXT: lsrs r2, r7 +; SOFT-NEXT: orrs r2, r0 +; SOFT-NEXT: lsls r1, r6 +; SOFT-NEXT: lsrs r3, r7 +; SOFT-NEXT: orrs r3, r1 +; SOFT-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: lsls r1, r6 +; SOFT-NEXT: ldr r5, [r5] +; SOFT-NEXT: lsrs r0, r5, #1 +; SOFT-NEXT: lsrs r0, r7 +; SOFT-NEXT: orrs r0, r1 +; SOFT-NEXT: mov r3, r0 +; SOFT-NEXT: lsls r5, r6 +; SOFT-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r6, r3 +; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r0 +; SOFT-NEXT: str r1, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r7, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: mov r6, r1 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: adds r7, r0, r1 +; SOFT-NEXT: adcs r6, r4 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r5, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: adds r0, r0, r7 +; SOFT-NEXT: str r0, [sp, #24] @ 4-byte Spill +; SOFT-NEXT: adcs r1, r4 +; SOFT-NEXT: adds r6, r6, r1 +; SOFT-NEXT: mov r7, r4 +; SOFT-NEXT: adcs r7, r4 +; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r3, r1 +; SOFT-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r6 +; SOFT-NEXT: adcs r3, r7 +; SOFT-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: adds r2, r0, r2 +; SOFT-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: adcs r3, r0 +; SOFT-NEXT: ldr r4, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: .LBB18_12: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: add sp, #68 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.25: +; SOFT-NEXT: @ %bb.13: ; SOFT-NEXT: .LCPI18_0: -; SOFT-NEXT: .long 3323985920 @ 0xc6200000 +; SOFT-NEXT: .long 1048575 @ 0xfffff ; SOFT-NEXT: .LCPI18_1: -; SOFT-NEXT: .long 1176502271 @ 0x461fffff +; SOFT-NEXT: .long 1075 @ 0x433 +; SOFT-NEXT: .LCPI18_2: +; SOFT-NEXT: .long 4294966221 @ 0xfffffbcd ; ; VFP2-LABEL: test_signed_i100_f64: -; VFP2: @ %bb.0: -; VFP2-NEXT: .save {r4, r5, r7, lr} -; VFP2-NEXT: push {r4, r5, r7, lr} -; VFP2-NEXT: mov r4, r1 -; VFP2-NEXT: mov r5, r0 -; VFP2-NEXT: bl __fixdfti -; VFP2-NEXT: vldr d16, .LCPI18_0 -; VFP2-NEXT: vmov d17, r5, r4 -; VFP2-NEXT: vldr d18, .LCPI18_1 -; VFP2-NEXT: vcmp.f64 d17, d16 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt lt -; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: movlt r1, #0 -; VFP2-NEXT: movlt r2, #0 -; VFP2-NEXT: mvnlt r3, #7 -; VFP2-NEXT: vcmp.f64 d17, d18 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt gt -; VFP2-NEXT: movgt r3, #7 -; VFP2-NEXT: movgt.w r2, #-1 -; VFP2-NEXT: movgt.w r1, #-1 -; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: vcmp.f64 d17, d17 +; VFP2: @ %bb.0: @ %fp-to-i-entry +; VFP2-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: .pad #52 +; VFP2-NEXT: sub sp, #52 +; VFP2-NEXT: vmov d16, r0, r1 +; VFP2-NEXT: ubfx r4, r1, #20, #11 +; VFP2-NEXT: mov lr, r1 +; VFP2-NEXT: mov r12, r0 +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: movw r1, #1023 +; VFP2-NEXT: cmp r4, r1 +; VFP2-NEXT: blo .LBB18_4 +; VFP2-NEXT: @ %bb.1: @ %fp-to-i-entry +; VFP2-NEXT: vcmp.f64 d16, d16 +; VFP2-NEXT: movs r1, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: movs r3, #0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt vs -; VFP2-NEXT: movvs r0, #0 -; VFP2-NEXT: movvs r1, #0 -; VFP2-NEXT: movvs r2, #0 -; VFP2-NEXT: movvs r3, #0 -; VFP2-NEXT: pop {r4, r5, r7, pc} -; VFP2-NEXT: .p2align 3 -; VFP2-NEXT: @ %bb.1: -; VFP2-NEXT: .LCPI18_0: -; VFP2-NEXT: .long 0 @ double -6.338253001141147E+29 -; VFP2-NEXT: .long 3323985920 -; VFP2-NEXT: .LCPI18_1: -; VFP2-NEXT: .long 4294967295 @ double 6.3382530011411463E+29 -; VFP2-NEXT: .long 1176502271 +; VFP2-NEXT: bvs.w .LBB18_8 +; VFP2-NEXT: @ %bb.2: @ %fp-to-i-if-check.saturate +; VFP2-NEXT: movw r0, #1122 +; VFP2-NEXT: cmp r4, r0 +; VFP2-NEXT: blo .LBB18_5 +; VFP2-NEXT: @ %bb.3: @ %fp-to-i-if-saturate +; VFP2-NEXT: mvn.w r0, lr +; VFP2-NEXT: mvn r3, #7 +; VFP2-NEXT: lsrs r0, r0, #31 +; VFP2-NEXT: itt ne +; VFP2-NEXT: movne r3, #7 +; VFP2-NEXT: movne.w r0, #-1 +; VFP2-NEXT: mov r1, r0 +; VFP2-NEXT: mov r2, r0 +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; VFP2-NEXT: .LBB18_4: +; VFP2-NEXT: movs r1, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; VFP2-NEXT: .LBB18_5: @ %fp-to-i-if-check.exp.size +; VFP2-NEXT: movs r0, #1 +; VFP2-NEXT: mov r1, lr +; VFP2-NEXT: bfi r1, r0, #20, #12 +; VFP2-NEXT: orr.w r11, r0, lr, asr #31 +; VFP2-NEXT: asr.w r8, lr, #31 +; VFP2-NEXT: movw r0, #1074 +; VFP2-NEXT: cmp r4, r0 +; VFP2-NEXT: bhi .LBB18_7 +; VFP2-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small +; VFP2-NEXT: movw r0, #1075 +; VFP2-NEXT: subs r3, r0, r4 +; VFP2-NEXT: rsb.w r0, r3, #32 +; VFP2-NEXT: movs r6, #0 +; VFP2-NEXT: lsr.w r12, r12, r3 +; VFP2-NEXT: lsr.w r3, r1, r3 +; VFP2-NEXT: lsl.w r0, r1, r0 +; VFP2-NEXT: orr.w r5, r12, r0 +; VFP2-NEXT: movw r0, #1043 +; VFP2-NEXT: mov.w r12, #0 +; VFP2-NEXT: subs r0, r0, r4 +; VFP2-NEXT: it pl +; VFP2-NEXT: lsrpl.w r5, r1, r0 +; VFP2-NEXT: umull r0, r4, r5, r11 +; VFP2-NEXT: it pl +; VFP2-NEXT: movpl r3, #0 +; VFP2-NEXT: umlal r4, r6, r3, r11 +; VFP2-NEXT: umull lr, r7, r5, r8 +; VFP2-NEXT: adds.w r1, lr, r4 +; VFP2-NEXT: adcs.w r4, r6, r7 +; VFP2-NEXT: mla r7, r8, r3, r7 +; VFP2-NEXT: adc r6, r12, #0 +; VFP2-NEXT: umlal r4, r6, r3, r8 +; VFP2-NEXT: mla r3, r8, r5, r7 +; VFP2-NEXT: adds.w r2, r4, lr +; VFP2-NEXT: adcs r3, r6 +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; VFP2-NEXT: .LBB18_7: @ %fp-to-i-if-exp.large +; VFP2-NEXT: subw r0, r4, #1075 +; VFP2-NEXT: add r3, sp, #16 +; VFP2-NEXT: strd r12, r1, [sp, #32] +; VFP2-NEXT: movs r1, #12 +; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: and.w r1, r1, r0, lsr #3 +; VFP2-NEXT: adds r3, #16 +; VFP2-NEXT: strd lr, lr, [sp, #40] +; VFP2-NEXT: strd lr, lr, [sp, #24] +; VFP2-NEXT: subs r1, r3, r1 +; VFP2-NEXT: strd lr, lr, [sp, #16] +; VFP2-NEXT: and r4, r0, #31 +; VFP2-NEXT: ldrd r3, r7, [r1] +; VFP2-NEXT: eor r6, r4, #31 +; VFP2-NEXT: str r7, [sp, #12] @ 4-byte Spill +; VFP2-NEXT: ldrd r10, r9, [r1, #8] +; VFP2-NEXT: lsrs r1, r3, #1 +; VFP2-NEXT: lsl.w r12, r3, r4 +; VFP2-NEXT: lsr.w r0, r1, r6 +; VFP2-NEXT: lsl.w r1, r7, r4 +; VFP2-NEXT: orr.w r5, r1, r0 +; VFP2-NEXT: umull r0, r1, r12, r11 +; VFP2-NEXT: movs r7, #0 +; VFP2-NEXT: umull r3, r2, r12, r8 +; VFP2-NEXT: umlal r1, r7, r5, r11 +; VFP2-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill +; VFP2-NEXT: adds r1, r1, r3 +; VFP2-NEXT: adcs r7, r2 +; VFP2-NEXT: lsl.w r2, r9, r4 +; VFP2-NEXT: lsr.w r9, r10, #1 +; VFP2-NEXT: adc lr, lr, #0 +; VFP2-NEXT: lsr.w r3, r9, r6 +; VFP2-NEXT: orrs r2, r3 +; VFP2-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; VFP2-NEXT: lsl.w r4, r10, r4 +; VFP2-NEXT: umlal r7, lr, r5, r8 +; VFP2-NEXT: lsrs.w r3, r3, #1 +; VFP2-NEXT: lsrs r3, r6 +; VFP2-NEXT: orrs r3, r4 +; VFP2-NEXT: umull r4, r6, r11, r3 +; VFP2-NEXT: mla r6, r11, r2, r6 +; VFP2-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; VFP2-NEXT: mla r5, r8, r5, r2 +; VFP2-NEXT: mla r3, r8, r3, r6 +; VFP2-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; VFP2-NEXT: mla r2, r8, r12, r5 +; VFP2-NEXT: adds r6, r6, r4 +; VFP2-NEXT: adcs r3, r2 +; VFP2-NEXT: adds r2, r7, r6 +; VFP2-NEXT: adc.w r3, r3, lr +; VFP2-NEXT: .LBB18_8: @ %fp-to-i-cleanup +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; FP16-LABEL: test_signed_i100_f64: -; FP16: @ %bb.0: -; FP16-NEXT: .save {r4, r5, r7, lr} -; FP16-NEXT: push {r4, r5, r7, lr} -; FP16-NEXT: mov r4, r1 -; FP16-NEXT: mov r5, r0 -; FP16-NEXT: bl __fixdfti -; FP16-NEXT: vldr d0, .LCPI18_0 -; FP16-NEXT: vmov d1, r5, r4 -; FP16-NEXT: vldr d2, .LCPI18_1 -; FP16-NEXT: vcmp.f64 d1, d0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt lt -; FP16-NEXT: movlt r0, #0 -; FP16-NEXT: movlt r1, #0 -; FP16-NEXT: movlt r2, #0 -; FP16-NEXT: mvnlt r3, #7 -; FP16-NEXT: vcmp.f64 d1, d2 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt gt -; FP16-NEXT: movgt r3, #7 -; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: vcmp.f64 d1, d1 +; FP16: @ %bb.0: @ %fp-to-i-entry +; FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FP16-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FP16-NEXT: .pad #52 +; FP16-NEXT: sub sp, #52 +; FP16-NEXT: vmov d0, r0, r1 +; FP16-NEXT: ubfx r4, r1, #20, #11 +; FP16-NEXT: mov lr, r1 +; FP16-NEXT: mov r12, r0 +; FP16-NEXT: movs r0, #0 +; FP16-NEXT: movw r1, #1023 +; FP16-NEXT: cmp r4, r1 +; FP16-NEXT: blo .LBB18_4 +; FP16-NEXT: @ %bb.1: @ %fp-to-i-entry +; FP16-NEXT: vcmp.f64 d0, d0 +; FP16-NEXT: movs r1, #0 +; FP16-NEXT: movs r2, #0 +; FP16-NEXT: movs r3, #0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt vs -; FP16-NEXT: movvs r0, #0 -; FP16-NEXT: movvs r1, #0 -; FP16-NEXT: movvs r2, #0 -; FP16-NEXT: movvs r3, #0 -; FP16-NEXT: pop {r4, r5, r7, pc} -; FP16-NEXT: .p2align 3 -; FP16-NEXT: @ %bb.1: -; FP16-NEXT: .LCPI18_0: -; FP16-NEXT: .long 0 @ double -6.338253001141147E+29 -; FP16-NEXT: .long 3323985920 -; FP16-NEXT: .LCPI18_1: -; FP16-NEXT: .long 4294967295 @ double 6.3382530011411463E+29 -; FP16-NEXT: .long 1176502271 +; FP16-NEXT: bvs.w .LBB18_8 +; FP16-NEXT: @ %bb.2: @ %fp-to-i-if-check.saturate +; FP16-NEXT: movw r0, #1122 +; FP16-NEXT: cmp r4, r0 +; FP16-NEXT: blo .LBB18_5 +; FP16-NEXT: @ %bb.3: @ %fp-to-i-if-saturate +; FP16-NEXT: mvn.w r0, lr +; FP16-NEXT: lsrs r0, r0, #31 +; FP16-NEXT: mov.w r0, #7 +; FP16-NEXT: cinv r3, r0, eq +; FP16-NEXT: csetm r0, ne +; FP16-NEXT: mov r1, r0 +; FP16-NEXT: mov r2, r0 +; FP16-NEXT: add sp, #52 +; FP16-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; FP16-NEXT: .LBB18_4: +; FP16-NEXT: movs r1, #0 +; FP16-NEXT: movs r2, #0 +; FP16-NEXT: movs r3, #0 +; FP16-NEXT: add sp, #52 +; FP16-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; FP16-NEXT: .LBB18_5: @ %fp-to-i-if-check.exp.size +; FP16-NEXT: movs r0, #1 +; FP16-NEXT: mov r5, lr +; FP16-NEXT: bfi r5, r0, #20, #12 +; FP16-NEXT: orr.w r9, r0, lr, asr #31 +; FP16-NEXT: asr.w r2, lr, #31 +; FP16-NEXT: movw r0, #1074 +; FP16-NEXT: cmp r4, r0 +; FP16-NEXT: bhi .LBB18_7 +; FP16-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small +; FP16-NEXT: movw r0, #1075 +; FP16-NEXT: subs r3, r0, r4 +; FP16-NEXT: rsb.w r1, r3, #32 +; FP16-NEXT: lsr.w r0, r12, r3 +; FP16-NEXT: lsr.w r3, r5, r3 +; FP16-NEXT: lsl.w r1, r5, r1 +; FP16-NEXT: orr.w r6, r0, r1 +; FP16-NEXT: movw r0, #1043 +; FP16-NEXT: mov.w r12, #0 +; FP16-NEXT: subs r0, r0, r4 +; FP16-NEXT: it pl +; FP16-NEXT: lsrpl.w r6, r5, r0 +; FP16-NEXT: umull r0, r1, r6, r9 +; FP16-NEXT: mov.w r4, #0 +; FP16-NEXT: it pl +; FP16-NEXT: movpl r3, #0 +; FP16-NEXT: umlal r1, r4, r3, r9 +; FP16-NEXT: umull lr, r5, r6, r2 +; FP16-NEXT: adds.w r7, lr, r1 +; FP16-NEXT: adcs.w r7, r4, r5 +; FP16-NEXT: umlal r1, r4, r6, r2 +; FP16-NEXT: adc r7, r12, #0 +; FP16-NEXT: mla r5, r2, r3, r5 +; FP16-NEXT: umlal r4, r7, r3, r2 +; FP16-NEXT: mla r3, r2, r6, r5 +; FP16-NEXT: adds.w r2, r4, lr +; FP16-NEXT: adcs r3, r7 +; FP16-NEXT: add sp, #52 +; FP16-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; FP16-NEXT: .LBB18_7: @ %fp-to-i-if-exp.large +; FP16-NEXT: subw r0, r4, #1075 +; FP16-NEXT: movs r1, #12 +; FP16-NEXT: add r3, sp, #16 +; FP16-NEXT: mov.w lr, #0 +; FP16-NEXT: and.w r1, r1, r0, lsr #3 +; FP16-NEXT: adds r3, #16 +; FP16-NEXT: strd lr, lr, [sp, #40] +; FP16-NEXT: subs r1, r3, r1 +; FP16-NEXT: strd lr, lr, [sp, #24] +; FP16-NEXT: and r4, r0, #31 +; FP16-NEXT: strd r12, r5, [sp, #32] +; FP16-NEXT: eor r12, r4, #31 +; FP16-NEXT: strd lr, lr, [sp, #16] +; FP16-NEXT: movs r6, #0 +; FP16-NEXT: ldrd r3, r5, [r1] +; FP16-NEXT: str r5, [sp, #4] @ 4-byte Spill +; FP16-NEXT: ldr r7, [r1, #8] +; FP16-NEXT: str r7, [sp, #12] @ 4-byte Spill +; FP16-NEXT: ldr r1, [r1, #12] +; FP16-NEXT: str r1, [sp, #8] @ 4-byte Spill +; FP16-NEXT: lsrs r1, r3, #1 +; FP16-NEXT: lsr.w r0, r1, r12 +; FP16-NEXT: lsl.w r1, r5, r4 +; FP16-NEXT: lsls r3, r4 +; FP16-NEXT: orr.w r10, r1, r0 +; FP16-NEXT: umull r0, r1, r3, r9 +; FP16-NEXT: umull r5, r11, r3, r2 +; FP16-NEXT: umlal r1, r6, r10, r9 +; FP16-NEXT: str r5, [sp] @ 4-byte Spill +; FP16-NEXT: adds.w r8, r5, r1 +; FP16-NEXT: adcs.w r7, r6, r11 +; FP16-NEXT: umlal r1, r6, r3, r2 +; FP16-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; FP16-NEXT: adc r8, lr, #0 +; FP16-NEXT: umlal r6, r8, r10, r2 +; FP16-NEXT: lsl.w lr, r7, r4 +; FP16-NEXT: ldr r7, [sp, #12] @ 4-byte Reload +; FP16-NEXT: lsrs r5, r7, #1 +; FP16-NEXT: lsl.w r4, r7, r4 +; FP16-NEXT: lsr.w r5, r5, r12 +; FP16-NEXT: orr.w lr, lr, r5 +; FP16-NEXT: ldr r5, [sp, #4] @ 4-byte Reload +; FP16-NEXT: lsrs.w r5, r5, #1 +; FP16-NEXT: lsr.w r5, r5, r12 +; FP16-NEXT: orrs r5, r4 +; FP16-NEXT: umull r12, r4, r9, r5 +; FP16-NEXT: mla r4, r9, lr, r4 +; FP16-NEXT: mla r5, r2, r5, r4 +; FP16-NEXT: mla r4, r2, r10, r11 +; FP16-NEXT: mla r2, r2, r3, r4 +; FP16-NEXT: ldr r3, [sp] @ 4-byte Reload +; FP16-NEXT: adds.w r3, r3, r12 +; FP16-NEXT: adcs r5, r2 +; FP16-NEXT: adds r2, r6, r3 +; FP16-NEXT: adc.w r3, r8, r5 +; FP16-NEXT: .LBB18_8: @ %fp-to-i-cleanup +; FP16-NEXT: add sp, #52 +; FP16-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} %x = call i100 @llvm.fptosi.sat.i100.f64(double %f) ret i100 %x } define i128 @test_signed_i128_f64(double %f) nounwind { ; SOFT-LABEL: test_signed_i128_f64: -; SOFT: @ %bb.0: +; SOFT: @ %bb.0: @ %fp-to-i-entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #20 -; SOFT-NEXT: sub sp, #20 +; SOFT-NEXT: .pad #68 +; SOFT-NEXT: sub sp, #68 ; SOFT-NEXT: mov r5, r1 -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: movs r4, #0 -; SOFT-NEXT: ldr r3, .LCPI19_0 -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: bl __aeabi_dcmpge ; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: bl __fixdfti -; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: movs r0, #1 -; SOFT-NEXT: lsls r0, r0, #31 -; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB19_17 -; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: beq .LBB19_18 -; SOFT-NEXT: .LBB19_2: -; SOFT-NEXT: beq .LBB19_19 -; SOFT-NEXT: .LBB19_3: -; SOFT-NEXT: beq .LBB19_5 -; SOFT-NEXT: .LBB19_4: -; SOFT-NEXT: str r3, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: .LBB19_5: -; SOFT-NEXT: mvns r7, r4 -; SOFT-NEXT: ldr r3, .LCPI19_1 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: bl __aeabi_dcmpgt -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: bne .LBB19_20 -; SOFT-NEXT: @ %bb.6: +; SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: ldr r0, .LCPI19_1 +; SOFT-NEXT: subs r0, #52 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: sbcs r1, r4 +; SOFT-NEXT: lsls r2, r5, #1 +; SOFT-NEXT: lsrs r6, r2, #21 +; SOFT-NEXT: subs r0, r6, r0 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: sbcs r0, r1 +; SOFT-NEXT: blo .LBB19_8 +; SOFT-NEXT: @ %bb.1: @ %fp-to-i-entry ; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: beq .LBB19_21 -; SOFT-NEXT: .LBB19_7: -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: bne .LBB19_9 -; SOFT-NEXT: .LBB19_8: -; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: .LBB19_9: -; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB19_11 -; SOFT-NEXT: @ %bb.10: -; SOFT-NEXT: ldr r7, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB19_11: -; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r2, r7 ; SOFT-NEXT: mov r3, r5 ; SOFT-NEXT: bl __aeabi_dcmpun ; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: beq .LBB19_22 -; SOFT-NEXT: @ %bb.12: ; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: beq .LBB19_23 -; SOFT-NEXT: .LBB19_13: ; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: beq .LBB19_24 -; SOFT-NEXT: .LBB19_14: -; SOFT-NEXT: bne .LBB19_16 -; SOFT-NEXT: .LBB19_15: -; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: .LBB19_16: ; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: add sp, #20 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB19_17: -; SOFT-NEXT: str r7, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: bne .LBB19_2 -; SOFT-NEXT: .LBB19_18: -; SOFT-NEXT: str r7, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB19_3 -; SOFT-NEXT: .LBB19_19: -; SOFT-NEXT: str r7, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: bne .LBB19_4 -; SOFT-NEXT: b .LBB19_5 -; SOFT-NEXT: .LBB19_20: -; SOFT-NEXT: ldr r0, .LCPI19_2 -; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: beq .LBB19_2 +; SOFT-NEXT: b .LBB19_12 +; SOFT-NEXT: .LBB19_2: @ %fp-to-i-if-check.saturate +; SOFT-NEXT: movs r2, #1 +; SOFT-NEXT: mvns r0, r5 +; SOFT-NEXT: ldr r3, .LCPI19_1 +; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: adds r1, #75 +; SOFT-NEXT: subs r1, r6, r1 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: sbcs r1, r4 +; SOFT-NEXT: blo .LBB19_9 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-saturate +; SOFT-NEXT: lsrs r0, r0, #31 +; SOFT-NEXT: mvns r4, r4 +; SOFT-NEXT: lsls r3, r2, #31 +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB19_5 +; SOFT-NEXT: @ %bb.4: +; SOFT-NEXT: ldr r3, .LCPI19_3 +; SOFT-NEXT: .LBB19_5: @ %fp-to-i-if-saturate ; SOFT-NEXT: bne .LBB19_7 -; SOFT-NEXT: .LBB19_21: -; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: beq .LBB19_8 -; SOFT-NEXT: b .LBB19_9 -; SOFT-NEXT: .LBB19_22: -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: @ %bb.6: @ %fp-to-i-if-saturate +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: .LBB19_7: @ %fp-to-i-if-saturate ; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: bne .LBB19_13 -; SOFT-NEXT: .LBB19_23: -; SOFT-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: bne .LBB19_14 -; SOFT-NEXT: .LBB19_24: +; SOFT-NEXT: b .LBB19_12 +; SOFT-NEXT: .LBB19_8: +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: b .LBB19_12 +; SOFT-NEXT: .LBB19_9: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: asrs r1, r5, #31 +; SOFT-NEXT: str r1, [sp, #28] @ 4-byte Spill +; SOFT-NEXT: orrs r2, r1 +; SOFT-NEXT: str r2, [sp, #24] @ 4-byte Spill +; SOFT-NEXT: ldr r1, .LCPI19_0 +; SOFT-NEXT: mvns r2, r1 +; SOFT-NEXT: orrs r0, r2 +; SOFT-NEXT: subs r1, r1, r0 +; SOFT-NEXT: mvns r0, r7 +; SOFT-NEXT: mvns r0, r0 +; SOFT-NEXT: cmp r3, r6 +; SOFT-NEXT: bls .LBB19_11 +; SOFT-NEXT: @ %bb.10: @ %fp-to-i-if-exp.small +; SOFT-NEXT: subs r2, r3, r6 +; SOFT-NEXT: bl __aeabi_llsr +; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r6, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r6, r1 +; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #24] @ 4-byte Spill +; SOFT-NEXT: adcs r6, r4 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r7, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: str r0, [sp, #24] @ 4-byte Spill +; SOFT-NEXT: adcs r1, r4 +; SOFT-NEXT: adds r0, r6, r1 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r5, r4 +; SOFT-NEXT: adcs r5, r4 +; SOFT-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: adcs r4, r5 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r3, r1 +; SOFT-NEXT: ldr r1, [sp, #24] @ 4-byte Reload ; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: beq .LBB19_15 -; SOFT-NEXT: b .LBB19_16 +; SOFT-NEXT: adds r2, r2, r0 +; SOFT-NEXT: adcs r3, r4 +; SOFT-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: b .LBB19_12 +; SOFT-NEXT: .LBB19_11: @ %fp-to-i-if-exp.large +; SOFT-NEXT: str r4, [sp, #60] +; SOFT-NEXT: str r4, [sp, #44] +; SOFT-NEXT: str r4, [sp, #40] +; SOFT-NEXT: str r4, [sp, #36] +; SOFT-NEXT: str r4, [sp, #32] +; SOFT-NEXT: add r2, sp, #48 +; SOFT-NEXT: stm r2!, {r0, r1, r4} +; SOFT-NEXT: ldr r0, .LCPI19_2 +; SOFT-NEXT: adds r6, r6, r0 +; SOFT-NEXT: movs r7, #31 +; SOFT-NEXT: lsrs r0, r6, #3 +; SOFT-NEXT: ands r6, r7 +; SOFT-NEXT: movs r1, #12 +; SOFT-NEXT: ands r1, r0 +; SOFT-NEXT: add r0, sp, #32 +; SOFT-NEXT: adds r0, #16 +; SOFT-NEXT: subs r5, r0, r1 +; SOFT-NEXT: ldr r0, [r5, #4] +; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [r5, #8] +; SOFT-NEXT: ldr r1, [r5, #12] +; SOFT-NEXT: lsrs r3, r0, #1 +; SOFT-NEXT: lsls r0, r6 +; SOFT-NEXT: eors r7, r6 +; SOFT-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: lsrs r2, r2, #1 +; SOFT-NEXT: lsrs r2, r7 +; SOFT-NEXT: orrs r2, r0 +; SOFT-NEXT: lsls r1, r6 +; SOFT-NEXT: lsrs r3, r7 +; SOFT-NEXT: orrs r3, r1 +; SOFT-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: lsls r1, r6 +; SOFT-NEXT: ldr r5, [r5] +; SOFT-NEXT: lsrs r0, r5, #1 +; SOFT-NEXT: lsrs r0, r7 +; SOFT-NEXT: orrs r0, r1 +; SOFT-NEXT: mov r3, r0 +; SOFT-NEXT: lsls r5, r6 +; SOFT-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r6, r3 +; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: adcs r1, r0 +; SOFT-NEXT: str r1, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r6, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r1 +; SOFT-NEXT: str r0, [sp, #24] @ 4-byte Spill +; SOFT-NEXT: adcs r7, r4 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: ldr r6, [sp, #28] @ 4-byte Reload +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: ldr r2, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r2 +; SOFT-NEXT: str r0, [sp, #24] @ 4-byte Spill +; SOFT-NEXT: adcs r1, r4 +; SOFT-NEXT: adds r7, r7, r1 +; SOFT-NEXT: mov r5, r4 +; SOFT-NEXT: adcs r5, r4 +; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bl __aeabi_lmul +; SOFT-NEXT: mov r3, r1 +; SOFT-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: adds r0, r0, r7 +; SOFT-NEXT: adcs r3, r5 +; SOFT-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; SOFT-NEXT: adds r2, r0, r2 +; SOFT-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: adcs r3, r0 +; SOFT-NEXT: ldr r4, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: .LBB19_12: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: add sp, #68 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.25: +; SOFT-NEXT: @ %bb.13: ; SOFT-NEXT: .LCPI19_0: -; SOFT-NEXT: .long 3353346048 @ 0xc7e00000 +; SOFT-NEXT: .long 1048575 @ 0xfffff ; SOFT-NEXT: .LCPI19_1: -; SOFT-NEXT: .long 1205862399 @ 0x47dfffff +; SOFT-NEXT: .long 1075 @ 0x433 ; SOFT-NEXT: .LCPI19_2: +; SOFT-NEXT: .long 4294966221 @ 0xfffffbcd +; SOFT-NEXT: .LCPI19_3: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; ; VFP2-LABEL: test_signed_i128_f64: -; VFP2: @ %bb.0: -; VFP2-NEXT: .save {r4, r5, r7, lr} -; VFP2-NEXT: push {r4, r5, r7, lr} -; VFP2-NEXT: mov r4, r1 -; VFP2-NEXT: mov r5, r0 -; VFP2-NEXT: bl __fixdfti -; VFP2-NEXT: vldr d16, .LCPI19_0 -; VFP2-NEXT: vmov d17, r5, r4 -; VFP2-NEXT: vldr d18, .LCPI19_1 -; VFP2-NEXT: vcmp.f64 d17, d16 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt lt -; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: movlt r1, #0 -; VFP2-NEXT: movlt r2, #0 -; VFP2-NEXT: movlt.w r3, #-2147483648 -; VFP2-NEXT: vcmp.f64 d17, d18 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt gt -; VFP2-NEXT: mvngt r3, #-2147483648 -; VFP2-NEXT: movgt.w r2, #-1 -; VFP2-NEXT: movgt.w r1, #-1 -; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: vcmp.f64 d17, d17 +; VFP2: @ %bb.0: @ %fp-to-i-entry +; VFP2-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; VFP2-NEXT: .pad #52 +; VFP2-NEXT: sub sp, #52 +; VFP2-NEXT: vmov d16, r0, r1 +; VFP2-NEXT: ubfx r4, r1, #20, #11 +; VFP2-NEXT: mov lr, r1 +; VFP2-NEXT: mov r12, r0 +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: movw r1, #1023 +; VFP2-NEXT: cmp r4, r1 +; VFP2-NEXT: blo .LBB19_4 +; VFP2-NEXT: @ %bb.1: @ %fp-to-i-entry +; VFP2-NEXT: vcmp.f64 d16, d16 +; VFP2-NEXT: movs r1, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: movs r3, #0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt vs -; VFP2-NEXT: movvs r0, #0 -; VFP2-NEXT: movvs r1, #0 -; VFP2-NEXT: movvs r2, #0 -; VFP2-NEXT: movvs r3, #0 -; VFP2-NEXT: pop {r4, r5, r7, pc} -; VFP2-NEXT: .p2align 3 -; VFP2-NEXT: @ %bb.1: -; VFP2-NEXT: .LCPI19_0: -; VFP2-NEXT: .long 0 @ double -1.7014118346046923E+38 -; VFP2-NEXT: .long 3353346048 -; VFP2-NEXT: .LCPI19_1: -; VFP2-NEXT: .long 4294967295 @ double 1.7014118346046921E+38 -; VFP2-NEXT: .long 1205862399 +; VFP2-NEXT: bvs.w .LBB19_8 +; VFP2-NEXT: @ %bb.2: @ %fp-to-i-if-check.saturate +; VFP2-NEXT: movw r0, #1150 +; VFP2-NEXT: cmp r4, r0 +; VFP2-NEXT: blo .LBB19_5 +; VFP2-NEXT: @ %bb.3: @ %fp-to-i-if-saturate +; VFP2-NEXT: mvn.w r0, lr +; VFP2-NEXT: mov.w r3, #-2147483648 +; VFP2-NEXT: lsrs r0, r0, #31 +; VFP2-NEXT: itt ne +; VFP2-NEXT: mvnne r3, #-2147483648 +; VFP2-NEXT: movne.w r0, #-1 +; VFP2-NEXT: mov r1, r0 +; VFP2-NEXT: mov r2, r0 +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; VFP2-NEXT: .LBB19_4: +; VFP2-NEXT: movs r1, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; VFP2-NEXT: .LBB19_5: @ %fp-to-i-if-check.exp.size +; VFP2-NEXT: movs r0, #1 +; VFP2-NEXT: mov r1, lr +; VFP2-NEXT: bfi r1, r0, #20, #12 +; VFP2-NEXT: orr.w r11, r0, lr, asr #31 +; VFP2-NEXT: asr.w r8, lr, #31 +; VFP2-NEXT: movw r0, #1074 +; VFP2-NEXT: cmp r4, r0 +; VFP2-NEXT: bhi .LBB19_7 +; VFP2-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small +; VFP2-NEXT: movw r0, #1075 +; VFP2-NEXT: subs r3, r0, r4 +; VFP2-NEXT: rsb.w r0, r3, #32 +; VFP2-NEXT: movs r6, #0 +; VFP2-NEXT: lsr.w r12, r12, r3 +; VFP2-NEXT: lsr.w r3, r1, r3 +; VFP2-NEXT: lsl.w r0, r1, r0 +; VFP2-NEXT: orr.w r5, r12, r0 +; VFP2-NEXT: movw r0, #1043 +; VFP2-NEXT: mov.w r12, #0 +; VFP2-NEXT: subs r0, r0, r4 +; VFP2-NEXT: it pl +; VFP2-NEXT: lsrpl.w r5, r1, r0 +; VFP2-NEXT: umull r0, r4, r5, r11 +; VFP2-NEXT: it pl +; VFP2-NEXT: movpl r3, #0 +; VFP2-NEXT: umlal r4, r6, r3, r11 +; VFP2-NEXT: umull lr, r7, r5, r8 +; VFP2-NEXT: adds.w r1, lr, r4 +; VFP2-NEXT: adcs.w r4, r6, r7 +; VFP2-NEXT: mla r7, r8, r3, r7 +; VFP2-NEXT: adc r6, r12, #0 +; VFP2-NEXT: umlal r4, r6, r3, r8 +; VFP2-NEXT: mla r3, r8, r5, r7 +; VFP2-NEXT: adds.w r2, r4, lr +; VFP2-NEXT: adcs r3, r6 +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; VFP2-NEXT: .LBB19_7: @ %fp-to-i-if-exp.large +; VFP2-NEXT: subw r0, r4, #1075 +; VFP2-NEXT: add r3, sp, #16 +; VFP2-NEXT: strd r12, r1, [sp, #32] +; VFP2-NEXT: movs r1, #12 +; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: and.w r1, r1, r0, lsr #3 +; VFP2-NEXT: adds r3, #16 +; VFP2-NEXT: strd lr, lr, [sp, #40] +; VFP2-NEXT: strd lr, lr, [sp, #24] +; VFP2-NEXT: subs r1, r3, r1 +; VFP2-NEXT: strd lr, lr, [sp, #16] +; VFP2-NEXT: and r4, r0, #31 +; VFP2-NEXT: ldrd r3, r7, [r1] +; VFP2-NEXT: eor r6, r4, #31 +; VFP2-NEXT: str r7, [sp, #12] @ 4-byte Spill +; VFP2-NEXT: ldrd r10, r9, [r1, #8] +; VFP2-NEXT: lsrs r1, r3, #1 +; VFP2-NEXT: lsl.w r12, r3, r4 +; VFP2-NEXT: lsr.w r0, r1, r6 +; VFP2-NEXT: lsl.w r1, r7, r4 +; VFP2-NEXT: orr.w r5, r1, r0 +; VFP2-NEXT: umull r0, r1, r12, r11 +; VFP2-NEXT: movs r7, #0 +; VFP2-NEXT: umull r3, r2, r12, r8 +; VFP2-NEXT: umlal r1, r7, r5, r11 +; VFP2-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill +; VFP2-NEXT: adds r1, r1, r3 +; VFP2-NEXT: adcs r7, r2 +; VFP2-NEXT: lsl.w r2, r9, r4 +; VFP2-NEXT: lsr.w r9, r10, #1 +; VFP2-NEXT: adc lr, lr, #0 +; VFP2-NEXT: lsr.w r3, r9, r6 +; VFP2-NEXT: orrs r2, r3 +; VFP2-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; VFP2-NEXT: lsl.w r4, r10, r4 +; VFP2-NEXT: umlal r7, lr, r5, r8 +; VFP2-NEXT: lsrs.w r3, r3, #1 +; VFP2-NEXT: lsrs r3, r6 +; VFP2-NEXT: orrs r3, r4 +; VFP2-NEXT: umull r4, r6, r11, r3 +; VFP2-NEXT: mla r6, r11, r2, r6 +; VFP2-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; VFP2-NEXT: mla r5, r8, r5, r2 +; VFP2-NEXT: mla r3, r8, r3, r6 +; VFP2-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; VFP2-NEXT: mla r2, r8, r12, r5 +; VFP2-NEXT: adds r6, r6, r4 +; VFP2-NEXT: adcs r3, r2 +; VFP2-NEXT: adds r2, r7, r6 +; VFP2-NEXT: adc.w r3, r3, lr +; VFP2-NEXT: .LBB19_8: @ %fp-to-i-cleanup +; VFP2-NEXT: add sp, #52 +; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; FP16-LABEL: test_signed_i128_f64: -; FP16: @ %bb.0: -; FP16-NEXT: .save {r4, r5, r7, lr} -; FP16-NEXT: push {r4, r5, r7, lr} -; FP16-NEXT: mov r4, r1 -; FP16-NEXT: mov r5, r0 -; FP16-NEXT: bl __fixdfti -; FP16-NEXT: vldr d0, .LCPI19_0 -; FP16-NEXT: vmov d1, r5, r4 -; FP16-NEXT: vldr d2, .LCPI19_1 -; FP16-NEXT: vcmp.f64 d1, d0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt lt -; FP16-NEXT: movlt r0, #0 -; FP16-NEXT: movlt r1, #0 -; FP16-NEXT: movlt r2, #0 -; FP16-NEXT: movlt.w r3, #-2147483648 -; FP16-NEXT: vcmp.f64 d1, d2 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt gt -; FP16-NEXT: mvngt r3, #-2147483648 -; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: vcmp.f64 d1, d1 +; FP16: @ %bb.0: @ %fp-to-i-entry +; FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FP16-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; FP16-NEXT: .pad #52 +; FP16-NEXT: sub sp, #52 +; FP16-NEXT: vmov d0, r0, r1 +; FP16-NEXT: ubfx r4, r1, #20, #11 +; FP16-NEXT: mov lr, r1 +; FP16-NEXT: mov r12, r0 +; FP16-NEXT: movs r0, #0 +; FP16-NEXT: movw r1, #1023 +; FP16-NEXT: cmp r4, r1 +; FP16-NEXT: blo .LBB19_4 +; FP16-NEXT: @ %bb.1: @ %fp-to-i-entry +; FP16-NEXT: vcmp.f64 d0, d0 +; FP16-NEXT: movs r1, #0 +; FP16-NEXT: movs r2, #0 +; FP16-NEXT: movs r3, #0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt vs -; FP16-NEXT: movvs r0, #0 -; FP16-NEXT: movvs r1, #0 -; FP16-NEXT: movvs r2, #0 -; FP16-NEXT: movvs r3, #0 -; FP16-NEXT: pop {r4, r5, r7, pc} -; FP16-NEXT: .p2align 3 -; FP16-NEXT: @ %bb.1: -; FP16-NEXT: .LCPI19_0: -; FP16-NEXT: .long 0 @ double -1.7014118346046923E+38 -; FP16-NEXT: .long 3353346048 -; FP16-NEXT: .LCPI19_1: -; FP16-NEXT: .long 4294967295 @ double 1.7014118346046921E+38 -; FP16-NEXT: .long 1205862399 +; FP16-NEXT: bvs.w .LBB19_8 +; FP16-NEXT: @ %bb.2: @ %fp-to-i-if-check.saturate +; FP16-NEXT: movw r0, #1150 +; FP16-NEXT: cmp r4, r0 +; FP16-NEXT: blo .LBB19_5 +; FP16-NEXT: @ %bb.3: @ %fp-to-i-if-saturate +; FP16-NEXT: mvn.w r0, lr +; FP16-NEXT: lsrs r0, r0, #31 +; FP16-NEXT: mvn r0, #-2147483648 +; FP16-NEXT: cinv r3, r0, eq +; FP16-NEXT: csetm r0, ne +; FP16-NEXT: mov r1, r0 +; FP16-NEXT: mov r2, r0 +; FP16-NEXT: add sp, #52 +; FP16-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; FP16-NEXT: .LBB19_4: +; FP16-NEXT: movs r1, #0 +; FP16-NEXT: movs r2, #0 +; FP16-NEXT: movs r3, #0 +; FP16-NEXT: add sp, #52 +; FP16-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; FP16-NEXT: .LBB19_5: @ %fp-to-i-if-check.exp.size +; FP16-NEXT: movs r0, #1 +; FP16-NEXT: mov r5, lr +; FP16-NEXT: bfi r5, r0, #20, #12 +; FP16-NEXT: orr.w r9, r0, lr, asr #31 +; FP16-NEXT: asr.w r2, lr, #31 +; FP16-NEXT: movw r0, #1074 +; FP16-NEXT: cmp r4, r0 +; FP16-NEXT: bhi .LBB19_7 +; FP16-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small +; FP16-NEXT: movw r0, #1075 +; FP16-NEXT: subs r3, r0, r4 +; FP16-NEXT: rsb.w r1, r3, #32 +; FP16-NEXT: lsr.w r0, r12, r3 +; FP16-NEXT: lsr.w r3, r5, r3 +; FP16-NEXT: lsl.w r1, r5, r1 +; FP16-NEXT: orr.w r6, r0, r1 +; FP16-NEXT: movw r0, #1043 +; FP16-NEXT: mov.w r12, #0 +; FP16-NEXT: subs r0, r0, r4 +; FP16-NEXT: it pl +; FP16-NEXT: lsrpl.w r6, r5, r0 +; FP16-NEXT: umull r0, r1, r6, r9 +; FP16-NEXT: mov.w r4, #0 +; FP16-NEXT: it pl +; FP16-NEXT: movpl r3, #0 +; FP16-NEXT: umlal r1, r4, r3, r9 +; FP16-NEXT: umull lr, r5, r6, r2 +; FP16-NEXT: adds.w r7, lr, r1 +; FP16-NEXT: adcs.w r7, r4, r5 +; FP16-NEXT: umlal r1, r4, r6, r2 +; FP16-NEXT: adc r7, r12, #0 +; FP16-NEXT: mla r5, r2, r3, r5 +; FP16-NEXT: umlal r4, r7, r3, r2 +; FP16-NEXT: mla r3, r2, r6, r5 +; FP16-NEXT: adds.w r2, r4, lr +; FP16-NEXT: adcs r3, r7 +; FP16-NEXT: add sp, #52 +; FP16-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; FP16-NEXT: .LBB19_7: @ %fp-to-i-if-exp.large +; FP16-NEXT: subw r0, r4, #1075 +; FP16-NEXT: movs r1, #12 +; FP16-NEXT: add r3, sp, #16 +; FP16-NEXT: mov.w lr, #0 +; FP16-NEXT: and.w r1, r1, r0, lsr #3 +; FP16-NEXT: adds r3, #16 +; FP16-NEXT: strd lr, lr, [sp, #40] +; FP16-NEXT: subs r1, r3, r1 +; FP16-NEXT: strd lr, lr, [sp, #24] +; FP16-NEXT: and r4, r0, #31 +; FP16-NEXT: strd r12, r5, [sp, #32] +; FP16-NEXT: eor r12, r4, #31 +; FP16-NEXT: strd lr, lr, [sp, #16] +; FP16-NEXT: movs r6, #0 +; FP16-NEXT: ldrd r3, r5, [r1] +; FP16-NEXT: str r5, [sp, #4] @ 4-byte Spill +; FP16-NEXT: ldr r7, [r1, #8] +; FP16-NEXT: str r7, [sp, #12] @ 4-byte Spill +; FP16-NEXT: ldr r1, [r1, #12] +; FP16-NEXT: str r1, [sp, #8] @ 4-byte Spill +; FP16-NEXT: lsrs r1, r3, #1 +; FP16-NEXT: lsr.w r0, r1, r12 +; FP16-NEXT: lsl.w r1, r5, r4 +; FP16-NEXT: lsls r3, r4 +; FP16-NEXT: orr.w r10, r1, r0 +; FP16-NEXT: umull r0, r1, r3, r9 +; FP16-NEXT: umull r5, r11, r3, r2 +; FP16-NEXT: umlal r1, r6, r10, r9 +; FP16-NEXT: str r5, [sp] @ 4-byte Spill +; FP16-NEXT: adds.w r8, r5, r1 +; FP16-NEXT: adcs.w r7, r6, r11 +; FP16-NEXT: umlal r1, r6, r3, r2 +; FP16-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; FP16-NEXT: adc r8, lr, #0 +; FP16-NEXT: umlal r6, r8, r10, r2 +; FP16-NEXT: lsl.w lr, r7, r4 +; FP16-NEXT: ldr r7, [sp, #12] @ 4-byte Reload +; FP16-NEXT: lsrs r5, r7, #1 +; FP16-NEXT: lsl.w r4, r7, r4 +; FP16-NEXT: lsr.w r5, r5, r12 +; FP16-NEXT: orr.w lr, lr, r5 +; FP16-NEXT: ldr r5, [sp, #4] @ 4-byte Reload +; FP16-NEXT: lsrs.w r5, r5, #1 +; FP16-NEXT: lsr.w r5, r5, r12 +; FP16-NEXT: orrs r5, r4 +; FP16-NEXT: umull r12, r4, r9, r5 +; FP16-NEXT: mla r4, r9, lr, r4 +; FP16-NEXT: mla r5, r2, r5, r4 +; FP16-NEXT: mla r4, r2, r10, r11 +; FP16-NEXT: mla r2, r2, r3, r4 +; FP16-NEXT: ldr r3, [sp] @ 4-byte Reload +; FP16-NEXT: adds.w r3, r3, r12 +; FP16-NEXT: adcs r5, r2 +; FP16-NEXT: adds r2, r6, r3 +; FP16-NEXT: adc.w r3, r8, r5 +; FP16-NEXT: .LBB19_8: @ %fp-to-i-cleanup +; FP16-NEXT: add sp, #52 +; FP16-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} %x = call i128 @llvm.fptosi.sat.i128.f64(double %f) ret i128 %x } @@ -3258,195 +4508,38 @@ define i64 @test_signed_i64_f16(half %f) nounwind { define i100 @test_signed_i100_f16(half %f) nounwind { ; SOFT-LABEL: test_signed_i100_f16: ; SOFT: @ %bb.0: -; SOFT-NEXT: .save {r4, r5, r6, r7, lr} -; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #20 -; SOFT-NEXT: sub sp, #20 +; SOFT-NEXT: .save {r7, lr} +; SOFT-NEXT: push {r7, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: movs r0, #241 -; SOFT-NEXT: lsls r1, r0, #24 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: movs r5, #7 -; SOFT-NEXT: str r5, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: mvns r7, r5 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB28_17 -; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: beq .LBB28_18 -; SOFT-NEXT: .LBB28_2: -; SOFT-NEXT: bne .LBB28_4 -; SOFT-NEXT: .LBB28_3: -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: .LBB28_4: -; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: beq .LBB28_6 -; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r7, r3 -; SOFT-NEXT: .LBB28_6: -; SOFT-NEXT: ldr r1, .LCPI28_0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: mvns r5, r6 -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: beq .LBB28_19 -; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: beq .LBB28_20 -; SOFT-NEXT: .LBB28_8: -; SOFT-NEXT: mov r7, r5 -; SOFT-NEXT: beq .LBB28_21 -; SOFT-NEXT: .LBB28_9: -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB28_11 -; SOFT-NEXT: .LBB28_10: -; SOFT-NEXT: ldr r5, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB28_11: -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: bl __aeabi_fcmpun -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: beq .LBB28_22 -; SOFT-NEXT: @ %bb.12: -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: beq .LBB28_23 -; SOFT-NEXT: .LBB28_13: -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: beq .LBB28_24 -; SOFT-NEXT: .LBB28_14: -; SOFT-NEXT: bne .LBB28_16 -; SOFT-NEXT: .LBB28_15: -; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: .LBB28_16: -; SOFT-NEXT: mov r3, r6 -; SOFT-NEXT: add sp, #20 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB28_17: -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: bne .LBB28_2 -; SOFT-NEXT: .LBB28_18: -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: beq .LBB28_3 -; SOFT-NEXT: b .LBB28_4 -; SOFT-NEXT: .LBB28_19: -; SOFT-NEXT: str r7, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: bne .LBB28_8 -; SOFT-NEXT: .LBB28_20: -; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: mov r7, r5 -; SOFT-NEXT: bne .LBB28_9 -; SOFT-NEXT: .LBB28_21: -; SOFT-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: beq .LBB28_10 -; SOFT-NEXT: b .LBB28_11 -; SOFT-NEXT: .LBB28_22: -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: bne .LBB28_13 -; SOFT-NEXT: .LBB28_23: -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: bne .LBB28_14 -; SOFT-NEXT: .LBB28_24: -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: beq .LBB28_15 -; SOFT-NEXT: b .LBB28_16 -; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.25: -; SOFT-NEXT: .LCPI28_0: -; SOFT-NEXT: .long 1895825407 @ 0x70ffffff +; SOFT-NEXT: bl __aeabi_f2iz +; SOFT-NEXT: asrs r1, r0, #31 +; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: mov r3, r1 +; SOFT-NEXT: pop {r7, pc} ; ; VFP2-LABEL: test_signed_i100_f16: ; VFP2: @ %bb.0: -; VFP2-NEXT: .save {r4, lr} -; VFP2-NEXT: push {r4, lr} +; VFP2-NEXT: .save {r7, lr} +; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: mov r4, r0 -; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: vldr s0, .LCPI28_0 -; VFP2-NEXT: vmov s2, r4 -; VFP2-NEXT: vldr s4, .LCPI28_1 -; VFP2-NEXT: vcmp.f32 s2, s0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt lt -; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: movlt r1, #0 -; VFP2-NEXT: movlt r2, #0 -; VFP2-NEXT: mvnlt r3, #7 -; VFP2-NEXT: vcmp.f32 s2, s4 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt gt -; VFP2-NEXT: movgt r3, #7 -; VFP2-NEXT: movgt.w r2, #-1 -; VFP2-NEXT: movgt.w r1, #-1 -; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: vcmp.f32 s2, s2 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt vs -; VFP2-NEXT: movvs r0, #0 -; VFP2-NEXT: movvs r1, #0 -; VFP2-NEXT: movvs r2, #0 -; VFP2-NEXT: movvs r3, #0 -; VFP2-NEXT: pop {r4, pc} -; VFP2-NEXT: .p2align 2 -; VFP2-NEXT: @ %bb.1: -; VFP2-NEXT: .LCPI28_0: -; VFP2-NEXT: .long 0xf1000000 @ float -6.338253E+29 -; VFP2-NEXT: .LCPI28_1: -; VFP2-NEXT: .long 0x70ffffff @ float 6.33825262E+29 +; VFP2-NEXT: vmov s0, r0 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: asrs r1, r0, #31 +; VFP2-NEXT: mov r2, r1 +; VFP2-NEXT: mov r3, r1 +; VFP2-NEXT: pop {r7, pc} ; ; FP16-LABEL: test_signed_i100_f16: ; FP16: @ %bb.0: -; FP16-NEXT: .save {r7, lr} -; FP16-NEXT: push {r7, lr} -; FP16-NEXT: .vsave {d8} -; FP16-NEXT: vpush {d8} ; FP16-NEXT: vmov.f16 s0, r0 -; FP16-NEXT: vcvtb.f32.f16 s16, s0 -; FP16-NEXT: vmov r0, s16 -; FP16-NEXT: bl __fixsfti -; FP16-NEXT: vldr s0, .LCPI28_0 -; FP16-NEXT: vldr s2, .LCPI28_1 -; FP16-NEXT: vcmp.f32 s16, s0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt lt -; FP16-NEXT: movlt r0, #0 -; FP16-NEXT: movlt r1, #0 -; FP16-NEXT: movlt r2, #0 -; FP16-NEXT: mvnlt r3, #7 -; FP16-NEXT: vcmp.f32 s16, s2 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt gt -; FP16-NEXT: movgt r3, #7 -; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: vcmp.f32 s16, s16 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt vs -; FP16-NEXT: movvs r0, #0 -; FP16-NEXT: movvs r1, #0 -; FP16-NEXT: movvs r2, #0 -; FP16-NEXT: movvs r3, #0 -; FP16-NEXT: vpop {d8} -; FP16-NEXT: pop {r7, pc} -; FP16-NEXT: .p2align 2 -; FP16-NEXT: @ %bb.1: -; FP16-NEXT: .LCPI28_0: -; FP16-NEXT: .long 0xf1000000 @ float -6.338253E+29 -; FP16-NEXT: .LCPI28_1: -; FP16-NEXT: .long 0x70ffffff @ float 6.33825262E+29 +; FP16-NEXT: vcvt.s32.f16 s0, s0 +; FP16-NEXT: vmov r0, s0 +; FP16-NEXT: asrs r1, r0, #31 +; FP16-NEXT: mov r2, r1 +; FP16-NEXT: mov r3, r1 +; FP16-NEXT: bx lr %x = call i100 @llvm.fptosi.sat.i100.f16(half %f) ret i100 %x } @@ -3454,197 +4547,38 @@ define i100 @test_signed_i100_f16(half %f) nounwind { define i128 @test_signed_i128_f16(half %f) nounwind { ; SOFT-LABEL: test_signed_i128_f16: ; SOFT: @ %bb.0: -; SOFT-NEXT: .save {r4, r5, r6, r7, lr} -; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #20 -; SOFT-NEXT: sub sp, #20 +; SOFT-NEXT: .save {r7, lr} +; SOFT-NEXT: push {r7, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: movs r0, #255 -; SOFT-NEXT: lsls r1, r0, #24 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: movs r5, #1 -; SOFT-NEXT: lsls r7, r5, #31 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB29_18 -; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: beq .LBB29_19 -; SOFT-NEXT: .LBB29_2: -; SOFT-NEXT: bne .LBB29_4 -; SOFT-NEXT: .LBB29_3: -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: .LBB29_4: -; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: beq .LBB29_6 -; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r7, r3 -; SOFT-NEXT: .LBB29_6: -; SOFT-NEXT: ldr r1, .LCPI29_0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: mvns r5, r6 -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: beq .LBB29_8 -; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: ldr r7, .LCPI29_1 -; SOFT-NEXT: .LBB29_8: -; SOFT-NEXT: str r7, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: beq .LBB29_20 -; SOFT-NEXT: @ %bb.9: -; SOFT-NEXT: mov r7, r5 -; SOFT-NEXT: beq .LBB29_21 -; SOFT-NEXT: .LBB29_10: -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB29_12 -; SOFT-NEXT: .LBB29_11: -; SOFT-NEXT: ldr r5, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB29_12: -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: bl __aeabi_fcmpun -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: beq .LBB29_22 -; SOFT-NEXT: @ %bb.13: -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: beq .LBB29_23 -; SOFT-NEXT: .LBB29_14: -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: beq .LBB29_24 -; SOFT-NEXT: .LBB29_15: -; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: bne .LBB29_17 -; SOFT-NEXT: .LBB29_16: -; SOFT-NEXT: mov r6, r3 -; SOFT-NEXT: .LBB29_17: -; SOFT-NEXT: mov r3, r6 -; SOFT-NEXT: add sp, #20 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB29_18: -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: bne .LBB29_2 -; SOFT-NEXT: .LBB29_19: -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: beq .LBB29_3 -; SOFT-NEXT: b .LBB29_4 -; SOFT-NEXT: .LBB29_20: -; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: mov r7, r5 -; SOFT-NEXT: bne .LBB29_10 -; SOFT-NEXT: .LBB29_21: -; SOFT-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: beq .LBB29_11 -; SOFT-NEXT: b .LBB29_12 -; SOFT-NEXT: .LBB29_22: -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: bne .LBB29_14 -; SOFT-NEXT: .LBB29_23: -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: bne .LBB29_15 -; SOFT-NEXT: .LBB29_24: -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: beq .LBB29_16 -; SOFT-NEXT: b .LBB29_17 -; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.25: -; SOFT-NEXT: .LCPI29_0: -; SOFT-NEXT: .long 2130706431 @ 0x7effffff -; SOFT-NEXT: .LCPI29_1: -; SOFT-NEXT: .long 2147483647 @ 0x7fffffff +; SOFT-NEXT: bl __aeabi_f2iz +; SOFT-NEXT: asrs r1, r0, #31 +; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: mov r3, r1 +; SOFT-NEXT: pop {r7, pc} ; ; VFP2-LABEL: test_signed_i128_f16: ; VFP2: @ %bb.0: -; VFP2-NEXT: .save {r4, lr} -; VFP2-NEXT: push {r4, lr} +; VFP2-NEXT: .save {r7, lr} +; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: mov r4, r0 -; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: vldr s0, .LCPI29_0 -; VFP2-NEXT: vmov s2, r4 -; VFP2-NEXT: vldr s4, .LCPI29_1 -; VFP2-NEXT: vcmp.f32 s2, s0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt lt -; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: movlt r1, #0 -; VFP2-NEXT: movlt r2, #0 -; VFP2-NEXT: movlt.w r3, #-2147483648 -; VFP2-NEXT: vcmp.f32 s2, s4 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt gt -; VFP2-NEXT: mvngt r3, #-2147483648 -; VFP2-NEXT: movgt.w r2, #-1 -; VFP2-NEXT: movgt.w r1, #-1 -; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: vcmp.f32 s2, s2 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt vs -; VFP2-NEXT: movvs r0, #0 -; VFP2-NEXT: movvs r1, #0 -; VFP2-NEXT: movvs r2, #0 -; VFP2-NEXT: movvs r3, #0 -; VFP2-NEXT: pop {r4, pc} -; VFP2-NEXT: .p2align 2 -; VFP2-NEXT: @ %bb.1: -; VFP2-NEXT: .LCPI29_0: -; VFP2-NEXT: .long 0xff000000 @ float -1.70141183E+38 -; VFP2-NEXT: .LCPI29_1: -; VFP2-NEXT: .long 0x7effffff @ float 1.70141173E+38 +; VFP2-NEXT: vmov s0, r0 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: asrs r1, r0, #31 +; VFP2-NEXT: mov r2, r1 +; VFP2-NEXT: mov r3, r1 +; VFP2-NEXT: pop {r7, pc} ; ; FP16-LABEL: test_signed_i128_f16: ; FP16: @ %bb.0: -; FP16-NEXT: .save {r7, lr} -; FP16-NEXT: push {r7, lr} -; FP16-NEXT: .vsave {d8} -; FP16-NEXT: vpush {d8} ; FP16-NEXT: vmov.f16 s0, r0 -; FP16-NEXT: vcvtb.f32.f16 s16, s0 -; FP16-NEXT: vmov r0, s16 -; FP16-NEXT: bl __fixsfti -; FP16-NEXT: vldr s0, .LCPI29_0 -; FP16-NEXT: vldr s2, .LCPI29_1 -; FP16-NEXT: vcmp.f32 s16, s0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt lt -; FP16-NEXT: movlt r0, #0 -; FP16-NEXT: movlt r1, #0 -; FP16-NEXT: movlt r2, #0 -; FP16-NEXT: movlt.w r3, #-2147483648 -; FP16-NEXT: vcmp.f32 s16, s2 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt gt -; FP16-NEXT: mvngt r3, #-2147483648 -; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: vcmp.f32 s16, s16 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt vs -; FP16-NEXT: movvs r0, #0 -; FP16-NEXT: movvs r1, #0 -; FP16-NEXT: movvs r2, #0 -; FP16-NEXT: movvs r3, #0 -; FP16-NEXT: vpop {d8} -; FP16-NEXT: pop {r7, pc} -; FP16-NEXT: .p2align 2 -; FP16-NEXT: @ %bb.1: -; FP16-NEXT: .LCPI29_0: -; FP16-NEXT: .long 0xff000000 @ float -1.70141183E+38 -; FP16-NEXT: .LCPI29_1: -; FP16-NEXT: .long 0x7effffff @ float 1.70141173E+38 +; FP16-NEXT: vcvt.s32.f16 s0, s0 +; FP16-NEXT: vmov r0, s0 +; FP16-NEXT: asrs r1, r0, #31 +; FP16-NEXT: mov r2, r1 +; FP16-NEXT: mov r3, r1 +; FP16-NEXT: bx lr %x = call i128 @llvm.fptosi.sat.i128.f16(half %f) ret i128 %x } diff --git a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll index 5ea9a79483873..575fdf9ef1903 100644 --- a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll @@ -607,217 +607,363 @@ define i64 @test_signed_i64_f32(float %f) nounwind { define i100 @test_signed_i100_f32(float %f) nounwind { ; SOFT-LABEL: test_signed_i100_f32: -; SOFT: @ %bb.0: +; SOFT: @ %bb.0: @ %fp-to-i-entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: .pad #36 +; SOFT-NEXT: sub sp, #36 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: lsrs r0, r0, #23 +; SOFT-NEXT: uxtb r7, r0 +; SOFT-NEXT: movs r5, #0 +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: cmp r7, #127 +; SOFT-NEXT: blo .LBB8_5 +; SOFT-NEXT: @ %bb.1: @ %fp-to-i-entry +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __fixunssfti -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB8_11 -; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: beq .LBB8_12 -; SOFT-NEXT: .LBB8_2: -; SOFT-NEXT: bne .LBB8_4 -; SOFT-NEXT: .LBB8_3: -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: .LBB8_4: -; SOFT-NEXT: str r1, [sp] @ 4-byte Spill -; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB8_6 -; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r5, r7 -; SOFT-NEXT: .LBB8_6: -; SOFT-NEXT: ldr r1, .LCPI8_0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mvns r2, r6 -; SOFT-NEXT: movs r3, #15 +; SOFT-NEXT: bl __aeabi_fcmpun ; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: beq .LBB8_13 -; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: mov r1, r2 -; SOFT-NEXT: beq .LBB8_14 -; SOFT-NEXT: .LBB8_8: -; SOFT-NEXT: beq .LBB8_15 -; SOFT-NEXT: .LBB8_9: -; SOFT-NEXT: beq .LBB8_16 -; SOFT-NEXT: .LBB8_10: -; SOFT-NEXT: add sp, #12 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB8_11: -; SOFT-NEXT: mov r3, r7 -; SOFT-NEXT: bne .LBB8_2 -; SOFT-NEXT: .LBB8_12: -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: beq .LBB8_3 -; SOFT-NEXT: b .LBB8_4 -; SOFT-NEXT: .LBB8_13: -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: mov r1, r2 -; SOFT-NEXT: bne .LBB8_8 -; SOFT-NEXT: .LBB8_14: -; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload -; SOFT-NEXT: bne .LBB8_9 -; SOFT-NEXT: .LBB8_15: -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: bne .LBB8_10 -; SOFT-NEXT: .LBB8_16: -; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: bne .LBB8_5 +; SOFT-NEXT: @ %bb.2: @ %fp-to-i-entry +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: bmi .LBB8_6 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-check.saturate +; SOFT-NEXT: movs r1, #0 +; SOFT-NEXT: cmp r7, #226 +; SOFT-NEXT: bls .LBB8_7 +; SOFT-NEXT: @ %bb.4: +; SOFT-NEXT: mvns r4, r1 +; SOFT-NEXT: movs r5, #15 +; SOFT-NEXT: .LBB8_5: +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: .LBB8_6: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: add sp, #36 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB8_7: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: ldr r0, .LCPI8_0 +; SOFT-NEXT: ands r6, r0 +; SOFT-NEXT: adds r0, r6, r0 +; SOFT-NEXT: adds r4, r0, #1 +; SOFT-NEXT: cmp r7, #149 +; SOFT-NEXT: bhi .LBB8_9 +; SOFT-NEXT: @ %bb.8: @ %fp-to-i-if-exp.small +; SOFT-NEXT: movs r0, #150 +; SOFT-NEXT: subs r0, r0, r7 +; SOFT-NEXT: lsrs r4, r0 +; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: b .LBB8_6 +; SOFT-NEXT: .LBB8_9: @ %fp-to-i-if-exp.large +; SOFT-NEXT: str r1, [sp, #28] +; SOFT-NEXT: str r1, [sp, #24] +; SOFT-NEXT: str r1, [sp, #20] +; SOFT-NEXT: str r4, [sp, #16] +; SOFT-NEXT: str r1, [sp, #12] +; SOFT-NEXT: str r1, [sp, #8] +; SOFT-NEXT: str r1, [sp, #4] +; SOFT-NEXT: str r1, [sp] +; SOFT-NEXT: subs r7, #150 +; SOFT-NEXT: movs r0, #31 +; SOFT-NEXT: lsrs r1, r7, #3 +; SOFT-NEXT: ands r7, r0 +; SOFT-NEXT: movs r2, #12 +; SOFT-NEXT: ands r2, r1 +; SOFT-NEXT: mov r1, sp +; SOFT-NEXT: adds r1, #16 +; SOFT-NEXT: subs r2, r1, r2 +; SOFT-NEXT: ldr r4, [r2] +; SOFT-NEXT: ldr r3, [r2, #4] +; SOFT-NEXT: ldr r1, [r2, #8] +; SOFT-NEXT: ldr r2, [r2, #12] +; SOFT-NEXT: lsls r2, r7 +; SOFT-NEXT: eors r0, r7 +; SOFT-NEXT: lsrs r5, r1, #1 +; SOFT-NEXT: lsrs r5, r0 +; SOFT-NEXT: orrs r5, r2 +; SOFT-NEXT: lsls r1, r7 +; SOFT-NEXT: lsrs r2, r3, #1 +; SOFT-NEXT: lsrs r2, r0 +; SOFT-NEXT: orrs r2, r1 +; SOFT-NEXT: lsls r3, r7 +; SOFT-NEXT: lsrs r1, r4, #1 +; SOFT-NEXT: lsrs r1, r0 +; SOFT-NEXT: orrs r1, r3 +; SOFT-NEXT: lsls r4, r7 +; SOFT-NEXT: b .LBB8_6 ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.17: +; SOFT-NEXT: @ %bb.10: ; SOFT-NEXT: .LCPI8_0: -; SOFT-NEXT: .long 1904214015 @ 0x717fffff +; SOFT-NEXT: .long 8388607 @ 0x7fffff ; ; VFP-LABEL: test_signed_i100_f32: -; VFP: @ %bb.0: -; VFP-NEXT: .save {r7, lr} -; VFP-NEXT: push {r7, lr} -; VFP-NEXT: .vsave {d8} -; VFP-NEXT: vpush {d8} -; VFP-NEXT: vmov s16, r0 -; VFP-NEXT: bl __fixunssfti -; VFP-NEXT: vldr s0, .LCPI8_0 -; VFP-NEXT: vcmp.f32 s16, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt lt -; VFP-NEXT: movlt r3, #0 -; VFP-NEXT: movlt r2, #0 -; VFP-NEXT: movlt r1, #0 -; VFP-NEXT: movlt r0, #0 -; VFP-NEXT: vcmp.f32 s16, s0 +; VFP: @ %bb.0: @ %fp-to-i-entry +; VFP-NEXT: .save {r4, lr} +; VFP-NEXT: push {r4, lr} +; VFP-NEXT: .pad #32 +; VFP-NEXT: sub sp, #32 +; VFP-NEXT: ubfx lr, r0, #23, #8 +; VFP-NEXT: mov r12, r0 +; VFP-NEXT: movs r0, #0 +; VFP-NEXT: cmp.w lr, #127 +; VFP-NEXT: blo .LBB8_7 +; VFP-NEXT: @ %bb.1: @ %fp-to-i-entry +; VFP-NEXT: vmov s0, r12 +; VFP-NEXT: vcmp.f32 s0, s0 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt gt -; VFP-NEXT: movgt.w r0, #-1 -; VFP-NEXT: movgt.w r1, #-1 -; VFP-NEXT: movgt.w r2, #-1 -; VFP-NEXT: movgt r3, #15 -; VFP-NEXT: vpop {d8} -; VFP-NEXT: pop {r7, pc} -; VFP-NEXT: .p2align 2 -; VFP-NEXT: @ %bb.1: -; VFP-NEXT: .LCPI8_0: -; VFP-NEXT: .long 0x717fffff @ float 1.26765052E+30 +; VFP-NEXT: bvs .LBB8_7 +; VFP-NEXT: @ %bb.2: @ %fp-to-i-entry +; VFP-NEXT: movs r1, #0 +; VFP-NEXT: movs r2, #0 +; VFP-NEXT: movs r3, #0 +; VFP-NEXT: cmp.w r12, #0 +; VFP-NEXT: bmi .LBB8_9 +; VFP-NEXT: @ %bb.3: @ %fp-to-i-if-check.saturate +; VFP-NEXT: cmp.w lr, #226 +; VFP-NEXT: bls .LBB8_5 +; VFP-NEXT: @ %bb.4: +; VFP-NEXT: movs r3, #15 +; VFP-NEXT: mov.w r0, #-1 +; VFP-NEXT: mov.w r1, #-1 +; VFP-NEXT: mov.w r2, #-1 +; VFP-NEXT: add sp, #32 +; VFP-NEXT: pop {r4, pc} +; VFP-NEXT: .LBB8_5: @ %fp-to-i-if-check.exp.size +; VFP-NEXT: movs r0, #1 +; VFP-NEXT: cmp.w lr, #149 +; VFP-NEXT: bfi r12, r0, #23, #9 +; VFP-NEXT: bhi .LBB8_8 +; VFP-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small +; VFP-NEXT: rsb.w r0, lr, #150 +; VFP-NEXT: lsr.w r0, r12, r0 +; VFP-NEXT: add sp, #32 +; VFP-NEXT: pop {r4, pc} +; VFP-NEXT: .LBB8_7: +; VFP-NEXT: movs r1, #0 +; VFP-NEXT: movs r2, #0 +; VFP-NEXT: movs r3, #0 +; VFP-NEXT: add sp, #32 +; VFP-NEXT: pop {r4, pc} +; VFP-NEXT: .LBB8_8: @ %fp-to-i-if-exp.large +; VFP-NEXT: movs r0, #0 +; VFP-NEXT: mov r2, sp +; VFP-NEXT: strd r0, r0, [sp, #24] +; VFP-NEXT: movs r1, #12 +; VFP-NEXT: strd r12, r0, [sp, #16] +; VFP-NEXT: adds r2, #16 +; VFP-NEXT: strd r0, r0, [sp, #8] +; VFP-NEXT: strd r0, r0, [sp] +; VFP-NEXT: sub.w r0, lr, #150 +; VFP-NEXT: and.w r1, r1, r0, lsr #3 +; VFP-NEXT: and r0, r0, #31 +; VFP-NEXT: subs r1, r2, r1 +; VFP-NEXT: eor r4, r0, #31 +; VFP-NEXT: ldrd r12, lr, [r1] +; VFP-NEXT: ldrd r2, r1, [r1, #8] +; VFP-NEXT: lsls r1, r0 +; VFP-NEXT: lsrs r3, r2, #1 +; VFP-NEXT: lsrs r3, r4 +; VFP-NEXT: orrs r3, r1 +; VFP-NEXT: lsl.w r1, r2, r0 +; VFP-NEXT: lsrs.w r2, lr, #1 +; VFP-NEXT: lsl.w lr, lr, r0 +; VFP-NEXT: lsrs r2, r4 +; VFP-NEXT: lsl.w r0, r12, r0 +; VFP-NEXT: orrs r2, r1 +; VFP-NEXT: lsr.w r1, r12, #1 +; VFP-NEXT: lsrs r1, r4 +; VFP-NEXT: orr.w r1, r1, lr +; VFP-NEXT: .LBB8_9: @ %fp-to-i-cleanup +; VFP-NEXT: add sp, #32 +; VFP-NEXT: pop {r4, pc} %x = call i100 @llvm.fptoui.sat.i100.f32(float %f) ret i100 %x } define i128 @test_signed_i128_f32(float %f) nounwind { ; SOFT-LABEL: test_signed_i128_f32: -; SOFT: @ %bb.0: +; SOFT: @ %bb.0: @ %fp-to-i-entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: .pad #36 +; SOFT-NEXT: sub sp, #36 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: lsrs r0, r0, #23 +; SOFT-NEXT: uxtb r7, r0 +; SOFT-NEXT: movs r5, #0 +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: cmp r7, #127 +; SOFT-NEXT: blo .LBB9_7 +; SOFT-NEXT: @ %bb.1: @ %fp-to-i-entry +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __fixunssfti -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB9_11 -; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: beq .LBB9_12 -; SOFT-NEXT: .LBB9_2: -; SOFT-NEXT: bne .LBB9_4 -; SOFT-NEXT: .LBB9_3: -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: .LBB9_4: -; SOFT-NEXT: str r1, [sp] @ 4-byte Spill -; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB9_6 -; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r5, r7 -; SOFT-NEXT: .LBB9_6: -; SOFT-NEXT: ldr r1, .LCPI9_0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mvns r3, r6 +; SOFT-NEXT: bl __aeabi_fcmpun ; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: beq .LBB9_13 -; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: mov r1, r3 -; SOFT-NEXT: beq .LBB9_14 -; SOFT-NEXT: .LBB9_8: -; SOFT-NEXT: mov r2, r3 -; SOFT-NEXT: beq .LBB9_15 -; SOFT-NEXT: .LBB9_9: -; SOFT-NEXT: beq .LBB9_16 -; SOFT-NEXT: .LBB9_10: -; SOFT-NEXT: add sp, #12 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB9_11: -; SOFT-NEXT: mov r3, r7 -; SOFT-NEXT: bne .LBB9_2 -; SOFT-NEXT: .LBB9_12: -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: beq .LBB9_3 -; SOFT-NEXT: b .LBB9_4 -; SOFT-NEXT: .LBB9_13: -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: mov r1, r3 -; SOFT-NEXT: bne .LBB9_8 -; SOFT-NEXT: .LBB9_14: -; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload -; SOFT-NEXT: mov r2, r3 -; SOFT-NEXT: bne .LBB9_9 -; SOFT-NEXT: .LBB9_15: -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: bne .LBB9_10 -; SOFT-NEXT: .LBB9_16: -; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: bne .LBB9_7 +; SOFT-NEXT: @ %bb.2: @ %fp-to-i-entry +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: bmi .LBB9_9 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-check.saturate +; SOFT-NEXT: movs r1, #0 +; SOFT-NEXT: cmp r7, #254 +; SOFT-NEXT: bls .LBB9_5 +; SOFT-NEXT: @ %bb.4: +; SOFT-NEXT: mvns r4, r1 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r5, r4 +; SOFT-NEXT: b .LBB9_9 +; SOFT-NEXT: .LBB9_5: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: ldr r0, .LCPI9_0 +; SOFT-NEXT: ands r6, r0 +; SOFT-NEXT: adds r0, r6, r0 +; SOFT-NEXT: adds r4, r0, #1 +; SOFT-NEXT: cmp r7, #149 +; SOFT-NEXT: bhi .LBB9_8 +; SOFT-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small +; SOFT-NEXT: movs r0, #150 +; SOFT-NEXT: subs r0, r0, r7 +; SOFT-NEXT: lsrs r4, r0 +; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: b .LBB9_9 +; SOFT-NEXT: .LBB9_7: +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: b .LBB9_9 +; SOFT-NEXT: .LBB9_8: @ %fp-to-i-if-exp.large +; SOFT-NEXT: str r1, [sp, #28] +; SOFT-NEXT: str r1, [sp, #24] +; SOFT-NEXT: str r1, [sp, #20] +; SOFT-NEXT: str r4, [sp, #16] +; SOFT-NEXT: str r1, [sp, #12] +; SOFT-NEXT: str r1, [sp, #8] +; SOFT-NEXT: str r1, [sp, #4] +; SOFT-NEXT: str r1, [sp] +; SOFT-NEXT: subs r7, #150 +; SOFT-NEXT: movs r0, #31 +; SOFT-NEXT: lsrs r1, r7, #3 +; SOFT-NEXT: ands r7, r0 +; SOFT-NEXT: movs r2, #12 +; SOFT-NEXT: ands r2, r1 +; SOFT-NEXT: mov r1, sp +; SOFT-NEXT: adds r1, #16 +; SOFT-NEXT: subs r2, r1, r2 +; SOFT-NEXT: ldr r4, [r2] +; SOFT-NEXT: ldr r3, [r2, #4] +; SOFT-NEXT: ldr r1, [r2, #8] +; SOFT-NEXT: ldr r2, [r2, #12] +; SOFT-NEXT: lsls r2, r7 +; SOFT-NEXT: eors r0, r7 +; SOFT-NEXT: lsrs r5, r1, #1 +; SOFT-NEXT: lsrs r5, r0 +; SOFT-NEXT: orrs r5, r2 +; SOFT-NEXT: lsls r1, r7 +; SOFT-NEXT: lsrs r2, r3, #1 +; SOFT-NEXT: lsrs r2, r0 +; SOFT-NEXT: orrs r2, r1 +; SOFT-NEXT: lsls r3, r7 +; SOFT-NEXT: lsrs r1, r4, #1 +; SOFT-NEXT: lsrs r1, r0 +; SOFT-NEXT: orrs r1, r3 +; SOFT-NEXT: lsls r4, r7 +; SOFT-NEXT: .LBB9_9: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: add sp, #36 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.17: +; SOFT-NEXT: @ %bb.10: ; SOFT-NEXT: .LCPI9_0: -; SOFT-NEXT: .long 2139095039 @ 0x7f7fffff +; SOFT-NEXT: .long 8388607 @ 0x7fffff ; ; VFP-LABEL: test_signed_i128_f32: -; VFP: @ %bb.0: -; VFP-NEXT: .save {r7, lr} -; VFP-NEXT: push {r7, lr} -; VFP-NEXT: .vsave {d8} -; VFP-NEXT: vpush {d8} -; VFP-NEXT: vmov s16, r0 -; VFP-NEXT: bl __fixunssfti -; VFP-NEXT: vldr s0, .LCPI9_0 -; VFP-NEXT: vcmp.f32 s16, #0 +; VFP: @ %bb.0: @ %fp-to-i-entry +; VFP-NEXT: .save {r4, lr} +; VFP-NEXT: push {r4, lr} +; VFP-NEXT: .pad #32 +; VFP-NEXT: sub sp, #32 +; VFP-NEXT: ubfx lr, r0, #23, #8 +; VFP-NEXT: mov r12, r0 +; VFP-NEXT: movs r0, #0 +; VFP-NEXT: cmp.w lr, #127 +; VFP-NEXT: blo .LBB9_7 +; VFP-NEXT: @ %bb.1: @ %fp-to-i-entry +; VFP-NEXT: vmov s0, r12 +; VFP-NEXT: vcmp.f32 s0, s0 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt lt -; VFP-NEXT: movlt r3, #0 -; VFP-NEXT: movlt r2, #0 -; VFP-NEXT: movlt r1, #0 -; VFP-NEXT: movlt r0, #0 -; VFP-NEXT: vcmp.f32 s16, s0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: itttt gt -; VFP-NEXT: movgt.w r0, #-1 -; VFP-NEXT: movgt.w r1, #-1 -; VFP-NEXT: movgt.w r2, #-1 -; VFP-NEXT: movgt.w r3, #-1 -; VFP-NEXT: vpop {d8} -; VFP-NEXT: pop {r7, pc} -; VFP-NEXT: .p2align 2 -; VFP-NEXT: @ %bb.1: -; VFP-NEXT: .LCPI9_0: -; VFP-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 +; VFP-NEXT: bvs .LBB9_7 +; VFP-NEXT: @ %bb.2: @ %fp-to-i-entry +; VFP-NEXT: movs r1, #0 +; VFP-NEXT: movs r2, #0 +; VFP-NEXT: movs r3, #0 +; VFP-NEXT: cmp.w r12, #0 +; VFP-NEXT: bmi .LBB9_9 +; VFP-NEXT: @ %bb.3: @ %fp-to-i-if-check.saturate +; VFP-NEXT: cmp.w lr, #254 +; VFP-NEXT: bls .LBB9_5 +; VFP-NEXT: @ %bb.4: +; VFP-NEXT: mov.w r0, #-1 +; VFP-NEXT: mov.w r1, #-1 +; VFP-NEXT: mov.w r2, #-1 +; VFP-NEXT: mov.w r3, #-1 +; VFP-NEXT: add sp, #32 +; VFP-NEXT: pop {r4, pc} +; VFP-NEXT: .LBB9_5: @ %fp-to-i-if-check.exp.size +; VFP-NEXT: movs r0, #1 +; VFP-NEXT: cmp.w lr, #149 +; VFP-NEXT: bfi r12, r0, #23, #9 +; VFP-NEXT: bhi .LBB9_8 +; VFP-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small +; VFP-NEXT: rsb.w r0, lr, #150 +; VFP-NEXT: lsr.w r0, r12, r0 +; VFP-NEXT: add sp, #32 +; VFP-NEXT: pop {r4, pc} +; VFP-NEXT: .LBB9_7: +; VFP-NEXT: movs r1, #0 +; VFP-NEXT: movs r2, #0 +; VFP-NEXT: movs r3, #0 +; VFP-NEXT: add sp, #32 +; VFP-NEXT: pop {r4, pc} +; VFP-NEXT: .LBB9_8: @ %fp-to-i-if-exp.large +; VFP-NEXT: movs r0, #0 +; VFP-NEXT: mov r2, sp +; VFP-NEXT: strd r0, r0, [sp, #24] +; VFP-NEXT: movs r1, #12 +; VFP-NEXT: strd r12, r0, [sp, #16] +; VFP-NEXT: adds r2, #16 +; VFP-NEXT: strd r0, r0, [sp, #8] +; VFP-NEXT: strd r0, r0, [sp] +; VFP-NEXT: sub.w r0, lr, #150 +; VFP-NEXT: and.w r1, r1, r0, lsr #3 +; VFP-NEXT: and r0, r0, #31 +; VFP-NEXT: subs r1, r2, r1 +; VFP-NEXT: eor r4, r0, #31 +; VFP-NEXT: ldrd r12, lr, [r1] +; VFP-NEXT: ldrd r2, r1, [r1, #8] +; VFP-NEXT: lsls r1, r0 +; VFP-NEXT: lsrs r3, r2, #1 +; VFP-NEXT: lsrs r3, r4 +; VFP-NEXT: orrs r3, r1 +; VFP-NEXT: lsl.w r1, r2, r0 +; VFP-NEXT: lsrs.w r2, lr, #1 +; VFP-NEXT: lsl.w lr, lr, r0 +; VFP-NEXT: lsrs r2, r4 +; VFP-NEXT: lsl.w r0, r12, r0 +; VFP-NEXT: orrs r2, r1 +; VFP-NEXT: lsr.w r1, r12, #1 +; VFP-NEXT: lsrs r1, r4 +; VFP-NEXT: orr.w r1, r1, lr +; VFP-NEXT: .LBB9_9: @ %fp-to-i-cleanup +; VFP-NEXT: add sp, #32 +; VFP-NEXT: pop {r4, pc} %x = call i128 @llvm.fptoui.sat.i128.f32(float %f) ret i128 %x } @@ -1578,291 +1724,618 @@ define i64 @test_signed_i64_f64(double %f) nounwind { define i100 @test_signed_i100_f64(double %f) nounwind { ; SOFT-LABEL: test_signed_i100_f64: -; SOFT: @ %bb.0: +; SOFT: @ %bb.0: @ %fp-to-i-entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 -; SOFT-NEXT: mov r6, r1 -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: movs r5, #0 -; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: mov r3, r5 -; SOFT-NEXT: bl __aeabi_dcmpge -; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: .pad #44 +; SOFT-NEXT: sub sp, #44 +; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: ldr r0, .LCPI18_1 +; SOFT-NEXT: subs r0, #52 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: sbcs r1, r4 +; SOFT-NEXT: lsls r2, r7, #1 +; SOFT-NEXT: lsrs r2, r2, #21 +; SOFT-NEXT: mov r5, r4 +; SOFT-NEXT: subs r0, r2, r0 ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: bl __fixunsdfti -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB18_12 -; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: beq .LBB18_13 -; SOFT-NEXT: .LBB18_2: -; SOFT-NEXT: bne .LBB18_4 -; SOFT-NEXT: .LBB18_3: -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: .LBB18_4: -; SOFT-NEXT: str r1, [sp] @ 4-byte Spill +; SOFT-NEXT: sbcs r0, r1 +; SOFT-NEXT: blo .LBB18_7 +; SOFT-NEXT: @ %bb.1: @ %fp-to-i-entry ; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB18_6 -; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: .LBB18_6: -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: mvns r5, r5 -; SOFT-NEXT: ldr r3, .LCPI18_0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: bl __aeabi_dcmpgt -; SOFT-NEXT: movs r3, #15 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: bl __aeabi_dcmpun ; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: bne .LBB18_7 +; SOFT-NEXT: @ %bb.2: @ %fp-to-i-entry +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: bmi .LBB18_9 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-check.saturate +; SOFT-NEXT: ldr r5, .LCPI18_1 ; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: beq .LBB18_14 -; SOFT-NEXT: @ %bb.7: +; SOFT-NEXT: adds r0, #48 +; SOFT-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: subs r0, r3, r0 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: sbcs r0, r4 +; SOFT-NEXT: blo .LBB18_5 +; SOFT-NEXT: @ %bb.4: +; SOFT-NEXT: mvns r5, r4 +; SOFT-NEXT: movs r3, #15 ; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: beq .LBB18_15 -; SOFT-NEXT: .LBB18_8: -; SOFT-NEXT: beq .LBB18_16 -; SOFT-NEXT: .LBB18_9: -; SOFT-NEXT: bne .LBB18_11 -; SOFT-NEXT: .LBB18_10: -; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: .LBB18_11: -; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: add sp, #12 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB18_12: -; SOFT-NEXT: mov r3, r7 -; SOFT-NEXT: bne .LBB18_2 -; SOFT-NEXT: .LBB18_13: -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: beq .LBB18_3 -; SOFT-NEXT: b .LBB18_4 -; SOFT-NEXT: .LBB18_14: -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: b .LBB18_9 +; SOFT-NEXT: .LBB18_5: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: ldr r0, .LCPI18_0 +; SOFT-NEXT: mvns r1, r0 +; SOFT-NEXT: mvns r2, r7 +; SOFT-NEXT: orrs r2, r1 +; SOFT-NEXT: subs r1, r0, r2 +; SOFT-NEXT: mvns r0, r6 +; SOFT-NEXT: mvns r0, r0 +; SOFT-NEXT: cmp r5, r3 +; SOFT-NEXT: bls .LBB18_8 +; SOFT-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small +; SOFT-NEXT: subs r2, r5, r3 +; SOFT-NEXT: bl __aeabi_llsr +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: b .LBB18_9 +; SOFT-NEXT: .LBB18_7: ; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: bne .LBB18_8 -; SOFT-NEXT: .LBB18_15: -; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload -; SOFT-NEXT: bne .LBB18_9 -; SOFT-NEXT: .LBB18_16: -; SOFT-NEXT: ldr r5, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: beq .LBB18_10 -; SOFT-NEXT: b .LBB18_11 +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: b .LBB18_9 +; SOFT-NEXT: .LBB18_8: @ %fp-to-i-if-exp.large +; SOFT-NEXT: str r4, [sp, #36] +; SOFT-NEXT: str r4, [sp, #20] +; SOFT-NEXT: str r4, [sp, #16] +; SOFT-NEXT: str r4, [sp, #12] +; SOFT-NEXT: str r4, [sp, #8] +; SOFT-NEXT: add r2, sp, #24 +; SOFT-NEXT: stm r2!, {r0, r1, r4} +; SOFT-NEXT: ldr r0, .LCPI18_2 +; SOFT-NEXT: adds r0, r3, r0 +; SOFT-NEXT: movs r2, #31 +; SOFT-NEXT: lsrs r1, r0, #3 +; SOFT-NEXT: ands r0, r2 +; SOFT-NEXT: movs r3, #12 +; SOFT-NEXT: ands r3, r1 +; SOFT-NEXT: add r1, sp, #8 +; SOFT-NEXT: adds r1, #16 +; SOFT-NEXT: subs r3, r1, r3 +; SOFT-NEXT: ldm r3!, {r5, r6} +; SOFT-NEXT: ldr r1, [r3] +; SOFT-NEXT: ldr r4, [r3, #4] +; SOFT-NEXT: lsls r4, r0 +; SOFT-NEXT: eors r2, r0 +; SOFT-NEXT: lsrs r3, r1, #1 +; SOFT-NEXT: lsrs r3, r2 +; SOFT-NEXT: orrs r3, r4 +; SOFT-NEXT: lsls r1, r0 +; SOFT-NEXT: lsrs r4, r6, #1 +; SOFT-NEXT: lsrs r4, r2 +; SOFT-NEXT: orrs r4, r1 +; SOFT-NEXT: lsls r6, r0 +; SOFT-NEXT: lsrs r1, r5, #1 +; SOFT-NEXT: lsrs r1, r2 +; SOFT-NEXT: orrs r1, r6 +; SOFT-NEXT: lsls r5, r0 +; SOFT-NEXT: .LBB18_9: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: add sp, #44 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.17: +; SOFT-NEXT: @ %bb.10: ; SOFT-NEXT: .LCPI18_0: -; SOFT-NEXT: .long 1177550847 @ 0x462fffff +; SOFT-NEXT: .long 1048575 @ 0xfffff +; SOFT-NEXT: .LCPI18_1: +; SOFT-NEXT: .long 1075 @ 0x433 +; SOFT-NEXT: .LCPI18_2: +; SOFT-NEXT: .long 4294966221 @ 0xfffffbcd ; ; VFP2-LABEL: test_signed_i100_f64: -; VFP2: @ %bb.0: -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: .vsave {d8} -; VFP2-NEXT: vpush {d8} -; VFP2-NEXT: vmov d8, r0, r1 -; VFP2-NEXT: bl __fixunsdfti -; VFP2-NEXT: vcmp.f64 d8, #0 -; VFP2-NEXT: vldr d16, .LCPI18_0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt lt -; VFP2-NEXT: movlt r3, #0 -; VFP2-NEXT: movlt r2, #0 -; VFP2-NEXT: movlt r1, #0 -; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: vcmp.f64 d8, d16 +; VFP2: @ %bb.0: @ %fp-to-i-entry +; VFP2-NEXT: .save {r4, r5, r7, lr} +; VFP2-NEXT: push {r4, r5, r7, lr} +; VFP2-NEXT: .pad #32 +; VFP2-NEXT: sub sp, #32 +; VFP2-NEXT: vmov d16, r0, r1 +; VFP2-NEXT: ubfx r4, r1, #20, #11 +; VFP2-NEXT: mov r12, r0 +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: movw r2, #1023 +; VFP2-NEXT: cmp r4, r2 +; VFP2-NEXT: blo .LBB18_7 +; VFP2-NEXT: @ %bb.1: @ %fp-to-i-entry +; VFP2-NEXT: vcmp.f64 d16, d16 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt gt -; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: movgt.w r1, #-1 -; VFP2-NEXT: movgt.w r2, #-1 -; VFP2-NEXT: movgt r3, #15 -; VFP2-NEXT: vpop {d8} -; VFP2-NEXT: pop {r7, pc} -; VFP2-NEXT: .p2align 3 -; VFP2-NEXT: @ %bb.1: -; VFP2-NEXT: .LCPI18_0: -; VFP2-NEXT: .long 4294967295 @ double 1.2676506002282293E+30 -; VFP2-NEXT: .long 1177550847 +; VFP2-NEXT: bvs .LBB18_7 +; VFP2-NEXT: @ %bb.2: @ %fp-to-i-entry +; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: cmp r1, #0 +; VFP2-NEXT: bmi .LBB18_10 +; VFP2-NEXT: @ %bb.3: @ %fp-to-i-if-check.saturate +; VFP2-NEXT: movw r0, #1122 +; VFP2-NEXT: cmp r4, r0 +; VFP2-NEXT: bls .LBB18_5 +; VFP2-NEXT: @ %bb.4: +; VFP2-NEXT: movs r3, #15 +; VFP2-NEXT: mov.w r0, #-1 +; VFP2-NEXT: mov.w lr, #-1 +; VFP2-NEXT: mov.w r2, #-1 +; VFP2-NEXT: b .LBB18_10 +; VFP2-NEXT: .LBB18_5: @ %fp-to-i-if-check.exp.size +; VFP2-NEXT: movs r0, #1 +; VFP2-NEXT: bfi r1, r0, #20, #12 +; VFP2-NEXT: movw r0, #1074 +; VFP2-NEXT: cmp r4, r0 +; VFP2-NEXT: bhi .LBB18_9 +; VFP2-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small +; VFP2-NEXT: movw r0, #1075 +; VFP2-NEXT: subs r2, r0, r4 +; VFP2-NEXT: rsb.w r3, r2, #32 +; VFP2-NEXT: lsr.w r0, r12, r2 +; VFP2-NEXT: lsr.w lr, r1, r2 +; VFP2-NEXT: lsl.w r3, r1, r3 +; VFP2-NEXT: orrs r0, r3 +; VFP2-NEXT: movw r3, #1043 +; VFP2-NEXT: subs r3, r3, r4 +; VFP2-NEXT: it pl +; VFP2-NEXT: lsrpl.w r0, r1, r3 +; VFP2-NEXT: it pl +; VFP2-NEXT: movpl.w lr, #0 +; VFP2-NEXT: b .LBB18_8 +; VFP2-NEXT: .LBB18_7: +; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: .LBB18_8: @ %fp-to-i-cleanup +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: b .LBB18_10 +; VFP2-NEXT: .LBB18_9: @ %fp-to-i-if-exp.large +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: mov r2, sp +; VFP2-NEXT: strd r0, r0, [sp, #24] +; VFP2-NEXT: adds r2, #16 +; VFP2-NEXT: strd r0, r0, [sp, #8] +; VFP2-NEXT: strd r0, r0, [sp] +; VFP2-NEXT: subw r0, r4, #1075 +; VFP2-NEXT: strd r12, r1, [sp, #16] +; VFP2-NEXT: movs r1, #12 +; VFP2-NEXT: and.w r1, r1, r0, lsr #3 +; VFP2-NEXT: and r0, r0, #31 +; VFP2-NEXT: subs r1, r2, r1 +; VFP2-NEXT: ldrd r2, lr, [r1, #8] +; VFP2-NEXT: ldrd r12, r4, [r1] +; VFP2-NEXT: eor r1, r0, #31 +; VFP2-NEXT: lsrs r3, r2, #1 +; VFP2-NEXT: lsls r2, r0 +; VFP2-NEXT: lsr.w r5, r3, r1 +; VFP2-NEXT: lsl.w r3, lr, r0 +; VFP2-NEXT: orrs r3, r5 +; VFP2-NEXT: lsrs.w r5, r4, #1 +; VFP2-NEXT: lsrs r5, r1 +; VFP2-NEXT: orrs r2, r5 +; VFP2-NEXT: lsl.w r5, r4, r0 +; VFP2-NEXT: lsr.w r4, r12, #1 +; VFP2-NEXT: lsr.w r1, r4, r1 +; VFP2-NEXT: orr.w lr, r5, r1 +; VFP2-NEXT: lsl.w r0, r12, r0 +; VFP2-NEXT: .LBB18_10: @ %fp-to-i-cleanup +; VFP2-NEXT: mov r1, lr +; VFP2-NEXT: add sp, #32 +; VFP2-NEXT: pop {r4, r5, r7, pc} ; ; FP16-LABEL: test_signed_i100_f64: -; FP16: @ %bb.0: -; FP16-NEXT: .save {r7, lr} -; FP16-NEXT: push {r7, lr} -; FP16-NEXT: .vsave {d8} -; FP16-NEXT: vpush {d8} -; FP16-NEXT: vmov d8, r0, r1 -; FP16-NEXT: bl __fixunsdfti -; FP16-NEXT: vcmp.f64 d8, #0 -; FP16-NEXT: vldr d0, .LCPI18_0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt lt -; FP16-NEXT: movlt r3, #0 -; FP16-NEXT: movlt r2, #0 -; FP16-NEXT: movlt r1, #0 -; FP16-NEXT: movlt r0, #0 -; FP16-NEXT: vcmp.f64 d8, d0 +; FP16: @ %bb.0: @ %fp-to-i-entry +; FP16-NEXT: .save {r4, r5, r7, lr} +; FP16-NEXT: push {r4, r5, r7, lr} +; FP16-NEXT: .pad #32 +; FP16-NEXT: sub sp, #32 +; FP16-NEXT: vmov d0, r0, r1 +; FP16-NEXT: ubfx r4, r1, #20, #11 +; FP16-NEXT: mov r12, r0 +; FP16-NEXT: movs r0, #0 +; FP16-NEXT: movw r2, #1023 +; FP16-NEXT: cmp r4, r2 +; FP16-NEXT: blo .LBB18_7 +; FP16-NEXT: @ %bb.1: @ %fp-to-i-entry +; FP16-NEXT: vcmp.f64 d0, d0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt gt -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: movgt r3, #15 -; FP16-NEXT: vpop {d8} -; FP16-NEXT: pop {r7, pc} -; FP16-NEXT: .p2align 3 -; FP16-NEXT: @ %bb.1: -; FP16-NEXT: .LCPI18_0: -; FP16-NEXT: .long 4294967295 @ double 1.2676506002282293E+30 -; FP16-NEXT: .long 1177550847 +; FP16-NEXT: bvs .LBB18_7 +; FP16-NEXT: @ %bb.2: @ %fp-to-i-entry +; FP16-NEXT: mov.w lr, #0 +; FP16-NEXT: movs r2, #0 +; FP16-NEXT: movs r3, #0 +; FP16-NEXT: cmp r1, #0 +; FP16-NEXT: bmi .LBB18_10 +; FP16-NEXT: @ %bb.3: @ %fp-to-i-if-check.saturate +; FP16-NEXT: movw r0, #1122 +; FP16-NEXT: cmp r4, r0 +; FP16-NEXT: bls .LBB18_5 +; FP16-NEXT: @ %bb.4: +; FP16-NEXT: movs r3, #15 +; FP16-NEXT: mov.w r0, #-1 +; FP16-NEXT: mov.w lr, #-1 +; FP16-NEXT: mov.w r2, #-1 +; FP16-NEXT: b .LBB18_10 +; FP16-NEXT: .LBB18_5: @ %fp-to-i-if-check.exp.size +; FP16-NEXT: movs r0, #1 +; FP16-NEXT: bfi r1, r0, #20, #12 +; FP16-NEXT: movw r0, #1074 +; FP16-NEXT: cmp r4, r0 +; FP16-NEXT: bhi .LBB18_9 +; FP16-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small +; FP16-NEXT: movw r0, #1075 +; FP16-NEXT: subs r2, r0, r4 +; FP16-NEXT: rsb.w r3, r2, #32 +; FP16-NEXT: lsr.w r0, r12, r2 +; FP16-NEXT: lsr.w lr, r1, r2 +; FP16-NEXT: lsl.w r3, r1, r3 +; FP16-NEXT: orrs r0, r3 +; FP16-NEXT: movw r3, #1043 +; FP16-NEXT: subs r3, r3, r4 +; FP16-NEXT: it pl +; FP16-NEXT: lsrpl.w r0, r1, r3 +; FP16-NEXT: it pl +; FP16-NEXT: movpl.w lr, #0 +; FP16-NEXT: b .LBB18_8 +; FP16-NEXT: .LBB18_7: +; FP16-NEXT: mov.w lr, #0 +; FP16-NEXT: .LBB18_8: @ %fp-to-i-cleanup +; FP16-NEXT: movs r2, #0 +; FP16-NEXT: movs r3, #0 +; FP16-NEXT: b .LBB18_10 +; FP16-NEXT: .LBB18_9: @ %fp-to-i-if-exp.large +; FP16-NEXT: movs r0, #0 +; FP16-NEXT: mov r2, sp +; FP16-NEXT: strd r0, r0, [sp, #24] +; FP16-NEXT: adds r2, #16 +; FP16-NEXT: strd r0, r0, [sp, #8] +; FP16-NEXT: strd r0, r0, [sp] +; FP16-NEXT: subw r0, r4, #1075 +; FP16-NEXT: strd r12, r1, [sp, #16] +; FP16-NEXT: movs r1, #12 +; FP16-NEXT: and.w r1, r1, r0, lsr #3 +; FP16-NEXT: and r0, r0, #31 +; FP16-NEXT: subs r1, r2, r1 +; FP16-NEXT: ldrd r2, lr, [r1, #8] +; FP16-NEXT: ldrd r12, r4, [r1] +; FP16-NEXT: eor r1, r0, #31 +; FP16-NEXT: lsrs r3, r2, #1 +; FP16-NEXT: lsls r2, r0 +; FP16-NEXT: lsr.w r5, r3, r1 +; FP16-NEXT: lsl.w r3, lr, r0 +; FP16-NEXT: orrs r3, r5 +; FP16-NEXT: lsrs.w r5, r4, #1 +; FP16-NEXT: lsrs r5, r1 +; FP16-NEXT: orrs r2, r5 +; FP16-NEXT: lsl.w r5, r4, r0 +; FP16-NEXT: lsr.w r4, r12, #1 +; FP16-NEXT: lsr.w r1, r4, r1 +; FP16-NEXT: orr.w lr, r5, r1 +; FP16-NEXT: lsl.w r0, r12, r0 +; FP16-NEXT: .LBB18_10: @ %fp-to-i-cleanup +; FP16-NEXT: mov r1, lr +; FP16-NEXT: add sp, #32 +; FP16-NEXT: pop {r4, r5, r7, pc} %x = call i100 @llvm.fptoui.sat.i100.f64(double %f) ret i100 %x } define i128 @test_signed_i128_f64(double %f) nounwind { ; SOFT-LABEL: test_signed_i128_f64: -; SOFT: @ %bb.0: +; SOFT: @ %bb.0: @ %fp-to-i-entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 -; SOFT-NEXT: mov r6, r1 -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: movs r5, #0 -; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: mov r3, r5 -; SOFT-NEXT: bl __aeabi_dcmpge -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: bl __fixunsdfti -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB19_12 -; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: beq .LBB19_13 -; SOFT-NEXT: .LBB19_2: -; SOFT-NEXT: bne .LBB19_4 -; SOFT-NEXT: .LBB19_3: +; SOFT-NEXT: .pad #44 +; SOFT-NEXT: sub sp, #44 +; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: ldr r0, .LCPI19_1 +; SOFT-NEXT: subs r0, #52 ; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: .LBB19_4: -; SOFT-NEXT: str r1, [sp] @ 4-byte Spill -; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB19_6 -; SOFT-NEXT: @ %bb.5: +; SOFT-NEXT: sbcs r1, r4 +; SOFT-NEXT: lsls r2, r7, #1 +; SOFT-NEXT: lsrs r2, r2, #21 +; SOFT-NEXT: mov r5, r4 +; SOFT-NEXT: subs r0, r2, r0 ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: .LBB19_6: -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: mvns r5, r5 -; SOFT-NEXT: ldr r3, .LCPI19_0 -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: sbcs r0, r1 +; SOFT-NEXT: blo .LBB19_5 +; SOFT-NEXT: @ %bb.1: @ %fp-to-i-entry +; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: bl __aeabi_dcmpun ; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: bne .LBB19_5 +; SOFT-NEXT: @ %bb.2: @ %fp-to-i-entry +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: bmi .LBB19_6 +; SOFT-NEXT: @ %bb.3: @ %fp-to-i-if-check.saturate +; SOFT-NEXT: ldr r5, .LCPI19_1 ; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: beq .LBB19_14 -; SOFT-NEXT: @ %bb.7: +; SOFT-NEXT: adds r0, #76 +; SOFT-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: subs r0, r3, r0 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: sbcs r0, r4 +; SOFT-NEXT: blo .LBB19_7 +; SOFT-NEXT: @ %bb.4: +; SOFT-NEXT: mvns r5, r4 +; SOFT-NEXT: .LBB19_5: ; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: beq .LBB19_15 -; SOFT-NEXT: .LBB19_8: -; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: beq .LBB19_16 -; SOFT-NEXT: .LBB19_9: -; SOFT-NEXT: bne .LBB19_11 -; SOFT-NEXT: .LBB19_10: -; SOFT-NEXT: ldr r5, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: .LBB19_11: +; SOFT-NEXT: mov r4, r5 ; SOFT-NEXT: mov r3, r5 -; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: .LBB19_6: @ %fp-to-i-cleanup +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: add sp, #44 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB19_12: +; SOFT-NEXT: .LBB19_7: @ %fp-to-i-if-check.exp.size +; SOFT-NEXT: ldr r0, .LCPI19_0 +; SOFT-NEXT: mvns r1, r0 +; SOFT-NEXT: mvns r2, r7 +; SOFT-NEXT: orrs r2, r1 +; SOFT-NEXT: subs r1, r0, r2 +; SOFT-NEXT: mvns r0, r6 +; SOFT-NEXT: mvns r0, r0 +; SOFT-NEXT: cmp r5, r3 +; SOFT-NEXT: bls .LBB19_9 +; SOFT-NEXT: @ %bb.8: @ %fp-to-i-if-exp.small +; SOFT-NEXT: subs r2, r5, r3 +; SOFT-NEXT: bl __aeabi_llsr +; SOFT-NEXT: mov r5, r0 ; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: bne .LBB19_2 -; SOFT-NEXT: .LBB19_13: -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: beq .LBB19_3 -; SOFT-NEXT: b .LBB19_4 -; SOFT-NEXT: .LBB19_14: -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: bne .LBB19_8 -; SOFT-NEXT: .LBB19_15: -; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload -; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: bne .LBB19_9 -; SOFT-NEXT: .LBB19_16: -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: beq .LBB19_10 -; SOFT-NEXT: b .LBB19_11 +; SOFT-NEXT: b .LBB19_6 +; SOFT-NEXT: .LBB19_9: @ %fp-to-i-if-exp.large +; SOFT-NEXT: str r4, [sp, #36] +; SOFT-NEXT: str r4, [sp, #20] +; SOFT-NEXT: str r4, [sp, #16] +; SOFT-NEXT: str r4, [sp, #12] +; SOFT-NEXT: str r4, [sp, #8] +; SOFT-NEXT: add r2, sp, #24 +; SOFT-NEXT: stm r2!, {r0, r1, r4} +; SOFT-NEXT: ldr r0, .LCPI19_2 +; SOFT-NEXT: adds r0, r3, r0 +; SOFT-NEXT: movs r2, #31 +; SOFT-NEXT: lsrs r1, r0, #3 +; SOFT-NEXT: ands r0, r2 +; SOFT-NEXT: movs r3, #12 +; SOFT-NEXT: ands r3, r1 +; SOFT-NEXT: add r1, sp, #8 +; SOFT-NEXT: adds r1, #16 +; SOFT-NEXT: subs r3, r1, r3 +; SOFT-NEXT: ldm r3!, {r5, r6} +; SOFT-NEXT: ldr r1, [r3] +; SOFT-NEXT: ldr r4, [r3, #4] +; SOFT-NEXT: lsls r4, r0 +; SOFT-NEXT: eors r2, r0 +; SOFT-NEXT: lsrs r3, r1, #1 +; SOFT-NEXT: lsrs r3, r2 +; SOFT-NEXT: orrs r3, r4 +; SOFT-NEXT: lsls r1, r0 +; SOFT-NEXT: lsrs r4, r6, #1 +; SOFT-NEXT: lsrs r4, r2 +; SOFT-NEXT: orrs r4, r1 +; SOFT-NEXT: lsls r6, r0 +; SOFT-NEXT: lsrs r1, r5, #1 +; SOFT-NEXT: lsrs r1, r2 +; SOFT-NEXT: orrs r1, r6 +; SOFT-NEXT: lsls r5, r0 +; SOFT-NEXT: b .LBB19_6 ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.17: +; SOFT-NEXT: @ %bb.10: ; SOFT-NEXT: .LCPI19_0: -; SOFT-NEXT: .long 1206910975 @ 0x47efffff +; SOFT-NEXT: .long 1048575 @ 0xfffff +; SOFT-NEXT: .LCPI19_1: +; SOFT-NEXT: .long 1075 @ 0x433 +; SOFT-NEXT: .LCPI19_2: +; SOFT-NEXT: .long 4294966221 @ 0xfffffbcd ; ; VFP2-LABEL: test_signed_i128_f64: -; VFP2: @ %bb.0: -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: .vsave {d8} -; VFP2-NEXT: vpush {d8} -; VFP2-NEXT: vmov d8, r0, r1 -; VFP2-NEXT: bl __fixunsdfti -; VFP2-NEXT: vcmp.f64 d8, #0 -; VFP2-NEXT: vldr d16, .LCPI19_0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt lt -; VFP2-NEXT: movlt r3, #0 -; VFP2-NEXT: movlt r2, #0 -; VFP2-NEXT: movlt r1, #0 -; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: vcmp.f64 d8, d16 +; VFP2: @ %bb.0: @ %fp-to-i-entry +; VFP2-NEXT: .save {r4, r5, r7, lr} +; VFP2-NEXT: push {r4, r5, r7, lr} +; VFP2-NEXT: .pad #32 +; VFP2-NEXT: sub sp, #32 +; VFP2-NEXT: vmov d16, r0, r1 +; VFP2-NEXT: ubfx r4, r1, #20, #11 +; VFP2-NEXT: mov r12, r0 +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: movw r2, #1023 +; VFP2-NEXT: cmp r4, r2 +; VFP2-NEXT: blo .LBB19_7 +; VFP2-NEXT: @ %bb.1: @ %fp-to-i-entry +; VFP2-NEXT: vcmp.f64 d16, d16 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt gt -; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: movgt.w r1, #-1 -; VFP2-NEXT: movgt.w r2, #-1 -; VFP2-NEXT: movgt.w r3, #-1 -; VFP2-NEXT: vpop {d8} -; VFP2-NEXT: pop {r7, pc} -; VFP2-NEXT: .p2align 3 -; VFP2-NEXT: @ %bb.1: -; VFP2-NEXT: .LCPI19_0: -; VFP2-NEXT: .long 4294967295 @ double 3.4028236692093843E+38 -; VFP2-NEXT: .long 1206910975 +; VFP2-NEXT: bvs .LBB19_7 +; VFP2-NEXT: @ %bb.2: @ %fp-to-i-entry +; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: cmp r1, #0 +; VFP2-NEXT: bmi .LBB19_10 +; VFP2-NEXT: @ %bb.3: @ %fp-to-i-if-check.saturate +; VFP2-NEXT: movw r0, #1150 +; VFP2-NEXT: cmp r4, r0 +; VFP2-NEXT: bls .LBB19_5 +; VFP2-NEXT: @ %bb.4: +; VFP2-NEXT: mov.w r0, #-1 +; VFP2-NEXT: mov.w lr, #-1 +; VFP2-NEXT: mov.w r2, #-1 +; VFP2-NEXT: mov.w r3, #-1 +; VFP2-NEXT: b .LBB19_10 +; VFP2-NEXT: .LBB19_5: @ %fp-to-i-if-check.exp.size +; VFP2-NEXT: movs r0, #1 +; VFP2-NEXT: bfi r1, r0, #20, #12 +; VFP2-NEXT: movw r0, #1074 +; VFP2-NEXT: cmp r4, r0 +; VFP2-NEXT: bhi .LBB19_9 +; VFP2-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small +; VFP2-NEXT: movw r0, #1075 +; VFP2-NEXT: subs r2, r0, r4 +; VFP2-NEXT: rsb.w r3, r2, #32 +; VFP2-NEXT: lsr.w r0, r12, r2 +; VFP2-NEXT: lsr.w lr, r1, r2 +; VFP2-NEXT: lsl.w r3, r1, r3 +; VFP2-NEXT: orrs r0, r3 +; VFP2-NEXT: movw r3, #1043 +; VFP2-NEXT: subs r3, r3, r4 +; VFP2-NEXT: it pl +; VFP2-NEXT: lsrpl.w r0, r1, r3 +; VFP2-NEXT: it pl +; VFP2-NEXT: movpl.w lr, #0 +; VFP2-NEXT: b .LBB19_8 +; VFP2-NEXT: .LBB19_7: +; VFP2-NEXT: mov.w lr, #0 +; VFP2-NEXT: .LBB19_8: @ %fp-to-i-cleanup +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: movs r3, #0 +; VFP2-NEXT: b .LBB19_10 +; VFP2-NEXT: .LBB19_9: @ %fp-to-i-if-exp.large +; VFP2-NEXT: movs r0, #0 +; VFP2-NEXT: mov r2, sp +; VFP2-NEXT: strd r0, r0, [sp, #24] +; VFP2-NEXT: adds r2, #16 +; VFP2-NEXT: strd r0, r0, [sp, #8] +; VFP2-NEXT: strd r0, r0, [sp] +; VFP2-NEXT: subw r0, r4, #1075 +; VFP2-NEXT: strd r12, r1, [sp, #16] +; VFP2-NEXT: movs r1, #12 +; VFP2-NEXT: and.w r1, r1, r0, lsr #3 +; VFP2-NEXT: and r0, r0, #31 +; VFP2-NEXT: subs r1, r2, r1 +; VFP2-NEXT: ldrd r2, lr, [r1, #8] +; VFP2-NEXT: ldrd r12, r4, [r1] +; VFP2-NEXT: eor r1, r0, #31 +; VFP2-NEXT: lsrs r3, r2, #1 +; VFP2-NEXT: lsls r2, r0 +; VFP2-NEXT: lsr.w r5, r3, r1 +; VFP2-NEXT: lsl.w r3, lr, r0 +; VFP2-NEXT: orrs r3, r5 +; VFP2-NEXT: lsrs.w r5, r4, #1 +; VFP2-NEXT: lsrs r5, r1 +; VFP2-NEXT: orrs r2, r5 +; VFP2-NEXT: lsl.w r5, r4, r0 +; VFP2-NEXT: lsr.w r4, r12, #1 +; VFP2-NEXT: lsr.w r1, r4, r1 +; VFP2-NEXT: orr.w lr, r5, r1 +; VFP2-NEXT: lsl.w r0, r12, r0 +; VFP2-NEXT: .LBB19_10: @ %fp-to-i-cleanup +; VFP2-NEXT: mov r1, lr +; VFP2-NEXT: add sp, #32 +; VFP2-NEXT: pop {r4, r5, r7, pc} ; ; FP16-LABEL: test_signed_i128_f64: -; FP16: @ %bb.0: -; FP16-NEXT: .save {r7, lr} -; FP16-NEXT: push {r7, lr} -; FP16-NEXT: .vsave {d8} -; FP16-NEXT: vpush {d8} -; FP16-NEXT: vmov d8, r0, r1 -; FP16-NEXT: bl __fixunsdfti -; FP16-NEXT: vcmp.f64 d8, #0 -; FP16-NEXT: vldr d0, .LCPI19_0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt lt -; FP16-NEXT: movlt r3, #0 -; FP16-NEXT: movlt r2, #0 -; FP16-NEXT: movlt r1, #0 -; FP16-NEXT: movlt r0, #0 -; FP16-NEXT: vcmp.f64 d8, d0 +; FP16: @ %bb.0: @ %fp-to-i-entry +; FP16-NEXT: .save {r4, r5, r7, lr} +; FP16-NEXT: push {r4, r5, r7, lr} +; FP16-NEXT: .pad #32 +; FP16-NEXT: sub sp, #32 +; FP16-NEXT: vmov d0, r0, r1 +; FP16-NEXT: ubfx r4, r1, #20, #11 +; FP16-NEXT: mov r12, r0 +; FP16-NEXT: movs r0, #0 +; FP16-NEXT: movw r2, #1023 +; FP16-NEXT: cmp r4, r2 +; FP16-NEXT: blo .LBB19_7 +; FP16-NEXT: @ %bb.1: @ %fp-to-i-entry +; FP16-NEXT: vcmp.f64 d0, d0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt gt -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: movgt.w r3, #-1 -; FP16-NEXT: vpop {d8} -; FP16-NEXT: pop {r7, pc} -; FP16-NEXT: .p2align 3 -; FP16-NEXT: @ %bb.1: -; FP16-NEXT: .LCPI19_0: -; FP16-NEXT: .long 4294967295 @ double 3.4028236692093843E+38 -; FP16-NEXT: .long 1206910975 +; FP16-NEXT: bvs .LBB19_7 +; FP16-NEXT: @ %bb.2: @ %fp-to-i-entry +; FP16-NEXT: mov.w lr, #0 +; FP16-NEXT: movs r2, #0 +; FP16-NEXT: movs r3, #0 +; FP16-NEXT: cmp r1, #0 +; FP16-NEXT: bmi .LBB19_10 +; FP16-NEXT: @ %bb.3: @ %fp-to-i-if-check.saturate +; FP16-NEXT: movw r0, #1150 +; FP16-NEXT: cmp r4, r0 +; FP16-NEXT: bls .LBB19_5 +; FP16-NEXT: @ %bb.4: +; FP16-NEXT: mov.w r0, #-1 +; FP16-NEXT: mov.w lr, #-1 +; FP16-NEXT: mov.w r2, #-1 +; FP16-NEXT: mov.w r3, #-1 +; FP16-NEXT: b .LBB19_10 +; FP16-NEXT: .LBB19_5: @ %fp-to-i-if-check.exp.size +; FP16-NEXT: movs r0, #1 +; FP16-NEXT: bfi r1, r0, #20, #12 +; FP16-NEXT: movw r0, #1074 +; FP16-NEXT: cmp r4, r0 +; FP16-NEXT: bhi .LBB19_9 +; FP16-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small +; FP16-NEXT: movw r0, #1075 +; FP16-NEXT: subs r2, r0, r4 +; FP16-NEXT: rsb.w r3, r2, #32 +; FP16-NEXT: lsr.w r0, r12, r2 +; FP16-NEXT: lsr.w lr, r1, r2 +; FP16-NEXT: lsl.w r3, r1, r3 +; FP16-NEXT: orrs r0, r3 +; FP16-NEXT: movw r3, #1043 +; FP16-NEXT: subs r3, r3, r4 +; FP16-NEXT: it pl +; FP16-NEXT: lsrpl.w r0, r1, r3 +; FP16-NEXT: it pl +; FP16-NEXT: movpl.w lr, #0 +; FP16-NEXT: b .LBB19_8 +; FP16-NEXT: .LBB19_7: +; FP16-NEXT: mov.w lr, #0 +; FP16-NEXT: .LBB19_8: @ %fp-to-i-cleanup +; FP16-NEXT: movs r2, #0 +; FP16-NEXT: movs r3, #0 +; FP16-NEXT: b .LBB19_10 +; FP16-NEXT: .LBB19_9: @ %fp-to-i-if-exp.large +; FP16-NEXT: movs r0, #0 +; FP16-NEXT: mov r2, sp +; FP16-NEXT: strd r0, r0, [sp, #24] +; FP16-NEXT: adds r2, #16 +; FP16-NEXT: strd r0, r0, [sp, #8] +; FP16-NEXT: strd r0, r0, [sp] +; FP16-NEXT: subw r0, r4, #1075 +; FP16-NEXT: strd r12, r1, [sp, #16] +; FP16-NEXT: movs r1, #12 +; FP16-NEXT: and.w r1, r1, r0, lsr #3 +; FP16-NEXT: and r0, r0, #31 +; FP16-NEXT: subs r1, r2, r1 +; FP16-NEXT: ldrd r2, lr, [r1, #8] +; FP16-NEXT: ldrd r12, r4, [r1] +; FP16-NEXT: eor r1, r0, #31 +; FP16-NEXT: lsrs r3, r2, #1 +; FP16-NEXT: lsls r2, r0 +; FP16-NEXT: lsr.w r5, r3, r1 +; FP16-NEXT: lsl.w r3, lr, r0 +; FP16-NEXT: orrs r3, r5 +; FP16-NEXT: lsrs.w r5, r4, #1 +; FP16-NEXT: lsrs r5, r1 +; FP16-NEXT: orrs r2, r5 +; FP16-NEXT: lsl.w r5, r4, r0 +; FP16-NEXT: lsr.w r4, r12, #1 +; FP16-NEXT: lsr.w r1, r4, r1 +; FP16-NEXT: orr.w lr, r5, r1 +; FP16-NEXT: lsl.w r0, r12, r0 +; FP16-NEXT: .LBB19_10: @ %fp-to-i-cleanup +; FP16-NEXT: mov r1, lr +; FP16-NEXT: add sp, #32 +; FP16-NEXT: pop {r4, r5, r7, pc} %x = call i128 @llvm.fptoui.sat.i128.f64(double %f) ret i128 %x } @@ -2573,140 +3046,38 @@ define i64 @test_signed_i64_f16(half %f) nounwind { define i100 @test_signed_i100_f16(half %f) nounwind { ; SOFT-LABEL: test_signed_i100_f16: ; SOFT: @ %bb.0: -; SOFT-NEXT: .save {r4, r5, r6, r7, lr} -; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 +; SOFT-NEXT: .save {r7, lr} +; SOFT-NEXT: push {r7, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __fixunssfti -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB28_11 -; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: beq .LBB28_12 -; SOFT-NEXT: .LBB28_2: -; SOFT-NEXT: bne .LBB28_4 -; SOFT-NEXT: .LBB28_3: -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: .LBB28_4: -; SOFT-NEXT: str r1, [sp] @ 4-byte Spill -; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB28_6 -; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r5, r7 -; SOFT-NEXT: .LBB28_6: -; SOFT-NEXT: ldr r1, .LCPI28_0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mvns r2, r6 -; SOFT-NEXT: movs r3, #15 -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: beq .LBB28_13 -; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: mov r1, r2 -; SOFT-NEXT: beq .LBB28_14 -; SOFT-NEXT: .LBB28_8: -; SOFT-NEXT: beq .LBB28_15 -; SOFT-NEXT: .LBB28_9: -; SOFT-NEXT: beq .LBB28_16 -; SOFT-NEXT: .LBB28_10: -; SOFT-NEXT: add sp, #12 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB28_11: -; SOFT-NEXT: mov r3, r7 -; SOFT-NEXT: bne .LBB28_2 -; SOFT-NEXT: .LBB28_12: -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: beq .LBB28_3 -; SOFT-NEXT: b .LBB28_4 -; SOFT-NEXT: .LBB28_13: -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: mov r1, r2 -; SOFT-NEXT: bne .LBB28_8 -; SOFT-NEXT: .LBB28_14: -; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload -; SOFT-NEXT: bne .LBB28_9 -; SOFT-NEXT: .LBB28_15: -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: bne .LBB28_10 -; SOFT-NEXT: .LBB28_16: -; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: add sp, #12 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.17: -; SOFT-NEXT: .LCPI28_0: -; SOFT-NEXT: .long 1904214015 @ 0x717fffff +; SOFT-NEXT: bl __aeabi_f2iz +; SOFT-NEXT: asrs r1, r0, #31 +; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: mov r3, r1 +; SOFT-NEXT: pop {r7, pc} ; ; VFP2-LABEL: test_signed_i100_f16: ; VFP2: @ %bb.0: -; VFP2-NEXT: .save {r4, lr} -; VFP2-NEXT: push {r4, lr} +; VFP2-NEXT: .save {r7, lr} +; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: mov r4, r0 -; VFP2-NEXT: bl __fixunssfti -; VFP2-NEXT: vmov s0, r4 -; VFP2-NEXT: vldr s2, .LCPI28_0 -; VFP2-NEXT: vcmp.f32 s0, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt lt -; VFP2-NEXT: movlt r3, #0 -; VFP2-NEXT: movlt r2, #0 -; VFP2-NEXT: movlt r1, #0 -; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: vcmp.f32 s0, s2 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt gt -; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: movgt.w r1, #-1 -; VFP2-NEXT: movgt.w r2, #-1 -; VFP2-NEXT: movgt r3, #15 -; VFP2-NEXT: pop {r4, pc} -; VFP2-NEXT: .p2align 2 -; VFP2-NEXT: @ %bb.1: -; VFP2-NEXT: .LCPI28_0: -; VFP2-NEXT: .long 0x717fffff @ float 1.26765052E+30 +; VFP2-NEXT: vmov s0, r0 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: asrs r1, r0, #31 +; VFP2-NEXT: mov r2, r1 +; VFP2-NEXT: mov r3, r1 +; VFP2-NEXT: pop {r7, pc} ; ; FP16-LABEL: test_signed_i100_f16: ; FP16: @ %bb.0: -; FP16-NEXT: .save {r7, lr} -; FP16-NEXT: push {r7, lr} -; FP16-NEXT: .vsave {d8} -; FP16-NEXT: vpush {d8} ; FP16-NEXT: vmov.f16 s0, r0 -; FP16-NEXT: vcvtb.f32.f16 s16, s0 -; FP16-NEXT: vmov r0, s16 -; FP16-NEXT: bl __fixunssfti -; FP16-NEXT: vldr s0, .LCPI28_0 -; FP16-NEXT: vcmp.f32 s16, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt lt -; FP16-NEXT: movlt r3, #0 -; FP16-NEXT: movlt r2, #0 -; FP16-NEXT: movlt r1, #0 -; FP16-NEXT: movlt r0, #0 -; FP16-NEXT: vcmp.f32 s16, s0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt gt -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: movgt r3, #15 -; FP16-NEXT: vpop {d8} -; FP16-NEXT: pop {r7, pc} -; FP16-NEXT: .p2align 2 -; FP16-NEXT: @ %bb.1: -; FP16-NEXT: .LCPI28_0: -; FP16-NEXT: .long 0x717fffff @ float 1.26765052E+30 +; FP16-NEXT: vcvt.s32.f16 s0, s0 +; FP16-NEXT: vmov r0, s0 +; FP16-NEXT: asrs r1, r0, #31 +; FP16-NEXT: mov r2, r1 +; FP16-NEXT: mov r3, r1 +; FP16-NEXT: bx lr %x = call i100 @llvm.fptoui.sat.i100.f16(half %f) ret i100 %x } @@ -2714,141 +3085,38 @@ define i100 @test_signed_i100_f16(half %f) nounwind { define i128 @test_signed_i128_f16(half %f) nounwind { ; SOFT-LABEL: test_signed_i128_f16: ; SOFT: @ %bb.0: -; SOFT-NEXT: .save {r4, r5, r6, r7, lr} -; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 +; SOFT-NEXT: .save {r7, lr} +; SOFT-NEXT: push {r7, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __fixunssfti -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB29_11 -; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: beq .LBB29_12 -; SOFT-NEXT: .LBB29_2: -; SOFT-NEXT: bne .LBB29_4 -; SOFT-NEXT: .LBB29_3: -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: .LBB29_4: -; SOFT-NEXT: str r1, [sp] @ 4-byte Spill -; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB29_6 -; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r5, r7 -; SOFT-NEXT: .LBB29_6: -; SOFT-NEXT: ldr r1, .LCPI29_0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mvns r3, r6 -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: beq .LBB29_13 -; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: mov r1, r3 -; SOFT-NEXT: beq .LBB29_14 -; SOFT-NEXT: .LBB29_8: -; SOFT-NEXT: mov r2, r3 -; SOFT-NEXT: beq .LBB29_15 -; SOFT-NEXT: .LBB29_9: -; SOFT-NEXT: beq .LBB29_16 -; SOFT-NEXT: .LBB29_10: -; SOFT-NEXT: add sp, #12 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB29_11: -; SOFT-NEXT: mov r3, r7 -; SOFT-NEXT: bne .LBB29_2 -; SOFT-NEXT: .LBB29_12: -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: beq .LBB29_3 -; SOFT-NEXT: b .LBB29_4 -; SOFT-NEXT: .LBB29_13: -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: mov r1, r3 -; SOFT-NEXT: bne .LBB29_8 -; SOFT-NEXT: .LBB29_14: -; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload -; SOFT-NEXT: mov r2, r3 -; SOFT-NEXT: bne .LBB29_9 -; SOFT-NEXT: .LBB29_15: -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: bne .LBB29_10 -; SOFT-NEXT: .LBB29_16: -; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: add sp, #12 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.17: -; SOFT-NEXT: .LCPI29_0: -; SOFT-NEXT: .long 2139095039 @ 0x7f7fffff +; SOFT-NEXT: bl __aeabi_f2iz +; SOFT-NEXT: asrs r1, r0, #31 +; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: mov r3, r1 +; SOFT-NEXT: pop {r7, pc} ; ; VFP2-LABEL: test_signed_i128_f16: ; VFP2: @ %bb.0: -; VFP2-NEXT: .save {r4, lr} -; VFP2-NEXT: push {r4, lr} +; VFP2-NEXT: .save {r7, lr} +; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: mov r4, r0 -; VFP2-NEXT: bl __fixunssfti -; VFP2-NEXT: vmov s0, r4 -; VFP2-NEXT: vldr s2, .LCPI29_0 -; VFP2-NEXT: vcmp.f32 s0, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt lt -; VFP2-NEXT: movlt r3, #0 -; VFP2-NEXT: movlt r2, #0 -; VFP2-NEXT: movlt r1, #0 -; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: vcmp.f32 s0, s2 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itttt gt -; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: movgt.w r1, #-1 -; VFP2-NEXT: movgt.w r2, #-1 -; VFP2-NEXT: movgt.w r3, #-1 -; VFP2-NEXT: pop {r4, pc} -; VFP2-NEXT: .p2align 2 -; VFP2-NEXT: @ %bb.1: -; VFP2-NEXT: .LCPI29_0: -; VFP2-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 +; VFP2-NEXT: vmov s0, r0 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: asrs r1, r0, #31 +; VFP2-NEXT: mov r2, r1 +; VFP2-NEXT: mov r3, r1 +; VFP2-NEXT: pop {r7, pc} ; ; FP16-LABEL: test_signed_i128_f16: ; FP16: @ %bb.0: -; FP16-NEXT: .save {r7, lr} -; FP16-NEXT: push {r7, lr} -; FP16-NEXT: .vsave {d8} -; FP16-NEXT: vpush {d8} ; FP16-NEXT: vmov.f16 s0, r0 -; FP16-NEXT: vcvtb.f32.f16 s16, s0 -; FP16-NEXT: vmov r0, s16 -; FP16-NEXT: bl __fixunssfti -; FP16-NEXT: vldr s0, .LCPI29_0 -; FP16-NEXT: vcmp.f32 s16, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt lt -; FP16-NEXT: movlt r3, #0 -; FP16-NEXT: movlt r2, #0 -; FP16-NEXT: movlt r1, #0 -; FP16-NEXT: movlt r0, #0 -; FP16-NEXT: vcmp.f32 s16, s0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: itttt gt -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: movgt.w r3, #-1 -; FP16-NEXT: vpop {d8} -; FP16-NEXT: pop {r7, pc} -; FP16-NEXT: .p2align 2 -; FP16-NEXT: @ %bb.1: -; FP16-NEXT: .LCPI29_0: -; FP16-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 +; FP16-NEXT: vcvt.s32.f16 s0, s0 +; FP16-NEXT: vmov r0, s0 +; FP16-NEXT: asrs r1, r0, #31 +; FP16-NEXT: mov r2, r1 +; FP16-NEXT: mov r3, r1 +; FP16-NEXT: bx lr %x = call i128 @llvm.fptoui.sat.i128.f16(half %f) ret i128 %x } diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll index 0134ee48ad421..4f591575b2cb7 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll @@ -669,64 +669,239 @@ entry: define arm_aapcs_vfpcc <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-LABEL: stest_f64i64: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d8 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: vmov r12, lr, d9 -; CHECK-NEXT: subs.w r5, r0, #-1 -; CHECK-NEXT: mvn r4, #-2147483648 -; CHECK-NEXT: sbcs.w r5, r1, r4 -; CHECK-NEXT: sbcs r5, r2, #0 -; CHECK-NEXT: mov.w r7, #-2147483648 -; CHECK-NEXT: sbcs r5, r3, #0 +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #100 +; CHECK-NEXT: sub sp, #100 +; CHECK-NEXT: vmov r6, r0, d0 +; CHECK-NEXT: mov.w r12, #1 +; CHECK-NEXT: mov.w r8, #-1 +; CHECK-NEXT: movw lr, #1023 +; CHECK-NEXT: ubfx r4, r0, #20, #11 +; CHECK-NEXT: cmp r4, lr +; CHECK-NEXT: bhs .LBB18_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: b .LBB18_5 +; CHECK-NEXT: .LBB18_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: orr.w r11, r12, r0, asr #31 +; CHECK-NEXT: bfi r3, r12, #20, #12 +; CHECK-NEXT: asrs r0, r0, #31 +; CHECK-NEXT: movw r5, #1074 +; CHECK-NEXT: cmp r4, r5 +; CHECK-NEXT: bhi .LBB18_4 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: subw r4, r4, #1075 +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: lsll r6, r3, r4 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: umull r10, r4, r6, r11 +; CHECK-NEXT: umlal r4, r7, r3, r11 +; CHECK-NEXT: umull r2, r5, r6, r0 +; CHECK-NEXT: adds r1, r2, r4 +; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: adcs.w r4, r7, r5 +; CHECK-NEXT: mla r5, r0, r3, r5 +; CHECK-NEXT: adc r7, r9, #0 +; CHECK-NEXT: umlal r4, r7, r3, r0 +; CHECK-NEXT: mla r0, r0, r6, r5 +; CHECK-NEXT: adds r1, r4, r2 +; CHECK-NEXT: adc.w r3, r7, r0 +; CHECK-NEXT: b .LBB18_5 +; CHECK-NEXT: .LBB18_4: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: strd r6, r3, [sp, #80] +; CHECK-NEXT: subw r3, r4, #1075 +; CHECK-NEXT: movs r4, #12 +; CHECK-NEXT: add r5, sp, #64 +; CHECK-NEXT: and.w r4, r4, r3, lsr #3 +; CHECK-NEXT: adds r5, #16 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: subs r4, r5, r4 +; CHECK-NEXT: strd r1, r1, [sp, #88] +; CHECK-NEXT: and r5, r3, #31 +; CHECK-NEXT: strd r1, r1, [sp, #72] +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: strd r1, r1, [sp, #64] +; CHECK-NEXT: ldm.w r4, {r6, r7, r10} +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: ldr.w r9, [r4, #12] +; CHECK-NEXT: mov r4, r6 +; CHECK-NEXT: lsrl r6, r7, #1 +; CHECK-NEXT: lsll r4, r3, r5 +; CHECK-NEXT: lsll r10, r9, r5 +; CHECK-NEXT: umull r2, r1, r4, r11 +; CHECK-NEXT: umlal r1, lr, r3, r11 +; CHECK-NEXT: str r2, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: umull r2, r12, r4, r0 +; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: adds r1, r1, r2 +; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: mov r2, r12 +; CHECK-NEXT: adcs.w r12, r12, lr +; CHECK-NEXT: mov.w r1, #0 +; CHECK-NEXT: adc lr, r1, #0 +; CHECK-NEXT: eor r1, r5, #63 +; CHECK-NEXT: mla r2, r0, r3, r2 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: lsll r6, r7, r1 +; CHECK-NEXT: umlal r12, lr, r3, r0 +; CHECK-NEXT: orr.w r5, r10, r6 +; CHECK-NEXT: orr.w r1, r9, r7 +; CHECK-NEXT: umull r6, r7, r11, r5 +; CHECK-NEXT: ldr.w r10, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: mla r1, r11, r1, r7 +; CHECK-NEXT: mla r1, r0, r5, r1 +; CHECK-NEXT: mla r0, r0, r4, r2 +; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: adds r2, r2, r6 +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: adds.w r1, r12, r2 +; CHECK-NEXT: adc.w r3, lr, r0 +; CHECK-NEXT: mov.w r12, #1 +; CHECK-NEXT: movw lr, #1023 +; CHECK-NEXT: .LBB18_5: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r0, r7, d1 +; CHECK-NEXT: mov.w r6, #-1 +; CHECK-NEXT: cmp.w r7, #-1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt r6, #0 +; CHECK-NEXT: movgt.w r8, #1 +; CHECK-NEXT: ubfx r2, r7, #20, #11 +; CHECK-NEXT: cmp r2, lr +; CHECK-NEXT: bhs .LBB18_7 +; CHECK-NEXT: @ %bb.6: +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: b .LBB18_11 +; CHECK-NEXT: .LBB18_7: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: bfi r7, r12, #20, #12 +; CHECK-NEXT: strd r3, r1, [sp, #16] @ 8-byte Folded Spill +; CHECK-NEXT: movw r1, #1074 +; CHECK-NEXT: cmp r2, r1 +; CHECK-NEXT: bhi .LBB18_9 +; CHECK-NEXT: @ %bb.8: @ %fp-to-i-if-exp.small +; CHECK-NEXT: subw r1, r2, #1075 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: lsll r0, r7, r1 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: umull r12, r1, r0, r8 +; CHECK-NEXT: umlal r1, r4, r7, r8 +; CHECK-NEXT: umull lr, r5, r0, r6 +; CHECK-NEXT: adds.w r9, lr, r1 +; CHECK-NEXT: adcs.w r1, r4, r5 +; CHECK-NEXT: mla r5, r6, r7, r5 +; CHECK-NEXT: adc r4, r3, #0 +; CHECK-NEXT: umlal r1, r4, r7, r6 +; CHECK-NEXT: mla r0, r6, r0, r5 +; CHECK-NEXT: adds.w r8, r1, lr +; CHECK-NEXT: adc.w lr, r4, r0 +; CHECK-NEXT: b .LBB18_10 +; CHECK-NEXT: .LBB18_9: @ %fp-to-i-if-exp.large +; CHECK-NEXT: add r1, sp, #48 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: stm.w r1, {r0, r7, lr} +; CHECK-NEXT: subw r1, r2, #1075 +; CHECK-NEXT: movs r0, #12 +; CHECK-NEXT: add r2, sp, #32 +; CHECK-NEXT: and.w r0, r0, r1, lsr #3 +; CHECK-NEXT: adds r2, #16 +; CHECK-NEXT: str.w lr, [sp, #60] +; CHECK-NEXT: strd lr, lr, [sp, #40] +; CHECK-NEXT: subs r2, r2, r0 +; CHECK-NEXT: strd lr, lr, [sp, #32] +; CHECK-NEXT: and r1, r1, #31 +; CHECK-NEXT: ldrd r0, r7, [r2] +; CHECK-NEXT: ldr r3, [r2, #8] +; CHECK-NEXT: mov r5, r7 +; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: ldr r3, [r2, #12] +; CHECK-NEXT: lsll r4, r5, r1 +; CHECK-NEXT: str.w r10, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: umull r12, r2, r4, r8 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: lsrl r0, r7, #1 +; CHECK-NEXT: umull r9, r11, r4, r6 +; CHECK-NEXT: umlal r2, r10, r5, r8 +; CHECK-NEXT: strd r11, r9, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: adds.w r9, r9, r2 +; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: adcs.w r10, r10, r11 +; CHECK-NEXT: eor r11, r1, #63 +; CHECK-NEXT: rsb.w r11, r11, #0 +; CHECK-NEXT: lsll r2, r3, r1 +; CHECK-NEXT: lsll r0, r7, r11 +; CHECK-NEXT: adc lr, lr, #0 +; CHECK-NEXT: orrs r0, r2 +; CHECK-NEXT: orr.w r1, r3, r7 +; CHECK-NEXT: umlal r10, lr, r5, r6 +; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: umull r3, r7, r8, r0 +; CHECK-NEXT: mla r1, r8, r1, r7 +; CHECK-NEXT: adds r7, r2, r3 +; CHECK-NEXT: mla r0, r6, r0, r1 +; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mla r1, r6, r5, r1 +; CHECK-NEXT: mla r1, r6, r4, r1 +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: adds.w r8, r10, r7 +; CHECK-NEXT: ldr.w r10, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: adc.w lr, lr, r0 +; CHECK-NEXT: .LBB18_10: @ %fp-to-i-cleanup +; CHECK-NEXT: ldrd r3, r1, [sp, #16] @ 8-byte Folded Reload +; CHECK-NEXT: .LBB18_11: @ %fp-to-i-cleanup +; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: subs.w r7, r10, #-1 +; CHECK-NEXT: mvn r0, #-2147483648 +; CHECK-NEXT: sbcs.w r7, r2, r0 +; CHECK-NEXT: sbcs r7, r1, #0 +; CHECK-NEXT: sbcs r7, r3, #0 +; CHECK-NEXT: cset r7, lt +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: csel r5, r3, r7, ne +; CHECK-NEXT: mov.w r3, #-1 +; CHECK-NEXT: csel r4, r2, r0, ne +; CHECK-NEXT: csel r2, r10, r3, ne +; CHECK-NEXT: csel r7, r1, r7, ne +; CHECK-NEXT: rsbs r6, r2, #0 +; CHECK-NEXT: mov.w r1, #-2147483648 +; CHECK-NEXT: sbcs.w r6, r1, r4 +; CHECK-NEXT: sbcs.w r6, r3, r7 +; CHECK-NEXT: sbcs.w r5, r3, r5 ; CHECK-NEXT: cset r5, lt +; CHECK-NEXT: csel r10, r4, r1, lt ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r3, r3, r5, ne ; CHECK-NEXT: csel r2, r2, r5, ne -; CHECK-NEXT: mov.w r5, #-1 -; CHECK-NEXT: csel r1, r1, r4, ne -; CHECK-NEXT: csel r0, r0, r5, ne -; CHECK-NEXT: rsbs r6, r0, #0 -; CHECK-NEXT: sbcs.w r6, r7, r1 -; CHECK-NEXT: sbcs.w r2, r5, r2 -; CHECK-NEXT: sbcs.w r2, r5, r3 -; CHECK-NEXT: csel r8, r1, r7, lt +; CHECK-NEXT: subs.w r5, r12, #-1 +; CHECK-NEXT: sbcs.w r5, r9, r0 +; CHECK-NEXT: sbcs r5, r8, #0 +; CHECK-NEXT: sbcs r5, lr, #0 +; CHECK-NEXT: cset r5, lt +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: csel r7, lr, r5, ne +; CHECK-NEXT: csel r6, r8, r5, ne +; CHECK-NEXT: csel r5, r12, r3, ne +; CHECK-NEXT: csel r0, r9, r0, ne +; CHECK-NEXT: rsbs r4, r5, #0 +; CHECK-NEXT: sbcs.w r4, r1, r0 +; CHECK-NEXT: sbcs.w r6, r3, r6 +; CHECK-NEXT: sbcs r3, r7 +; CHECK-NEXT: csel r0, r0, r1, lt ; CHECK-NEXT: cset r1, lt ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csel r9, r0, r1, ne -; CHECK-NEXT: mov r0, r12 -; CHECK-NEXT: mov r1, lr -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs.w r6, r0, #-1 -; CHECK-NEXT: sbcs.w r6, r1, r4 -; CHECK-NEXT: sbcs r6, r2, #0 -; CHECK-NEXT: sbcs r6, r3, #0 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r0, r0, r5, ne -; CHECK-NEXT: csel r3, r3, r6, ne -; CHECK-NEXT: csel r2, r2, r6, ne -; CHECK-NEXT: csel r1, r1, r4, ne -; CHECK-NEXT: rsbs r6, r0, #0 -; CHECK-NEXT: sbcs.w r6, r7, r1 -; CHECK-NEXT: sbcs.w r2, r5, r2 -; CHECK-NEXT: sbcs.w r2, r5, r3 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: csel r1, r1, r7, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r9, r0 -; CHECK-NEXT: vmov q0[3], q0[1], r8, r1 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-NEXT: csel r1, r5, r1, ne +; CHECK-NEXT: vmov q0[2], q0[0], r2, r1 +; CHECK-NEXT: vmov q0[3], q0[1], r10, r0 +; CHECK-NEXT: add sp, #100 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -739,34 +914,122 @@ entry: define arm_aapcs_vfpcc <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-LABEL: utest_f64i64: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 -; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: vmov r12, lr, d8 -; CHECK-NEXT: subs r2, #1 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r4, r1, r2, ne -; CHECK-NEXT: csel r5, r0, r2, ne -; CHECK-NEXT: mov r0, r12 -; CHECK-NEXT: mov r1, lr -; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: subs r2, #1 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .pad #64 +; CHECK-NEXT: sub sp, #64 +; CHECK-NEXT: vmov r10, r1, d0 +; CHECK-NEXT: mov.w r12, #1 +; CHECK-NEXT: movw lr, #1023 +; CHECK-NEXT: ubfx r2, r1, #20, #11 +; CHECK-NEXT: cmp r2, lr +; CHECK-NEXT: bhs .LBB19_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: b .LBB19_4 +; CHECK-NEXT: .LBB19_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: bfi r1, r12, #20, #12 +; CHECK-NEXT: movw r3, #1074 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: bhi .LBB19_5 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: subw r2, r2, #1075 +; CHECK-NEXT: lsll r10, r1, r2 +; CHECK-NEXT: .LBB19_4: @ %fp-to-i-cleanup1 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: b .LBB19_6 +; CHECK-NEXT: .LBB19_5: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: subw r2, r2, #1075 +; CHECK-NEXT: movs r0, #12 +; CHECK-NEXT: strd r10, r1, [sp, #48] +; CHECK-NEXT: add r1, sp, #32 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: and.w r0, r0, r2, lsr #3 +; CHECK-NEXT: adds r1, #16 +; CHECK-NEXT: strd r3, r3, [sp, #56] +; CHECK-NEXT: strd r3, r3, [sp, #40] +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: strd r3, r3, [sp, #32] +; CHECK-NEXT: subs r3, r1, r0 +; CHECK-NEXT: ldrd r10, r1, [r3] +; CHECK-NEXT: eor r7, r2, #63 +; CHECK-NEXT: ldrd r4, r3, [r3, #8] +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: rsbs r7, r7, #0 +; CHECK-NEXT: mov r6, r10 +; CHECK-NEXT: lsll r4, r3, r2 +; CHECK-NEXT: lsrl r6, r5, #1 +; CHECK-NEXT: lsll r10, r1, r2 +; CHECK-NEXT: lsll r6, r5, r7 +; CHECK-NEXT: orr.w r9, r3, r5 +; CHECK-NEXT: orr.w r8, r4, r6 +; CHECK-NEXT: .LBB19_6: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r4, r5, d1 +; CHECK-NEXT: ubfx r2, r5, #20, #11 +; CHECK-NEXT: cmp r2, lr +; CHECK-NEXT: bhs .LBB19_8 +; CHECK-NEXT: @ %bb.7: +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: b .LBB19_10 +; CHECK-NEXT: .LBB19_8: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: bfi r5, r12, #20, #12 +; CHECK-NEXT: movw r7, #1074 +; CHECK-NEXT: cmp r2, r7 +; CHECK-NEXT: bhi .LBB19_11 +; CHECK-NEXT: @ %bb.9: @ %fp-to-i-if-exp.small +; CHECK-NEXT: subw r2, r2, #1075 +; CHECK-NEXT: lsll r4, r5, r2 +; CHECK-NEXT: .LBB19_10: @ %fp-to-i-cleanup +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: b .LBB19_12 +; CHECK-NEXT: .LBB19_11: @ %fp-to-i-if-exp.large +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: add r0, sp, #16 +; CHECK-NEXT: subw r12, r2, #1075 +; CHECK-NEXT: mov r6, sp +; CHECK-NEXT: str r7, [sp, #28] +; CHECK-NEXT: adds r6, #16 +; CHECK-NEXT: strd r7, r7, [sp, #8] +; CHECK-NEXT: strd r7, r7, [sp] +; CHECK-NEXT: stm r0!, {r4, r5, r7} +; CHECK-NEXT: movs r7, #12 +; CHECK-NEXT: and.w r7, r7, r12, lsr #3 +; CHECK-NEXT: subs r7, r6, r7 +; CHECK-NEXT: and r12, r12, #31 +; CHECK-NEXT: ldrd r4, r5, [r7] +; CHECK-NEXT: eor r6, r12, #63 +; CHECK-NEXT: ldrd r2, r7, [r7, #8] +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: rsbs r6, r6, #0 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: lsll r2, r7, r12 +; CHECK-NEXT: lsrl r0, r3, #1 +; CHECK-NEXT: lsll r4, r5, r12 +; CHECK-NEXT: lsll r0, r3, r6 +; CHECK-NEXT: orr.w r6, r7, r3 +; CHECK-NEXT: orrs r2, r0 +; CHECK-NEXT: .LBB19_12: @ %fp-to-i-cleanup +; CHECK-NEXT: subs r0, r2, #1 +; CHECK-NEXT: sbcs r0, r6, #0 +; CHECK-NEXT: cset r0, lo +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csel r2, r5, r0, ne +; CHECK-NEXT: csel r0, r4, r0, ne +; CHECK-NEXT: subs.w r3, r8, #1 +; CHECK-NEXT: sbcs r3, r9, #0 +; CHECK-NEXT: cset r3, lo +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: csel r7, r10, r3, ne +; CHECK-NEXT: csel r1, r1, r3, ne +; CHECK-NEXT: vmov q0[2], q0[0], r7, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r2 +; CHECK-NEXT: add sp, #64 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptoui <2 x double> %x to <2 x i128> %0 = icmp ult <2 x i128> %conv, @@ -777,56 +1040,241 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-LABEL: ustest_f64i64: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: vmov r12, lr, d8 -; CHECK-NEXT: subs r4, r2, #1 -; CHECK-NEXT: sbcs r4, r3, #0 -; CHECK-NEXT: mov.w r8, #1 -; CHECK-NEXT: cset r4, lt -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r0, r4, ne -; CHECK-NEXT: csel r3, r3, r4, ne -; CHECK-NEXT: csel r1, r1, r4, ne -; CHECK-NEXT: csel r2, r2, r8, ne -; CHECK-NEXT: rsbs r5, r0, #0 -; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: sbcs.w r5, r4, r1 -; CHECK-NEXT: sbcs.w r2, r4, r2 -; CHECK-NEXT: sbcs.w r2, r4, r3 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r5, r1, r2, ne -; CHECK-NEXT: csel r7, r0, r2, ne -; CHECK-NEXT: mov r0, r12 -; CHECK-NEXT: mov r1, lr -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r6, r2, #1 -; CHECK-NEXT: sbcs r6, r3, #0 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r0, r0, r6, ne -; CHECK-NEXT: csel r3, r3, r6, ne -; CHECK-NEXT: csel r1, r1, r6, ne -; CHECK-NEXT: csel r2, r2, r8, ne -; CHECK-NEXT: rsbs r6, r0, #0 -; CHECK-NEXT: sbcs.w r6, r4, r1 -; CHECK-NEXT: sbcs.w r2, r4, r2 -; CHECK-NEXT: sbcs.w r2, r4, r3 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r7 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #108 +; CHECK-NEXT: sub sp, #108 +; CHECK-NEXT: vmov r6, r0, d0 +; CHECK-NEXT: movs r2, #1 +; CHECK-NEXT: mov.w r9, #-1 +; CHECK-NEXT: movw r1, #1023 +; CHECK-NEXT: ubfx r5, r0, #20, #11 +; CHECK-NEXT: cmp r5, r1 +; CHECK-NEXT: bhs .LBB20_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: b .LBB20_5 +; CHECK-NEXT: .LBB20_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: orr.w r11, r2, r0, asr #31 +; CHECK-NEXT: bfi r3, r2, #20, #12 +; CHECK-NEXT: asr.w r8, r0, #31 +; CHECK-NEXT: movw r7, #1074 +; CHECK-NEXT: cmp r5, r7 +; CHECK-NEXT: bhi .LBB20_4 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: subw r7, r5, #1075 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: lsll r6, r3, r7 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: umull r10, r7, r6, r11 +; CHECK-NEXT: umlal r7, r4, r3, r11 +; CHECK-NEXT: umull r12, r5, r6, r8 +; CHECK-NEXT: adds.w r7, r7, r12 +; CHECK-NEXT: str r7, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: adcs r4, r5 +; CHECK-NEXT: mla r5, r8, r3, r5 +; CHECK-NEXT: adc r7, lr, #0 +; CHECK-NEXT: umlal r4, r7, r3, r8 +; CHECK-NEXT: mla r0, r8, r6, r5 +; CHECK-NEXT: adds.w r4, r4, r12 +; CHECK-NEXT: adc.w r5, r7, r0 +; CHECK-NEXT: b .LBB20_5 +; CHECK-NEXT: .LBB20_4: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: subw r5, r5, #1075 +; CHECK-NEXT: strd r6, r3, [sp, #88] +; CHECK-NEXT: movs r3, #12 +; CHECK-NEXT: add r6, sp, #72 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: and.w r3, r3, r5, lsr #3 +; CHECK-NEXT: adds r6, #16 +; CHECK-NEXT: strd r0, r0, [sp, #96] +; CHECK-NEXT: strd r0, r0, [sp, #80] +; CHECK-NEXT: subs r4, r6, r3 +; CHECK-NEXT: strd r0, r0, [sp, #72] +; CHECK-NEXT: and r7, r5, #31 +; CHECK-NEXT: ldrd r6, r3, [r4] +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: ldr r0, [r4, #8] +; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: ldr.w r10, [r4, #12] +; CHECK-NEXT: mov r4, r6 +; CHECK-NEXT: lsll r4, r5, r7 +; CHECK-NEXT: lsrl r6, r3, #1 +; CHECK-NEXT: umull r0, r2, r4, r11 +; CHECK-NEXT: umlal r2, r12, r5, r11 +; CHECK-NEXT: str r0, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: umull r0, r1, r4, r8 +; CHECK-NEXT: adds r2, r2, r0 +; CHECK-NEXT: str r2, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: strd r1, r0, [sp, #20] @ 8-byte Folded Spill +; CHECK-NEXT: adcs.w lr, r12, r1 +; CHECK-NEXT: mov.w r0, #0 +; CHECK-NEXT: adc r12, r0, #0 +; CHECK-NEXT: eor r2, r7, #63 +; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: lsll r6, r3, r2 +; CHECK-NEXT: lsll r0, r1, r7 +; CHECK-NEXT: orr.w r2, r1, r3 +; CHECK-NEXT: orr.w r3, r0, r6 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: umlal lr, r12, r5, r8 +; CHECK-NEXT: ldr.w r10, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: umull r6, r7, r11, r3 +; CHECK-NEXT: mla r1, r11, r2, r7 +; CHECK-NEXT: mla r2, r8, r5, r0 +; CHECK-NEXT: mla r1, r8, r3, r1 +; CHECK-NEXT: mla r0, r8, r4, r2 +; CHECK-NEXT: ldr r2, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: adds r2, r2, r6 +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: adds.w r4, lr, r2 +; CHECK-NEXT: adc.w r5, r12, r0 +; CHECK-NEXT: movs r2, #1 +; CHECK-NEXT: movw r1, #1023 +; CHECK-NEXT: .LBB20_5: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r0, r7, d1 +; CHECK-NEXT: mov.w r6, #-1 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: cmp.w r7, #-1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt r6, #0 +; CHECK-NEXT: movgt.w r9, #1 +; CHECK-NEXT: ubfx r3, r7, #20, #11 +; CHECK-NEXT: cmp r3, r1 +; CHECK-NEXT: bhs .LBB20_7 +; CHECK-NEXT: @ %bb.6: +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: b .LBB20_11 +; CHECK-NEXT: .LBB20_7: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: bfi r7, r2, #20, #12 +; CHECK-NEXT: movw r2, #1074 +; CHECK-NEXT: cmp r3, r2 +; CHECK-NEXT: strd r5, r4, [sp, #24] @ 8-byte Folded Spill +; CHECK-NEXT: bhi .LBB20_9 +; CHECK-NEXT: @ %bb.8: @ %fp-to-i-if-exp.small +; CHECK-NEXT: subw r2, r3, #1075 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: lsll r0, r7, r2 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: umull r11, r2, r0, r9 +; CHECK-NEXT: umlal r2, r4, r7, r9 +; CHECK-NEXT: umull r3, r1, r0, r6 +; CHECK-NEXT: adds.w lr, r3, r2 +; CHECK-NEXT: adcs.w r2, r4, r1 +; CHECK-NEXT: mla r1, r6, r7, r1 +; CHECK-NEXT: adc r4, r12, #0 +; CHECK-NEXT: umlal r2, r4, r7, r6 +; CHECK-NEXT: mla r1, r6, r0, r1 +; CHECK-NEXT: adds r0, r2, r3 +; CHECK-NEXT: adc.w r2, r4, r1 +; CHECK-NEXT: b .LBB20_10 +; CHECK-NEXT: .LBB20_9: @ %fp-to-i-if-exp.large +; CHECK-NEXT: add r1, sp, #56 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: stm.w r1, {r0, r7, r12} +; CHECK-NEXT: subw r1, r3, #1075 +; CHECK-NEXT: movs r0, #12 +; CHECK-NEXT: add r2, sp, #40 +; CHECK-NEXT: and.w r0, r0, r1, lsr #3 +; CHECK-NEXT: adds r2, #16 +; CHECK-NEXT: str.w r12, [sp, #68] +; CHECK-NEXT: strd r12, r12, [sp, #48] +; CHECK-NEXT: subs r2, r2, r0 +; CHECK-NEXT: strd r12, r12, [sp, #40] +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: ldrd r0, r7, [r2] +; CHECK-NEXT: ldr r3, [r2, #8] +; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: str.w r10, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: and r10, r1, #31 +; CHECK-NEXT: ldr r2, [r2, #12] +; CHECK-NEXT: lsll r4, r3, r10 +; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: umull r5, r2, r4, r9 +; CHECK-NEXT: lsrl r0, r7, #1 +; CHECK-NEXT: umlal r2, r11, r3, r9 +; CHECK-NEXT: str r5, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: umull r5, r1, r4, r6 +; CHECK-NEXT: adds.w lr, r5, r2 +; CHECK-NEXT: adcs.w r2, r11, r1 +; CHECK-NEXT: strd r1, r5, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: adc r11, r12, #0 +; CHECK-NEXT: eor r12, r10, #63 +; CHECK-NEXT: rsb.w r12, r12, #0 +; CHECK-NEXT: umlal r2, r11, r3, r6 +; CHECK-NEXT: lsll r0, r7, r12 +; CHECK-NEXT: ldrd r5, r12, [sp, #12] @ 8-byte Folded Reload +; CHECK-NEXT: lsll r12, r5, r10 +; CHECK-NEXT: ldr.w r10, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: orr.w r0, r0, r12 +; CHECK-NEXT: orr.w r1, r5, r7 +; CHECK-NEXT: umull r12, r7, r9, r0 +; CHECK-NEXT: mla r1, r9, r1, r7 +; CHECK-NEXT: mla r0, r6, r0, r1 +; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mla r1, r6, r3, r1 +; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: adds.w r3, r3, r12 +; CHECK-NEXT: mla r1, r6, r4, r1 +; CHECK-NEXT: adcs r1, r0 +; CHECK-NEXT: adds r0, r2, r3 +; CHECK-NEXT: adc.w r2, r11, r1 +; CHECK-NEXT: ldr.w r11, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: .LBB20_10: @ %fp-to-i-cleanup +; CHECK-NEXT: ldrd r5, r4, [sp, #24] @ 8-byte Folded Reload +; CHECK-NEXT: .LBB20_11: @ %fp-to-i-cleanup +; CHECK-NEXT: subs r1, r0, #1 +; CHECK-NEXT: mov.w r7, #1 +; CHECK-NEXT: sbcs r1, r2, #0 +; CHECK-NEXT: cset r1, lt +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csel r2, r2, r1, ne +; CHECK-NEXT: csel r3, lr, r1, ne +; CHECK-NEXT: csel r1, r11, r1, ne +; CHECK-NEXT: csel r0, r0, r7, ne +; CHECK-NEXT: rsbs r6, r1, #0 +; CHECK-NEXT: sbcs.w r6, r8, r3 +; CHECK-NEXT: sbcs.w r0, r8, r0 +; CHECK-NEXT: ldr r6, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: sbcs.w r0, r8, r2 +; CHECK-NEXT: cset r0, lt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csel r2, r3, r0, ne +; CHECK-NEXT: csel r0, r1, r0, ne +; CHECK-NEXT: subs r1, r4, #1 +; CHECK-NEXT: sbcs r1, r5, #0 +; CHECK-NEXT: cset r1, lt +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csel r3, r5, r1, ne +; CHECK-NEXT: csel r6, r6, r1, ne +; CHECK-NEXT: csel r1, r10, r1, ne +; CHECK-NEXT: csel r7, r4, r7, ne +; CHECK-NEXT: rsbs r5, r1, #0 +; CHECK-NEXT: sbcs.w r5, r8, r6 +; CHECK-NEXT: sbcs.w r7, r8, r7 +; CHECK-NEXT: sbcs.w r3, r8, r3 +; CHECK-NEXT: cset r3, lt +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: csel r1, r1, r3, ne +; CHECK-NEXT: csel r3, r6, r3, ne +; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r3, r2 +; CHECK-NEXT: add sp, #108 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -839,55 +1287,230 @@ entry: define arm_aapcs_vfpcc <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-LABEL: stest_f32i64: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: vmov r0, r9, d0 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs.w r7, r0, #-1 -; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: sbcs.w r7, r1, r5 -; CHECK-NEXT: mov.w r6, #-1 -; CHECK-NEXT: sbcs r7, r2, #0 -; CHECK-NEXT: sbcs r7, r3, #0 -; CHECK-NEXT: cset r7, lt -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r6, ne -; CHECK-NEXT: csel r3, r3, r7, ne -; CHECK-NEXT: csel r2, r2, r7, ne -; CHECK-NEXT: csel r1, r1, r5, ne -; CHECK-NEXT: rsbs r4, r0, #0 -; CHECK-NEXT: mov.w r7, #-2147483648 -; CHECK-NEXT: sbcs.w r4, r7, r1 -; CHECK-NEXT: sbcs.w r2, r6, r2 -; CHECK-NEXT: sbcs.w r2, r6, r3 -; CHECK-NEXT: csel r8, r1, r7, lt +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #100 +; CHECK-NEXT: sub sp, #100 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: mov.w r10, #-1 +; CHECK-NEXT: ubfx r1, r0, #23, #8 +; CHECK-NEXT: cmp r1, #127 +; CHECK-NEXT: bhs .LBB21_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: b .LBB21_5 +; CHECK-NEXT: .LBB21_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: movs r3, #1 +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: orr.w r11, r3, r0, asr #31 +; CHECK-NEXT: bfi r2, r3, #23, #9 +; CHECK-NEXT: asrs r0, r0, #31 +; CHECK-NEXT: cmp r1, #149 +; CHECK-NEXT: bhi .LBB21_4 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: rsb.w r1, r1, #150 +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: lsr.w r1, r2, r1 +; CHECK-NEXT: umull r2, r8, r1, r11 +; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: umull r2, r3, r1, r0 +; CHECK-NEXT: adds.w r6, r2, r8 +; CHECK-NEXT: adcs r6, r3, #0 +; CHECK-NEXT: adc r6, r7, #0 +; CHECK-NEXT: adds.w r5, r2, r8 +; CHECK-NEXT: mla r5, r0, r1, r3 +; CHECK-NEXT: adcs r3, r2 +; CHECK-NEXT: umlal r8, r7, r1, r0 +; CHECK-NEXT: adcs r6, r5 +; CHECK-NEXT: b .LBB21_5 +; CHECK-NEXT: .LBB21_4: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: subs r1, #150 +; CHECK-NEXT: add r7, sp, #64 +; CHECK-NEXT: strd r2, lr, [sp, #80] +; CHECK-NEXT: movs r2, #12 +; CHECK-NEXT: adds r7, #16 +; CHECK-NEXT: and.w r2, r2, r1, lsr #3 +; CHECK-NEXT: strd lr, lr, [sp, #88] +; CHECK-NEXT: strd lr, lr, [sp, #72] +; CHECK-NEXT: subs r2, r7, r2 +; CHECK-NEXT: strd lr, lr, [sp, #64] +; CHECK-NEXT: and r12, r1, #31 +; CHECK-NEXT: ldrd r6, r7, [r2] +; CHECK-NEXT: ldr r5, [r2, #8] +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: str r5, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: mov r4, r6 +; CHECK-NEXT: ldr r2, [r2, #12] +; CHECK-NEXT: lsll r4, r1, r12 +; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: umull r5, r2, r4, r11 +; CHECK-NEXT: lsrl r6, r7, #1 +; CHECK-NEXT: umull r3, r9, r4, r0 +; CHECK-NEXT: str r5, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: umlal r2, r5, r1, r11 +; CHECK-NEXT: adds r2, r2, r3 +; CHECK-NEXT: str r2, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: eor r2, r12, #63 +; CHECK-NEXT: adcs.w r8, r5, r9 +; CHECK-NEXT: rsb.w r2, r2, #0 +; CHECK-NEXT: adc lr, lr, #0 +; CHECK-NEXT: lsll r6, r7, r2 +; CHECK-NEXT: ldrd r5, r2, [sp, #16] @ 8-byte Folded Reload +; CHECK-NEXT: umlal r8, lr, r1, r0 +; CHECK-NEXT: lsll r2, r5, r12 +; CHECK-NEXT: orrs r5, r7 +; CHECK-NEXT: orr.w r7, r2, r6 +; CHECK-NEXT: mla r1, r0, r1, r9 +; CHECK-NEXT: umull r12, r6, r11, r7 +; CHECK-NEXT: mla r2, r11, r5, r6 +; CHECK-NEXT: mla r2, r0, r7, r2 +; CHECK-NEXT: mla r0, r0, r4, r1 +; CHECK-NEXT: adds.w r1, r3, r12 +; CHECK-NEXT: adcs r0, r2 +; CHECK-NEXT: adds.w r3, r8, r1 +; CHECK-NEXT: ldr.w r8, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: adc.w r6, lr, r0 +; CHECK-NEXT: .LBB21_5: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r1, s1 +; CHECK-NEXT: mov.w r5, #-1 +; CHECK-NEXT: cmp.w r1, #-1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt r5, #0 +; CHECK-NEXT: movgt.w r10, #1 +; CHECK-NEXT: ubfx r0, r1, #23, #8 +; CHECK-NEXT: cmp r0, #127 +; CHECK-NEXT: bhs .LBB21_7 +; CHECK-NEXT: @ %bb.6: +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: b .LBB21_10 +; CHECK-NEXT: .LBB21_7: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: movw r2, #65535 +; CHECK-NEXT: cmp r0, #149 +; CHECK-NEXT: movt r2, #127 +; CHECK-NEXT: and.w r1, r1, r2 +; CHECK-NEXT: add.w r1, r1, #8388608 +; CHECK-NEXT: bhi .LBB21_9 +; CHECK-NEXT: @ %bb.8: @ %fp-to-i-if-exp.small +; CHECK-NEXT: rsb.w r0, r0, #150 +; CHECK-NEXT: lsr.w r0, r1, r0 +; CHECK-NEXT: umull r9, r1, r0, r10 +; CHECK-NEXT: umull r2, r7, r0, r5 +; CHECK-NEXT: muls r0, r5, r0 +; CHECK-NEXT: adds.w r11, r2, r1 +; CHECK-NEXT: adcs.w lr, r2, r7 +; CHECK-NEXT: adc.w r12, r7, r0 +; CHECK-NEXT: b .LBB21_10 +; CHECK-NEXT: .LBB21_9: @ %fp-to-i-if-exp.large +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: subs r0, #150 +; CHECK-NEXT: strd r1, r12, [sp, #48] +; CHECK-NEXT: movs r1, #12 +; CHECK-NEXT: strd r12, r12, [sp, #56] +; CHECK-NEXT: and.w r1, r1, r0, lsr #3 +; CHECK-NEXT: strd r12, r12, [sp, #40] +; CHECK-NEXT: and lr, r0, #31 +; CHECK-NEXT: strd r12, r12, [sp, #32] +; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: add r3, sp, #32 +; CHECK-NEXT: adds r3, #16 +; CHECK-NEXT: str r6, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: subs r3, r3, r1 +; CHECK-NEXT: ldrd r6, r1, [r3] +; CHECK-NEXT: ldr r7, [r3, #8] +; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: ldr r3, [r3, #12] +; CHECK-NEXT: lsll r0, r7, lr +; CHECK-NEXT: str.w r8, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: umull r9, r4, r0, r10 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: str.w r10, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: lsrl r6, r1, #1 +; CHECK-NEXT: umlal r4, r8, r7, r10 +; CHECK-NEXT: umull r10, r2, r0, r5 +; CHECK-NEXT: str.w r10, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: adds.w r11, r10, r4 +; CHECK-NEXT: ldr r4, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: adcs.w r8, r8, r2 +; CHECK-NEXT: mov r10, r2 +; CHECK-NEXT: eor r2, lr, #63 +; CHECK-NEXT: adc r12, r12, #0 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: umlal r8, r12, r7, r5 +; CHECK-NEXT: lsll r6, r1, r2 +; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: lsll r2, r3, lr +; CHECK-NEXT: orrs r2, r6 +; CHECK-NEXT: orrs r1, r3 +; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: umull lr, r6, r4, r2 +; CHECK-NEXT: mla r1, r4, r1, r6 +; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mla r1, r5, r2, r1 +; CHECK-NEXT: mla r2, r5, r7, r10 +; CHECK-NEXT: mla r0, r5, r0, r2 +; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: adds.w r2, r2, lr +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: adds.w lr, r8, r2 +; CHECK-NEXT: ldr.w r8, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: adc.w r12, r12, r0 +; CHECK-NEXT: .LBB21_10: @ %fp-to-i-cleanup +; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mvn r0, #-2147483648 +; CHECK-NEXT: subs.w r1, r2, #-1 +; CHECK-NEXT: sbcs.w r1, r8, r0 +; CHECK-NEXT: sbcs r1, r3, #0 +; CHECK-NEXT: sbcs r1, r6, #0 ; CHECK-NEXT: cset r1, lt ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csel r10, r0, r1, ne -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs.w r4, r0, #-1 -; CHECK-NEXT: sbcs.w r4, r1, r5 -; CHECK-NEXT: sbcs r4, r2, #0 -; CHECK-NEXT: sbcs r4, r3, #0 -; CHECK-NEXT: cset r4, lt -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r0, r6, ne -; CHECK-NEXT: csel r3, r3, r4, ne -; CHECK-NEXT: csel r2, r2, r4, ne -; CHECK-NEXT: csel r1, r1, r5, ne -; CHECK-NEXT: rsbs r5, r0, #0 -; CHECK-NEXT: sbcs.w r5, r7, r1 -; CHECK-NEXT: sbcs.w r2, r6, r2 -; CHECK-NEXT: sbcs.w r2, r6, r3 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: csel r1, r1, r7, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r10, r0 -; CHECK-NEXT: vmov q0[3], q0[1], r8, r1 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: csel r7, r6, r1, ne +; CHECK-NEXT: csel r1, r3, r1, ne +; CHECK-NEXT: mov.w r3, #-1 +; CHECK-NEXT: csel r6, r8, r0, ne +; CHECK-NEXT: csel r4, r2, r3, ne +; CHECK-NEXT: mov.w r2, #-2147483648 +; CHECK-NEXT: rsbs r5, r4, #0 +; CHECK-NEXT: sbcs.w r5, r2, r6 +; CHECK-NEXT: sbcs.w r1, r3, r1 +; CHECK-NEXT: sbcs.w r1, r3, r7 +; CHECK-NEXT: cset r5, lt +; CHECK-NEXT: csel r8, r6, r2, lt +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: csel r4, r4, r5, ne +; CHECK-NEXT: subs.w r5, r9, #-1 +; CHECK-NEXT: sbcs.w r5, r11, r0 +; CHECK-NEXT: sbcs r5, lr, #0 +; CHECK-NEXT: sbcs r5, r12, #0 +; CHECK-NEXT: cset r5, lt +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: csel r7, r12, r5, ne +; CHECK-NEXT: csel r6, lr, r5, ne +; CHECK-NEXT: csel r5, r9, r3, ne +; CHECK-NEXT: csel r0, r11, r0, ne +; CHECK-NEXT: rsbs r1, r5, #0 +; CHECK-NEXT: sbcs.w r1, r2, r0 +; CHECK-NEXT: sbcs.w r1, r3, r6 +; CHECK-NEXT: sbcs.w r1, r3, r7 +; CHECK-NEXT: cset r1, lt +; CHECK-NEXT: csel r0, r0, r2, lt +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csel r1, r5, r1, ne +; CHECK-NEXT: vmov q0[2], q0[0], r4, r1 +; CHECK-NEXT: vmov q0[3], q0[1], r8, r0 +; CHECK-NEXT: add sp, #100 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -900,28 +1523,121 @@ entry: define arm_aapcs_vfpcc <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-LABEL: utest_f32i64: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: vmov r4, r0, d0 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: subs r2, #1 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r6, r0, r2, ne -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: csel r5, r1, r2, ne -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: subs r2, #1 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 -; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: .pad #68 +; CHECK-NEXT: sub sp, #68 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: ubfx r0, r1, #23, #8 +; CHECK-NEXT: cmp r0, #127 +; CHECK-NEXT: bhs .LBB22_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: b .LBB22_4 +; CHECK-NEXT: .LBB22_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: movs r2, #1 +; CHECK-NEXT: cmp r0, #149 +; CHECK-NEXT: bfi r1, r2, #23, #9 +; CHECK-NEXT: bhi .LBB22_5 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: rsb.w r0, r0, #150 +; CHECK-NEXT: lsr.w r8, r1, r0 +; CHECK-NEXT: .LBB22_4: @ %fp-to-i-cleanup1 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: b .LBB22_6 +; CHECK-NEXT: .LBB22_5: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: strd r1, r2, [sp, #48] +; CHECK-NEXT: add r1, sp, #32 +; CHECK-NEXT: strd r2, r2, [sp, #56] +; CHECK-NEXT: adds r1, #16 +; CHECK-NEXT: strd r2, r2, [sp, #40] +; CHECK-NEXT: strd r2, r2, [sp, #32] +; CHECK-NEXT: sub.w r2, r0, #150 +; CHECK-NEXT: movs r0, #12 +; CHECK-NEXT: and.w r0, r0, r2, lsr #3 +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: subs r3, r1, r0 +; CHECK-NEXT: eor r6, r2, #63 +; CHECK-NEXT: ldrd r8, r9, [r3] +; CHECK-NEXT: rsbs r6, r6, #0 +; CHECK-NEXT: ldrd r4, r3, [r3, #8] +; CHECK-NEXT: mov r5, r9 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: lsll r4, r3, r2 +; CHECK-NEXT: lsrl r0, r5, #1 +; CHECK-NEXT: lsll r8, r9, r2 +; CHECK-NEXT: lsll r0, r5, r6 +; CHECK-NEXT: orr.w lr, r3, r5 +; CHECK-NEXT: orr.w r12, r4, r0 +; CHECK-NEXT: .LBB22_6: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r3, s1 +; CHECK-NEXT: ubfx r2, r3, #23, #8 +; CHECK-NEXT: cmp r2, #127 +; CHECK-NEXT: bhs .LBB22_8 +; CHECK-NEXT: @ %bb.7: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: b .LBB22_10 +; CHECK-NEXT: .LBB22_8: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: movw r0, #65535 +; CHECK-NEXT: cmp r2, #149 +; CHECK-NEXT: movt r0, #127 +; CHECK-NEXT: and.w r0, r0, r3 +; CHECK-NEXT: add.w r3, r0, #8388608 +; CHECK-NEXT: bhi .LBB22_11 +; CHECK-NEXT: @ %bb.9: @ %fp-to-i-if-exp.small +; CHECK-NEXT: rsb.w r0, r2, #150 +; CHECK-NEXT: lsr.w r0, r3, r0 +; CHECK-NEXT: .LBB22_10: @ %fp-to-i-cleanup +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: b .LBB22_12 +; CHECK-NEXT: .LBB22_11: @ %fp-to-i-if-exp.large +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: sub.w r4, r2, #150 +; CHECK-NEXT: movs r2, #12 +; CHECK-NEXT: strd r3, r0, [sp, #16] +; CHECK-NEXT: mov r3, sp +; CHECK-NEXT: and.w r2, r2, r4, lsr #3 +; CHECK-NEXT: adds r3, #16 +; CHECK-NEXT: strd r0, r0, [sp, #24] +; CHECK-NEXT: subs r3, r3, r2 +; CHECK-NEXT: strd r0, r0, [sp, #8] +; CHECK-NEXT: strd r0, r0, [sp] +; CHECK-NEXT: and r1, r4, #31 +; CHECK-NEXT: ldm.w r3, {r0, r5, r6} +; CHECK-NEXT: eor r4, r1, #63 +; CHECK-NEXT: mov r7, r5 +; CHECK-NEXT: ldr r3, [r3, #12] +; CHECK-NEXT: rsbs r4, r4, #0 +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: lsll r0, r5, r1 +; CHECK-NEXT: lsrl r2, r7, #1 +; CHECK-NEXT: lsll r6, r3, r1 +; CHECK-NEXT: lsll r2, r7, r4 +; CHECK-NEXT: orr.w r4, r3, r7 +; CHECK-NEXT: orr.w r3, r6, r2 +; CHECK-NEXT: .LBB22_12: @ %fp-to-i-cleanup +; CHECK-NEXT: subs r1, r3, #1 +; CHECK-NEXT: sbcs r1, r4, #0 +; CHECK-NEXT: cset r1, lo +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csel r2, r5, r1, ne +; CHECK-NEXT: csel r0, r0, r1, ne +; CHECK-NEXT: subs.w r1, r12, #1 +; CHECK-NEXT: sbcs r1, lr, #0 +; CHECK-NEXT: cset r1, lo +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csel r3, r8, r1, ne +; CHECK-NEXT: csel r1, r9, r1, ne +; CHECK-NEXT: vmov q0[2], q0[0], r3, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r2 +; CHECK-NEXT: add sp, #68 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} entry: %conv = fptoui <2 x float> %x to <2 x i128> %0 = icmp ult <2 x i128> %conv, @@ -932,50 +1648,229 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-LABEL: ustest_f32i64: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: vmov r5, r0, d0 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r4, r2, #1 -; CHECK-NEXT: mov.w r8, #1 -; CHECK-NEXT: sbcs r4, r3, #0 -; CHECK-NEXT: mov.w r6, #0 -; CHECK-NEXT: cset r4, lt -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r0, r4, ne -; CHECK-NEXT: csel r3, r3, r4, ne -; CHECK-NEXT: csel r1, r1, r4, ne -; CHECK-NEXT: csel r2, r2, r8, ne -; CHECK-NEXT: rsbs r4, r0, #0 -; CHECK-NEXT: sbcs.w r4, r6, r1 -; CHECK-NEXT: sbcs.w r2, r6, r2 -; CHECK-NEXT: sbcs.w r2, r6, r3 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r7, r0, r2, ne -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: csel r4, r1, r2, ne -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r5, r2, #1 -; CHECK-NEXT: sbcs r5, r3, #0 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r0, r0, r5, ne -; CHECK-NEXT: csel r3, r3, r5, ne -; CHECK-NEXT: csel r1, r1, r5, ne -; CHECK-NEXT: csel r2, r2, r8, ne -; CHECK-NEXT: rsbs r5, r0, #0 -; CHECK-NEXT: sbcs.w r5, r6, r1 -; CHECK-NEXT: sbcs.w r2, r6, r2 -; CHECK-NEXT: sbcs.w r2, r6, r3 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r7 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #108 +; CHECK-NEXT: sub sp, #108 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: mov.w r10, #-1 +; CHECK-NEXT: ubfx r2, r0, #23, #8 +; CHECK-NEXT: cmp r2, #127 +; CHECK-NEXT: bhs .LBB23_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: b .LBB23_5 +; CHECK-NEXT: .LBB23_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: movs r1, #1 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: orr.w r9, r1, r0, asr #31 +; CHECK-NEXT: bfi r7, r1, #23, #9 +; CHECK-NEXT: asrs r0, r0, #31 +; CHECK-NEXT: cmp r2, #149 +; CHECK-NEXT: bhi .LBB23_4 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: rsb.w r2, r2, #150 +; CHECK-NEXT: lsr.w r2, r7, r2 +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: umull r1, lr, r2, r9 +; CHECK-NEXT: str r1, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: umull r1, r3, r2, r0 +; CHECK-NEXT: adds.w r6, r1, lr +; CHECK-NEXT: adcs r6, r3, #0 +; CHECK-NEXT: adc r6, r7, #0 +; CHECK-NEXT: adds.w r5, r1, lr +; CHECK-NEXT: mla r5, r0, r2, r3 +; CHECK-NEXT: adcs.w r8, r1, r3 +; CHECK-NEXT: umlal lr, r7, r2, r0 +; CHECK-NEXT: adc.w r1, r6, r5 +; CHECK-NEXT: b .LBB23_5 +; CHECK-NEXT: .LBB23_4: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: sub.w r5, r2, #150 +; CHECK-NEXT: movs r2, #12 +; CHECK-NEXT: strd r7, r8, [sp, #88] +; CHECK-NEXT: add r7, sp, #72 +; CHECK-NEXT: and.w r2, r2, r5, lsr #3 +; CHECK-NEXT: adds r7, #16 +; CHECK-NEXT: strd r8, r8, [sp, #96] +; CHECK-NEXT: strd r8, r8, [sp, #80] +; CHECK-NEXT: subs r4, r7, r2 +; CHECK-NEXT: strd r8, r8, [sp, #72] +; CHECK-NEXT: and lr, r5, #31 +; CHECK-NEXT: ldrd r6, r7, [r4] +; CHECK-NEXT: ldr r2, [r4, #8] +; CHECK-NEXT: mov r5, r7 +; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: ldr r1, [r4, #12] +; CHECK-NEXT: mov r4, r6 +; CHECK-NEXT: lsll r4, r5, lr +; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: umull r3, r2, r4, r9 +; CHECK-NEXT: lsrl r6, r7, #1 +; CHECK-NEXT: umull r1, r11, r4, r0 +; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: umlal r2, r3, r5, r9 +; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: adds r2, r2, r1 +; CHECK-NEXT: str r2, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: eor r2, lr, #63 +; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: rsb.w r2, r2, #0 +; CHECK-NEXT: adcs.w r12, r3, r11 +; CHECK-NEXT: lsll r6, r7, r2 +; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: adc r3, r8, #0 +; CHECK-NEXT: lsll r2, r1, lr +; CHECK-NEXT: umlal r12, r3, r5, r0 +; CHECK-NEXT: orr.w r8, r1, r7 +; CHECK-NEXT: orr.w r7, r2, r6 +; CHECK-NEXT: mla r2, r0, r5, r11 +; CHECK-NEXT: umull lr, r6, r9, r7 +; CHECK-NEXT: mla r1, r9, r8, r6 +; CHECK-NEXT: mla r1, r0, r7, r1 +; CHECK-NEXT: mla r0, r0, r4, r2 +; CHECK-NEXT: ldr r2, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: adds.w r2, r2, lr +; CHECK-NEXT: ldr.w lr, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: adds.w r8, r12, r2 +; CHECK-NEXT: adc.w r1, r3, r0 +; CHECK-NEXT: .LBB23_5: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r2, s1 +; CHECK-NEXT: mov.w r4, #-1 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: cmp.w r2, #-1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt r4, #0 +; CHECK-NEXT: movgt.w r10, #1 +; CHECK-NEXT: ubfx r0, r2, #23, #8 +; CHECK-NEXT: cmp r0, #127 +; CHECK-NEXT: bhs .LBB23_7 +; CHECK-NEXT: @ %bb.6: +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: b .LBB23_10 +; CHECK-NEXT: .LBB23_7: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: movw r3, #65535 +; CHECK-NEXT: cmp r0, #149 +; CHECK-NEXT: movt r3, #127 +; CHECK-NEXT: and.w r2, r2, r3 +; CHECK-NEXT: add.w r2, r2, #8388608 +; CHECK-NEXT: bhi .LBB23_9 +; CHECK-NEXT: @ %bb.8: @ %fp-to-i-if-exp.small +; CHECK-NEXT: rsb.w r0, r0, #150 +; CHECK-NEXT: lsr.w r0, r2, r0 +; CHECK-NEXT: umull r11, r2, r0, r10 +; CHECK-NEXT: umull r3, r7, r0, r4 +; CHECK-NEXT: muls r0, r4, r0 +; CHECK-NEXT: adds.w r10, r3, r2 +; CHECK-NEXT: adcs.w r2, r3, r7 +; CHECK-NEXT: adcs r0, r7 +; CHECK-NEXT: b .LBB23_10 +; CHECK-NEXT: .LBB23_9: @ %fp-to-i-if-exp.large +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: add r3, sp, #40 +; CHECK-NEXT: strd r2, r12, [sp, #56] +; CHECK-NEXT: sub.w r2, r0, #150 +; CHECK-NEXT: movs r0, #12 +; CHECK-NEXT: adds r3, #16 +; CHECK-NEXT: and.w r0, r0, r2, lsr #3 +; CHECK-NEXT: strd r12, r12, [sp, #64] +; CHECK-NEXT: strd r12, r12, [sp, #48] +; CHECK-NEXT: subs r3, r3, r0 +; CHECK-NEXT: strd r12, r12, [sp, #40] +; CHECK-NEXT: ldrd r0, r7, [r3] +; CHECK-NEXT: ldr r5, [r3, #8] +; CHECK-NEXT: strd r5, r1, [sp, #20] @ 8-byte Folded Spill +; CHECK-NEXT: ldr r1, [r3, #12] +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: str.w r8, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: and r8, r2, #31 +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: str.w lr, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: lsll r2, r3, r8 +; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: umull r11, r6, r2, r10 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: lsrl r0, r7, #1 +; CHECK-NEXT: umull r5, r1, r2, r4 +; CHECK-NEXT: umlal r6, lr, r3, r10 +; CHECK-NEXT: str r5, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: strd r10, r1, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: adds.w r10, r5, r6 +; CHECK-NEXT: ldr r6, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: adcs.w lr, lr, r1 +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: adc r5, r12, #0 +; CHECK-NEXT: eor r12, r8, #63 +; CHECK-NEXT: rsb.w r12, r12, #0 +; CHECK-NEXT: lsll r6, r1, r8 +; CHECK-NEXT: lsll r0, r7, r12 +; CHECK-NEXT: umlal lr, r5, r3, r4 +; CHECK-NEXT: orrs r0, r6 +; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: orr.w r8, r1, r7 +; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: umull r12, r7, r6, r0 +; CHECK-NEXT: mla r7, r6, r8, r7 +; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldr.w r8, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mla r3, r4, r3, r6 +; CHECK-NEXT: mla r0, r4, r0, r7 +; CHECK-NEXT: mla r2, r4, r2, r3 +; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: adds.w r3, r3, r12 +; CHECK-NEXT: adcs r0, r2 +; CHECK-NEXT: adds.w r2, lr, r3 +; CHECK-NEXT: ldr.w lr, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: adcs r0, r5 +; CHECK-NEXT: .LBB23_10: @ %fp-to-i-cleanup +; CHECK-NEXT: subs r3, r2, #1 +; CHECK-NEXT: mov.w r6, #1 +; CHECK-NEXT: sbcs r3, r0, #0 +; CHECK-NEXT: cset r3, lt +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: csel r0, r0, r3, ne +; CHECK-NEXT: csel r7, r10, r3, ne +; CHECK-NEXT: csel r3, r11, r3, ne +; CHECK-NEXT: csel r2, r2, r6, ne +; CHECK-NEXT: rsbs r5, r3, #0 +; CHECK-NEXT: sbcs.w r5, r9, r7 +; CHECK-NEXT: sbcs.w r2, r9, r2 +; CHECK-NEXT: sbcs.w r0, r9, r0 +; CHECK-NEXT: cset r0, lt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csel r2, r7, r0, ne +; CHECK-NEXT: csel r0, r3, r0, ne +; CHECK-NEXT: subs.w r3, r8, #1 +; CHECK-NEXT: sbcs r3, r1, #0 +; CHECK-NEXT: cset r3, lt +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: csel r7, r1, r3, ne +; CHECK-NEXT: ldr r1, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: csel r5, lr, r3, ne +; CHECK-NEXT: csel r6, r8, r6, ne +; CHECK-NEXT: csel r3, r1, r3, ne +; CHECK-NEXT: rsbs r4, r3, #0 +; CHECK-NEXT: sbcs.w r4, r9, r5 +; CHECK-NEXT: sbcs.w r6, r9, r6 +; CHECK-NEXT: sbcs.w r1, r9, r7 +; CHECK-NEXT: cset r1, lt +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csel r3, r3, r1, ne +; CHECK-NEXT: csel r1, r5, r1, ne +; CHECK-NEXT: vmov q0[2], q0[0], r3, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r2 +; CHECK-NEXT: add sp, #108 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -991,18 +1886,56 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r7, lr} ; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov.u16 r0, q4[0] -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: vmov q0[2], q0[0], r0, r4 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vcvt.s32.f16 s2, s0 +; CHECK-NEXT: mvn r0, #-2147483648 +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: mov.w r12, #-1 +; CHECK-NEXT: mov.w lr, #-2147483648 +; CHECK-NEXT: vmovx.f16 s0, s0 +; CHECK-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-NEXT: subs.w r3, r1, #-1 +; CHECK-NEXT: asr.w r2, r1, #31 +; CHECK-NEXT: sbcs.w r3, r2, r0 +; CHECK-NEXT: sbcs r3, r2, #0 +; CHECK-NEXT: sbcs r2, r2, #0 +; CHECK-NEXT: mvn r3, #-2147483648 +; CHECK-NEXT: cset r2, lt +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: asrne r2, r1, #31 +; CHECK-NEXT: it ne +; CHECK-NEXT: asrne r3, r1, #31 +; CHECK-NEXT: csel r1, r1, r12, ne +; CHECK-NEXT: rsbs r4, r1, #0 +; CHECK-NEXT: sbcs.w r4, lr, r3 +; CHECK-NEXT: sbcs.w r4, r12, r2 +; CHECK-NEXT: sbcs.w r2, r12, r2 +; CHECK-NEXT: csel r2, r3, lr, lt +; CHECK-NEXT: cset r3, lt +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: csel r1, r1, r3, ne +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: subs.w r5, r3, #-1 +; CHECK-NEXT: asr.w r4, r3, #31 +; CHECK-NEXT: sbcs.w r5, r4, r0 +; CHECK-NEXT: sbcs r5, r4, #0 +; CHECK-NEXT: sbcs r4, r4, #0 +; CHECK-NEXT: cset r4, lt +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: itt ne +; CHECK-NEXT: asrne r4, r3, #31 +; CHECK-NEXT: asrne r0, r3, #31 +; CHECK-NEXT: csel r3, r3, r12, ne +; CHECK-NEXT: rsbs r5, r3, #0 +; CHECK-NEXT: sbcs.w r5, lr, r0 +; CHECK-NEXT: sbcs.w r5, r12, r4 +; CHECK-NEXT: sbcs.w r5, r12, r4 +; CHECK-NEXT: cset r5, lt +; CHECK-NEXT: csel r0, r0, lr, lt +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: csel r3, r3, r5, ne +; CHECK-NEXT: vmov q0[2], q0[0], r1, r3 +; CHECK-NEXT: vmov q0[3], q0[1], r2, r0 ; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> @@ -1017,21 +1950,17 @@ entry: define arm_aapcs_vfpcc <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-LABEL: utesth_f16i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: bl __fixunshfti -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov.u16 r0, q4[0] -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: bl __fixunshfti -; CHECK-NEXT: vmov q0[2], q0[0], r0, r4 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: vldr s5, .LCPI25_0 +; CHECK-NEXT: vcvt.u32.f16 s4, s0 +; CHECK-NEXT: vmovx.f16 s0, s0 +; CHECK-NEXT: vcvt.u32.f16 s6, s0 +; CHECK-NEXT: vmov.f32 s7, s5 +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI25_0: +; CHECK-NEXT: .long 0x00000000 @ float 0 entry: %conv = fptoui <2 x half> %x to <2 x i128> %0 = icmp ult <2 x i128> %conv, @@ -1043,39 +1972,57 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-LABEL: ustest_f16i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: rsbs r4, r0, #0 -; CHECK-NEXT: mov.w r5, #0 -; CHECK-NEXT: sbcs.w r4, r5, r1 -; CHECK-NEXT: sbcs.w r2, r5, r2 -; CHECK-NEXT: sbcs.w r2, r5, r3 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r6, r0, r2, ne -; CHECK-NEXT: vmov.u16 r0, q4[0] -; CHECK-NEXT: csel r7, r1, r2, ne -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: rsbs r4, r0, #0 -; CHECK-NEXT: sbcs.w r4, r5, r1 -; CHECK-NEXT: sbcs.w r2, r5, r2 -; CHECK-NEXT: sbcs.w r2, r5, r3 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r7 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: vmovx.f16 s2, s0 +; CHECK-NEXT: mov.w r12, #1 +; CHECK-NEXT: vcvt.s32.f16 s2, s2 +; CHECK-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: rsbs r3, r12, r1, asr #31 +; CHECK-NEXT: asr.w r2, r1, #31 +; CHECK-NEXT: sbcs r2, r2, #0 +; CHECK-NEXT: mov.w r2, #1 +; CHECK-NEXT: cset r0, lt +; CHECK-NEXT: it lt +; CHECK-NEXT: asrlt r2, r1, #31 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: csel r0, r1, r0, ne +; CHECK-NEXT: it ne +; CHECK-NEXT: asrne r3, r1, #31 +; CHECK-NEXT: rsbs r1, r0, #0 +; CHECK-NEXT: sbcs.w r1, lr, r3 +; CHECK-NEXT: sbcs.w r1, lr, r2 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: sbcs.w r1, lr, r3 +; CHECK-NEXT: cset r1, lt +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csel r0, r0, r1, ne +; CHECK-NEXT: csel r1, r3, r1, ne +; CHECK-NEXT: rsbs r4, r12, r2, asr #31 +; CHECK-NEXT: asr.w r3, r2, #31 +; CHECK-NEXT: sbcs r3, r3, #0 +; CHECK-NEXT: it lt +; CHECK-NEXT: asrlt.w r12, r2, #31 +; CHECK-NEXT: cset r3, lt +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: it ne +; CHECK-NEXT: asrne r4, r2, #31 +; CHECK-NEXT: csel r2, r2, r3, ne +; CHECK-NEXT: rsbs r3, r2, #0 +; CHECK-NEXT: sbcs.w r3, lr, r4 +; CHECK-NEXT: sbcs.w r3, lr, r12 +; CHECK-NEXT: sbcs.w r3, lr, r4 +; CHECK-NEXT: cset r3, lt +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: csel r2, r2, r3, ne +; CHECK-NEXT: csel r3, r4, r3, ne +; CHECK-NEXT: vmov q0[2], q0[0], r2, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r3, r1 +; CHECK-NEXT: pop {r4, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -1726,64 +2673,245 @@ entry: define arm_aapcs_vfpcc <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: stest_f64i64_mm: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: vmov r12, lr, d8 -; CHECK-NEXT: subs.w r5, r0, #-1 -; CHECK-NEXT: mvn r4, #-2147483648 -; CHECK-NEXT: sbcs.w r5, r1, r4 -; CHECK-NEXT: sbcs r5, r2, #0 -; CHECK-NEXT: mov.w r6, #-1 -; CHECK-NEXT: sbcs r5, r3, #0 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r0, r0, r6, ne -; CHECK-NEXT: csel r3, r3, r5, ne -; CHECK-NEXT: csel r2, r2, r5, ne -; CHECK-NEXT: csel r1, r1, r4, ne -; CHECK-NEXT: rsbs r7, r0, #0 +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #92 +; CHECK-NEXT: sub sp, #92 +; CHECK-NEXT: vmov r4, r0, d0 +; CHECK-NEXT: movs r2, #1 +; CHECK-NEXT: mov.w lr, #-1 +; CHECK-NEXT: movw r1, #1023 +; CHECK-NEXT: ubfx r7, r0, #20, #11 +; CHECK-NEXT: cmp r7, r1 +; CHECK-NEXT: bhs .LBB45_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: b .LBB45_6 +; CHECK-NEXT: .LBB45_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: orr.w r1, r2, r0, asr #31 +; CHECK-NEXT: bfi r3, r2, #20, #12 +; CHECK-NEXT: asrs r0, r0, #31 +; CHECK-NEXT: movw r6, #1074 +; CHECK-NEXT: cmp r7, r6 +; CHECK-NEXT: bhi .LBB45_4 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: subw r7, r7, #1075 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: lsll r4, r3, r7 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: umull r9, r7, r4, r1 +; CHECK-NEXT: umlal r7, r5, r3, r1 +; CHECK-NEXT: umull r1, r6, r4, r0 +; CHECK-NEXT: adds.w r10, r1, r7 +; CHECK-NEXT: adcs r5, r6 +; CHECK-NEXT: mla r6, r0, r3, r6 +; CHECK-NEXT: adc r7, r12, #0 +; CHECK-NEXT: umlal r5, r7, r3, r0 +; CHECK-NEXT: mla r0, r0, r4, r6 +; CHECK-NEXT: adds r1, r1, r5 +; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: adc.w r8, r7, r0 +; CHECK-NEXT: b .LBB45_5 +; CHECK-NEXT: .LBB45_4: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: strd r4, r3, [sp, #72] +; CHECK-NEXT: subw r3, r7, #1075 +; CHECK-NEXT: movs r7, #12 +; CHECK-NEXT: add r6, sp, #56 +; CHECK-NEXT: and.w r7, r7, r3, lsr #3 +; CHECK-NEXT: adds r6, #16 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: subs r5, r6, r7 +; CHECK-NEXT: strd r8, r8, [sp, #80] +; CHECK-NEXT: and r12, r3, #31 +; CHECK-NEXT: strd r8, r8, [sp, #64] +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: strd r8, r8, [sp, #56] +; CHECK-NEXT: mov r11, lr +; CHECK-NEXT: ldm.w r5, {r6, r7, r10} +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: ldr r2, [r5, #12] +; CHECK-NEXT: mov r4, r6 +; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: lsll r4, r3, r12 +; CHECK-NEXT: lsrl r6, r7, #1 +; CHECK-NEXT: umull r5, r2, r4, r1 +; CHECK-NEXT: umlal r2, r9, r3, r1 +; CHECK-NEXT: str r5, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: umull r5, lr, r4, r0 +; CHECK-NEXT: str r5, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: adds r2, r2, r5 +; CHECK-NEXT: eor r5, r12, #63 +; CHECK-NEXT: rsb.w r5, r5, #0 +; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: lsll r6, r7, r5 +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: adcs.w r2, r9, lr +; CHECK-NEXT: lsll r10, r5, r12 +; CHECK-NEXT: adc r9, r8, #0 +; CHECK-NEXT: orr.w r6, r6, r10 +; CHECK-NEXT: orrs r7, r5 +; CHECK-NEXT: umlal r2, r9, r3, r0 +; CHECK-NEXT: ldr.w r10, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: umull r12, r5, r1, r6 +; CHECK-NEXT: mla r1, r1, r7, r5 +; CHECK-NEXT: mla r3, r0, r3, lr +; CHECK-NEXT: mov lr, r11 +; CHECK-NEXT: mla r1, r0, r6, r1 +; CHECK-NEXT: mla r0, r0, r4, r3 +; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: adds.w r3, r3, r12 +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: adds r1, r2, r3 +; CHECK-NEXT: adc.w r8, r9, r0 +; CHECK-NEXT: ldr.w r9, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: movs r2, #1 +; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: .LBB45_5: @ %fp-to-i-cleanup1 +; CHECK-NEXT: movw r1, #1023 +; CHECK-NEXT: .LBB45_6: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r6, r7, d1 +; CHECK-NEXT: mov.w r5, #-1 +; CHECK-NEXT: cmp.w r7, #-1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt r5, #0 +; CHECK-NEXT: movgt.w lr, #1 +; CHECK-NEXT: ubfx r0, r7, #20, #11 +; CHECK-NEXT: cmp r0, r1 +; CHECK-NEXT: bhs .LBB45_8 +; CHECK-NEXT: @ %bb.7: +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: b .LBB45_12 +; CHECK-NEXT: .LBB45_8: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: bfi r7, r2, #20, #12 +; CHECK-NEXT: movw r1, #1074 +; CHECK-NEXT: cmp r0, r1 +; CHECK-NEXT: str.w r8, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: bhi .LBB45_10 +; CHECK-NEXT: @ %bb.9: @ %fp-to-i-if-exp.small +; CHECK-NEXT: subw r0, r0, #1075 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: lsll r6, r7, r0 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: umull r4, r0, r6, lr +; CHECK-NEXT: umlal r0, r3, r7, lr +; CHECK-NEXT: umull lr, r2, r6, r5 +; CHECK-NEXT: adds.w r1, lr, r0 +; CHECK-NEXT: adcs.w r0, r3, r2 +; CHECK-NEXT: mla r2, r5, r7, r2 +; CHECK-NEXT: adc r3, r8, #0 +; CHECK-NEXT: umlal r0, r3, r7, r5 +; CHECK-NEXT: mla r5, r5, r6, r2 +; CHECK-NEXT: adds.w r2, r0, lr +; CHECK-NEXT: adc.w r7, r3, r5 +; CHECK-NEXT: b .LBB45_11 +; CHECK-NEXT: .LBB45_10: @ %fp-to-i-if-exp.large +; CHECK-NEXT: subw r0, r0, #1075 +; CHECK-NEXT: add r1, sp, #40 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: add r2, sp, #24 +; CHECK-NEXT: stm.w r1, {r6, r7, r12} +; CHECK-NEXT: movs r1, #12 +; CHECK-NEXT: and.w r1, r1, r0, lsr #3 +; CHECK-NEXT: adds r2, #16 +; CHECK-NEXT: str.w r12, [sp, #52] +; CHECK-NEXT: subs r1, r2, r1 +; CHECK-NEXT: strd r12, r12, [sp, #32] +; CHECK-NEXT: strd r12, r12, [sp, #24] +; CHECK-NEXT: and r8, r0, #31 +; CHECK-NEXT: ldrd r6, r7, [r1] +; CHECK-NEXT: mov r2, lr +; CHECK-NEXT: ldrd r4, r11, [r1, #8] +; CHECK-NEXT: str.w r9, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: mov r9, r7 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: str.w r10, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: lsll r0, r9, r8 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: umull r3, r1, r0, lr +; CHECK-NEXT: lsrl r6, r7, #1 +; CHECK-NEXT: lsll r4, r11, r8 +; CHECK-NEXT: umlal r1, r10, r9, lr +; CHECK-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-NEXT: umull r3, lr, r0, r5 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: adds r1, r1, r3 +; CHECK-NEXT: adcs.w r10, r10, lr +; CHECK-NEXT: adc r3, r12, #0 +; CHECK-NEXT: eor r12, r8, #63 +; CHECK-NEXT: rsb.w r12, r12, #0 +; CHECK-NEXT: umlal r10, r3, r9, r5 +; CHECK-NEXT: lsll r6, r7, r12 +; CHECK-NEXT: orr.w r8, r11, r7 +; CHECK-NEXT: orr.w r7, r4, r6 +; CHECK-NEXT: ldr r4, [sp] @ 4-byte Reload +; CHECK-NEXT: umull r12, r6, r2, r7 +; CHECK-NEXT: mla r2, r2, r8, r6 +; CHECK-NEXT: mla r2, r5, r7, r2 +; CHECK-NEXT: mla r7, r5, r9, lr +; CHECK-NEXT: ldr.w r9, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mla r0, r5, r0, r7 +; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: adds.w r7, r7, r12 +; CHECK-NEXT: adcs r0, r2 +; CHECK-NEXT: adds.w r2, r10, r7 +; CHECK-NEXT: ldr.w r10, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: adc.w r7, r3, r0 +; CHECK-NEXT: .LBB45_11: @ %fp-to-i-cleanup +; CHECK-NEXT: ldr.w r8, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: .LBB45_12: @ %fp-to-i-cleanup +; CHECK-NEXT: subs.w r3, r4, #-1 +; CHECK-NEXT: mvn r0, #-2147483648 +; CHECK-NEXT: sbcs.w r3, r1, r0 ; CHECK-NEXT: mov.w r5, #-2147483648 -; CHECK-NEXT: sbcs.w r7, r5, r1 -; CHECK-NEXT: sbcs.w r2, r6, r2 -; CHECK-NEXT: sbcs.w r2, r6, r3 +; CHECK-NEXT: sbcs r3, r2, #0 +; CHECK-NEXT: sbcs r3, r7, #0 +; CHECK-NEXT: cset r3, lt +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: csel r7, r7, r3, ne +; CHECK-NEXT: csel r2, r2, r3, ne +; CHECK-NEXT: mov.w r3, #-1 +; CHECK-NEXT: csel r1, r1, r0, ne +; CHECK-NEXT: csel r6, r4, r3, ne +; CHECK-NEXT: rsbs r4, r6, #0 +; CHECK-NEXT: sbcs.w r4, r5, r1 +; CHECK-NEXT: sbcs.w r2, r3, r2 +; CHECK-NEXT: sbcs.w r2, r3, r7 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r9, r0, r2, ne -; CHECK-NEXT: csel r8, r1, r5, ne -; CHECK-NEXT: mov r0, r12 -; CHECK-NEXT: mov r1, lr -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs.w r7, r0, #-1 -; CHECK-NEXT: sbcs.w r7, r1, r4 -; CHECK-NEXT: sbcs r7, r2, #0 -; CHECK-NEXT: sbcs r7, r3, #0 +; CHECK-NEXT: csel r12, r1, r5, ne +; CHECK-NEXT: csel r2, r6, r2, ne +; CHECK-NEXT: subs.w r7, r9, #-1 +; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: sbcs.w r7, r10, r0 +; CHECK-NEXT: sbcs r7, r1, #0 +; CHECK-NEXT: sbcs r7, r8, #0 ; CHECK-NEXT: cset r7, lt ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r6, ne -; CHECK-NEXT: csel r3, r3, r7, ne -; CHECK-NEXT: csel r2, r2, r7, ne -; CHECK-NEXT: csel r1, r1, r4, ne -; CHECK-NEXT: rsbs r7, r0, #0 -; CHECK-NEXT: sbcs.w r7, r5, r1 -; CHECK-NEXT: sbcs.w r2, r6, r2 -; CHECK-NEXT: sbcs.w r2, r6, r3 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r5, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r9 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r8 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-NEXT: csel r4, r9, r3, ne +; CHECK-NEXT: csel r6, r8, r7, ne +; CHECK-NEXT: csel r7, r1, r7, ne +; CHECK-NEXT: csel r0, r10, r0, ne +; CHECK-NEXT: rsbs r1, r4, #0 +; CHECK-NEXT: sbcs.w r1, r5, r0 +; CHECK-NEXT: sbcs.w r1, r3, r7 +; CHECK-NEXT: sbcs.w r1, r3, r6 +; CHECK-NEXT: cset r1, lt +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csel r1, r4, r1, ne +; CHECK-NEXT: csel r0, r0, r5, ne +; CHECK-NEXT: vmov q0[2], q0[0], r1, r2 +; CHECK-NEXT: vmov q0[3], q0[1], r0, r12 +; CHECK-NEXT: add sp, #92 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -1794,34 +2922,122 @@ entry: define arm_aapcs_vfpcc <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: utest_f64i64_mm: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 -; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: vmov r12, lr, d8 -; CHECK-NEXT: subs r2, #1 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r4, r1, r2, ne -; CHECK-NEXT: csel r5, r0, r2, ne -; CHECK-NEXT: mov r0, r12 -; CHECK-NEXT: mov r1, lr -; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: subs r2, #1 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .pad #64 +; CHECK-NEXT: sub sp, #64 +; CHECK-NEXT: vmov r10, r1, d0 +; CHECK-NEXT: mov.w r12, #1 +; CHECK-NEXT: movw lr, #1023 +; CHECK-NEXT: ubfx r2, r1, #20, #11 +; CHECK-NEXT: cmp r2, lr +; CHECK-NEXT: bhs .LBB46_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: b .LBB46_4 +; CHECK-NEXT: .LBB46_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: bfi r1, r12, #20, #12 +; CHECK-NEXT: movw r3, #1074 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: bhi .LBB46_5 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: subw r2, r2, #1075 +; CHECK-NEXT: lsll r10, r1, r2 +; CHECK-NEXT: .LBB46_4: @ %fp-to-i-cleanup1 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: b .LBB46_6 +; CHECK-NEXT: .LBB46_5: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: subw r2, r2, #1075 +; CHECK-NEXT: movs r0, #12 +; CHECK-NEXT: strd r10, r1, [sp, #48] +; CHECK-NEXT: add r1, sp, #32 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: and.w r0, r0, r2, lsr #3 +; CHECK-NEXT: adds r1, #16 +; CHECK-NEXT: strd r3, r3, [sp, #56] +; CHECK-NEXT: strd r3, r3, [sp, #40] +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: strd r3, r3, [sp, #32] +; CHECK-NEXT: subs r3, r1, r0 +; CHECK-NEXT: ldrd r10, r1, [r3] +; CHECK-NEXT: eor r7, r2, #63 +; CHECK-NEXT: ldrd r4, r3, [r3, #8] +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: rsbs r7, r7, #0 +; CHECK-NEXT: mov r6, r10 +; CHECK-NEXT: lsll r4, r3, r2 +; CHECK-NEXT: lsrl r6, r5, #1 +; CHECK-NEXT: lsll r10, r1, r2 +; CHECK-NEXT: lsll r6, r5, r7 +; CHECK-NEXT: orr.w r9, r3, r5 +; CHECK-NEXT: orr.w r8, r4, r6 +; CHECK-NEXT: .LBB46_6: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r4, r5, d1 +; CHECK-NEXT: ubfx r2, r5, #20, #11 +; CHECK-NEXT: cmp r2, lr +; CHECK-NEXT: bhs .LBB46_8 +; CHECK-NEXT: @ %bb.7: +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: b .LBB46_10 +; CHECK-NEXT: .LBB46_8: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: bfi r5, r12, #20, #12 +; CHECK-NEXT: movw r7, #1074 +; CHECK-NEXT: cmp r2, r7 +; CHECK-NEXT: bhi .LBB46_11 +; CHECK-NEXT: @ %bb.9: @ %fp-to-i-if-exp.small +; CHECK-NEXT: subw r2, r2, #1075 +; CHECK-NEXT: lsll r4, r5, r2 +; CHECK-NEXT: .LBB46_10: @ %fp-to-i-cleanup +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: b .LBB46_12 +; CHECK-NEXT: .LBB46_11: @ %fp-to-i-if-exp.large +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: add r0, sp, #16 +; CHECK-NEXT: subw r12, r2, #1075 +; CHECK-NEXT: mov r6, sp +; CHECK-NEXT: str r7, [sp, #28] +; CHECK-NEXT: adds r6, #16 +; CHECK-NEXT: strd r7, r7, [sp, #8] +; CHECK-NEXT: strd r7, r7, [sp] +; CHECK-NEXT: stm r0!, {r4, r5, r7} +; CHECK-NEXT: movs r7, #12 +; CHECK-NEXT: and.w r7, r7, r12, lsr #3 +; CHECK-NEXT: subs r7, r6, r7 +; CHECK-NEXT: and r12, r12, #31 +; CHECK-NEXT: ldrd r4, r5, [r7] +; CHECK-NEXT: eor r6, r12, #63 +; CHECK-NEXT: ldrd r2, r7, [r7, #8] +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: rsbs r6, r6, #0 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: lsll r2, r7, r12 +; CHECK-NEXT: lsrl r0, r3, #1 +; CHECK-NEXT: lsll r4, r5, r12 +; CHECK-NEXT: lsll r0, r3, r6 +; CHECK-NEXT: orr.w r6, r7, r3 +; CHECK-NEXT: orrs r2, r0 +; CHECK-NEXT: .LBB46_12: @ %fp-to-i-cleanup +; CHECK-NEXT: subs r0, r2, #1 +; CHECK-NEXT: sbcs r0, r6, #0 +; CHECK-NEXT: cset r0, lo +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csel r2, r5, r0, ne +; CHECK-NEXT: csel r0, r4, r0, ne +; CHECK-NEXT: subs.w r3, r8, #1 +; CHECK-NEXT: sbcs r3, r9, #0 +; CHECK-NEXT: cset r3, lo +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: csel r7, r10, r3, ne +; CHECK-NEXT: csel r1, r1, r3, ne +; CHECK-NEXT: vmov q0[2], q0[0], r7, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r2 +; CHECK-NEXT: add sp, #64 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptoui <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -1831,44 +3047,221 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: ustest_f64i64_mm: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: vmov r12, lr, d8 -; CHECK-NEXT: subs r2, #1 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r5, r0, r2, ne -; CHECK-NEXT: csel r0, r3, r2, ne -; CHECK-NEXT: csel r4, r1, r2, ne +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #100 +; CHECK-NEXT: sub sp, #100 +; CHECK-NEXT: vmov r6, r0, d0 +; CHECK-NEXT: movs r2, #1 +; CHECK-NEXT: mov.w r9, #-1 +; CHECK-NEXT: movw r10, #1023 +; CHECK-NEXT: ubfx r7, r0, #20, #11 +; CHECK-NEXT: cmp r7, r10 +; CHECK-NEXT: bhs .LBB47_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: b .LBB47_5 +; CHECK-NEXT: .LBB47_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: orr.w r1, r2, r0, asr #31 +; CHECK-NEXT: bfi r3, r2, #20, #12 +; CHECK-NEXT: asrs r0, r0, #31 +; CHECK-NEXT: movw r5, #1074 +; CHECK-NEXT: cmp r7, r5 +; CHECK-NEXT: bhi .LBB47_4 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: subw r7, r7, #1075 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: lsll r6, r3, r7 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: umull lr, r7, r6, r1 +; CHECK-NEXT: umlal r7, r4, r3, r1 +; CHECK-NEXT: umull r1, r5, r6, r0 +; CHECK-NEXT: adds r7, r7, r1 +; CHECK-NEXT: str r7, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: adcs r4, r5 +; CHECK-NEXT: mla r5, r0, r3, r5 +; CHECK-NEXT: adc r7, r12, #0 +; CHECK-NEXT: umlal r4, r7, r3, r0 +; CHECK-NEXT: mla r0, r0, r6, r5 +; CHECK-NEXT: adds r4, r4, r1 +; CHECK-NEXT: adcs r0, r7 +; CHECK-NEXT: b .LBB47_5 +; CHECK-NEXT: .LBB47_4: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: strd r6, r3, [sp, #80] +; CHECK-NEXT: subw r3, r7, #1075 +; CHECK-NEXT: movs r4, #12 +; CHECK-NEXT: add r5, sp, #64 +; CHECK-NEXT: and.w r4, r4, r3, lsr #3 +; CHECK-NEXT: adds r5, #16 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: subs r4, r5, r4 +; CHECK-NEXT: strd r12, r12, [sp, #88] +; CHECK-NEXT: and r8, r3, #31 +; CHECK-NEXT: strd r12, r12, [sp, #72] +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: strd r12, r12, [sp, #64] +; CHECK-NEXT: ldm.w r4, {r6, r7, r10, r11} +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: lsll r10, r11, r8 +; CHECK-NEXT: mov r4, r6 +; CHECK-NEXT: lsrl r6, r7, #1 +; CHECK-NEXT: lsll r4, r3, r8 +; CHECK-NEXT: umull r5, r2, r4, r1 +; CHECK-NEXT: umlal r2, lr, r3, r1 +; CHECK-NEXT: str r5, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: umull r5, r9, r4, r0 +; CHECK-NEXT: str r5, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: adds r2, r2, r5 +; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: eor r2, r8, #63 +; CHECK-NEXT: adcs.w lr, lr, r9 +; CHECK-NEXT: rsb.w r2, r2, #0 +; CHECK-NEXT: adc r12, r12, #0 +; CHECK-NEXT: lsll r6, r7, r2 +; CHECK-NEXT: umlal lr, r12, r3, r0 +; CHECK-NEXT: orr.w r5, r10, r6 +; CHECK-NEXT: orr.w r2, r11, r7 +; CHECK-NEXT: umull r6, r7, r1, r5 +; CHECK-NEXT: movw r10, #1023 +; CHECK-NEXT: mla r1, r1, r2, r7 +; CHECK-NEXT: mla r2, r0, r3, r9 +; CHECK-NEXT: mov.w r9, #-1 +; CHECK-NEXT: mla r1, r0, r5, r1 +; CHECK-NEXT: mla r0, r0, r4, r2 +; CHECK-NEXT: ldr r2, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: adds r2, r2, r6 +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: adds.w r4, lr, r2 +; CHECK-NEXT: ldr.w lr, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: adc.w r0, r0, r12 +; CHECK-NEXT: movs r2, #1 +; CHECK-NEXT: .LBB47_5: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r6, r7, d1 +; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: mov.w r5, #-1 +; CHECK-NEXT: cmp.w r7, #-1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt r5, #0 +; CHECK-NEXT: movgt.w r9, #1 +; CHECK-NEXT: ubfx r0, r7, #20, #11 +; CHECK-NEXT: cmp r0, r10 +; CHECK-NEXT: bhs .LBB47_7 +; CHECK-NEXT: @ %bb.6: +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: b .LBB47_11 +; CHECK-NEXT: .LBB47_7: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: bfi r7, r2, #20, #12 +; CHECK-NEXT: movw r1, #1074 +; CHECK-NEXT: cmp r0, r1 +; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: bhi .LBB47_9 +; CHECK-NEXT: @ %bb.8: @ %fp-to-i-if-exp.small +; CHECK-NEXT: subw r0, r0, #1075 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: lsll r6, r7, r0 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: umull r1, r0, r6, r9 +; CHECK-NEXT: umlal r0, r3, r7, r9 +; CHECK-NEXT: umull r2, r4, r6, r5 +; CHECK-NEXT: adds.w r8, r2, r0 +; CHECK-NEXT: adcs.w r0, r3, r4 +; CHECK-NEXT: mla r4, r5, r7, r4 +; CHECK-NEXT: adc r3, r10, #0 +; CHECK-NEXT: umlal r0, r3, r7, r5 +; CHECK-NEXT: mla r7, r5, r6, r4 +; CHECK-NEXT: adds r0, r0, r2 +; CHECK-NEXT: adc.w r2, r3, r7 +; CHECK-NEXT: b .LBB47_10 +; CHECK-NEXT: .LBB47_9: @ %fp-to-i-if-exp.large +; CHECK-NEXT: add r1, sp, #48 +; CHECK-NEXT: subw r0, r0, #1075 +; CHECK-NEXT: add r2, sp, #32 +; CHECK-NEXT: str.w lr, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: stm.w r1, {r6, r7, lr} +; CHECK-NEXT: movs r1, #12 +; CHECK-NEXT: and.w r1, r1, r0, lsr #3 +; CHECK-NEXT: adds r2, #16 +; CHECK-NEXT: subs r1, r2, r1 +; CHECK-NEXT: str.w lr, [sp, #60] +; CHECK-NEXT: strd lr, lr, [sp, #40] +; CHECK-NEXT: and r2, r0, #31 +; CHECK-NEXT: strd lr, lr, [sp, #32] +; CHECK-NEXT: ldm.w r1, {r6, r7, r12} +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: ldr r1, [r1, #12] +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: lsll r0, r3, r2 +; CHECK-NEXT: lsrl r6, r7, #1 +; CHECK-NEXT: umull r4, r1, r0, r9 +; CHECK-NEXT: umull r11, r10, r0, r5 +; CHECK-NEXT: str r4, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: umlal r1, r4, r3, r9 +; CHECK-NEXT: str.w r11, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: adds.w r8, r11, r1 +; CHECK-NEXT: eor r11, r2, #63 +; CHECK-NEXT: rsb.w r11, r11, #0 +; CHECK-NEXT: adcs.w r4, r4, r10 +; CHECK-NEXT: lsll r6, r7, r11 +; CHECK-NEXT: ldr.w r11, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: adc lr, lr, #0 +; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: lsll r12, r11, r2 +; CHECK-NEXT: umlal r4, lr, r3, r5 +; CHECK-NEXT: orr.w r2, r11, r7 +; CHECK-NEXT: orr.w r7, r12, r6 +; CHECK-NEXT: mla r3, r5, r3, r10 +; CHECK-NEXT: umull r11, r6, r9, r7 +; CHECK-NEXT: mla r2, r9, r2, r6 +; CHECK-NEXT: mla r0, r5, r0, r3 +; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mla r2, r5, r7, r2 +; CHECK-NEXT: adds.w r3, r3, r11 +; CHECK-NEXT: adcs r2, r0 +; CHECK-NEXT: adds r0, r4, r3 +; CHECK-NEXT: adc.w r2, r2, lr +; CHECK-NEXT: ldr.w lr, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: .LBB47_10: @ %fp-to-i-cleanup +; CHECK-NEXT: ldr r4, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: .LBB47_11: @ %fp-to-i-cleanup +; CHECK-NEXT: subs r0, #1 +; CHECK-NEXT: sbcs r0, r2, #0 +; CHECK-NEXT: cset r0, lt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csel r3, r1, r0, ne +; CHECK-NEXT: csel r1, r8, r0, ne +; CHECK-NEXT: csel r0, r2, r0, ne ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: itt mi -; CHECK-NEXT: movmi r4, #0 -; CHECK-NEXT: movmi r5, #0 -; CHECK-NEXT: mov r0, r12 -; CHECK-NEXT: mov r1, lr -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r2, #1 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r2, r3, r2, ne -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: itt mi -; CHECK-NEXT: movmi r0, #0 ; CHECK-NEXT: movmi r1, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: movmi r3, #0 +; CHECK-NEXT: ldr r6, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: subs r0, r4, #1 +; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: sbcs r0, r6, #0 +; CHECK-NEXT: cset r0, lt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csel r2, r2, r0, ne +; CHECK-NEXT: csel r7, lr, r0, ne +; CHECK-NEXT: csel r0, r6, r0, ne +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: itt mi +; CHECK-NEXT: movmi r7, #0 +; CHECK-NEXT: movmi r2, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r7, r3 +; CHECK-NEXT: vmov q0[3], q0[1], r2, r1 +; CHECK-NEXT: add sp, #100 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -1879,55 +3272,226 @@ entry: define arm_aapcs_vfpcc <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-LABEL: stest_f32i64_mm: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: vmov r8, r0, d0 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs.w r7, r0, #-1 -; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: sbcs.w r7, r1, r5 -; CHECK-NEXT: mov.w r6, #-2147483648 -; CHECK-NEXT: sbcs r7, r2, #0 -; CHECK-NEXT: sbcs r7, r3, #0 -; CHECK-NEXT: cset r7, lt -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r3, r3, r7, ne -; CHECK-NEXT: csel r2, r2, r7, ne -; CHECK-NEXT: mov.w r7, #-1 -; CHECK-NEXT: csel r1, r1, r5, ne -; CHECK-NEXT: csel r0, r0, r7, ne -; CHECK-NEXT: rsbs r4, r0, #0 -; CHECK-NEXT: sbcs.w r4, r6, r1 -; CHECK-NEXT: sbcs.w r2, r7, r2 -; CHECK-NEXT: sbcs.w r2, r7, r3 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r10, r0, r2, ne -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: csel r9, r1, r6, ne -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs.w r4, r0, #-1 -; CHECK-NEXT: sbcs.w r4, r1, r5 -; CHECK-NEXT: sbcs r4, r2, #0 -; CHECK-NEXT: sbcs r4, r3, #0 -; CHECK-NEXT: cset r4, lt -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r0, r7, ne -; CHECK-NEXT: csel r3, r3, r4, ne -; CHECK-NEXT: csel r2, r2, r4, ne -; CHECK-NEXT: csel r1, r1, r5, ne -; CHECK-NEXT: rsbs r5, r0, #0 -; CHECK-NEXT: sbcs.w r5, r6, r1 -; CHECK-NEXT: sbcs.w r2, r7, r2 -; CHECK-NEXT: sbcs.w r2, r7, r3 +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #92 +; CHECK-NEXT: sub sp, #92 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: mov.w r10, #-1 +; CHECK-NEXT: ubfx r1, r0, #23, #8 +; CHECK-NEXT: cmp r1, #127 +; CHECK-NEXT: bhs .LBB48_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: b .LBB48_5 +; CHECK-NEXT: .LBB48_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: movs r3, #1 +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: orr.w r11, r3, r0, asr #31 +; CHECK-NEXT: bfi r2, r3, #23, #9 +; CHECK-NEXT: asrs r0, r0, #31 +; CHECK-NEXT: cmp r1, #149 +; CHECK-NEXT: bhi .LBB48_4 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: rsb.w r1, r1, #150 +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: lsr.w r1, r2, r1 +; CHECK-NEXT: umull r2, r8, r1, r11 +; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: umull r2, r3, r1, r0 +; CHECK-NEXT: adds.w r6, r2, r8 +; CHECK-NEXT: adcs r6, r3, #0 +; CHECK-NEXT: adc r6, r7, #0 +; CHECK-NEXT: adds.w r5, r2, r8 +; CHECK-NEXT: mla r5, r0, r1, r3 +; CHECK-NEXT: adcs.w r9, r2, r3 +; CHECK-NEXT: umlal r8, r7, r1, r0 +; CHECK-NEXT: adc.w r11, r6, r5 +; CHECK-NEXT: b .LBB48_5 +; CHECK-NEXT: .LBB48_4: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: subs r1, #150 +; CHECK-NEXT: add r7, sp, #56 +; CHECK-NEXT: strd r2, lr, [sp, #72] +; CHECK-NEXT: movs r2, #12 +; CHECK-NEXT: adds r7, #16 +; CHECK-NEXT: and.w r2, r2, r1, lsr #3 +; CHECK-NEXT: strd lr, lr, [sp, #80] +; CHECK-NEXT: subs r2, r7, r2 +; CHECK-NEXT: strd lr, lr, [sp, #64] +; CHECK-NEXT: strd lr, lr, [sp, #56] +; CHECK-NEXT: and r12, r1, #31 +; CHECK-NEXT: ldm.w r2, {r6, r7, r8} +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: ldr r2, [r2, #12] +; CHECK-NEXT: mov r4, r6 +; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: lsll r4, r1, r12 +; CHECK-NEXT: lsrl r6, r7, #1 +; CHECK-NEXT: umull r3, r2, r4, r11 +; CHECK-NEXT: umlal r2, r5, r1, r11 +; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: umull r3, r10, r4, r0 +; CHECK-NEXT: adds r2, r2, r3 +; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: adcs.w r9, r5, r10 +; CHECK-NEXT: eor r2, r12, #63 +; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: rsb.w r2, r2, #0 +; CHECK-NEXT: lsll r6, r7, r2 +; CHECK-NEXT: adc lr, lr, #0 +; CHECK-NEXT: lsll r8, r5, r12 +; CHECK-NEXT: umlal r9, lr, r1, r0 +; CHECK-NEXT: orr.w r2, r5, r7 +; CHECK-NEXT: orr.w r7, r8, r6 +; CHECK-NEXT: mla r1, r0, r1, r10 +; CHECK-NEXT: ldr.w r8, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov.w r10, #-1 +; CHECK-NEXT: umull r12, r6, r11, r7 +; CHECK-NEXT: mla r2, r11, r2, r6 +; CHECK-NEXT: mla r2, r0, r7, r2 +; CHECK-NEXT: mla r0, r0, r4, r1 +; CHECK-NEXT: adds.w r1, r3, r12 +; CHECK-NEXT: adcs r0, r2 +; CHECK-NEXT: adds.w r9, r9, r1 +; CHECK-NEXT: adc.w r11, lr, r0 +; CHECK-NEXT: .LBB48_5: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r1, s1 +; CHECK-NEXT: mov.w r4, #-1 +; CHECK-NEXT: cmp.w r1, #-1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt r4, #0 +; CHECK-NEXT: movgt.w r10, #1 +; CHECK-NEXT: ubfx r0, r1, #23, #8 +; CHECK-NEXT: cmp r0, #127 +; CHECK-NEXT: bhs .LBB48_7 +; CHECK-NEXT: @ %bb.6: +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: b .LBB48_10 +; CHECK-NEXT: .LBB48_7: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: movw r2, #65535 +; CHECK-NEXT: cmp r0, #149 +; CHECK-NEXT: movt r2, #127 +; CHECK-NEXT: and.w r1, r1, r2 +; CHECK-NEXT: add.w r1, r1, #8388608 +; CHECK-NEXT: bhi .LBB48_9 +; CHECK-NEXT: @ %bb.8: @ %fp-to-i-if-exp.small +; CHECK-NEXT: rsb.w r0, r0, #150 +; CHECK-NEXT: lsr.w r0, r1, r0 +; CHECK-NEXT: umull r3, r1, r0, r10 +; CHECK-NEXT: umull r2, r7, r0, r4 +; CHECK-NEXT: muls r0, r4, r0 +; CHECK-NEXT: adds r5, r2, r1 +; CHECK-NEXT: adcs r2, r7 +; CHECK-NEXT: adc.w r4, r7, r0 +; CHECK-NEXT: b .LBB48_10 +; CHECK-NEXT: .LBB48_9: @ %fp-to-i-if-exp.large +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: subs r0, #150 +; CHECK-NEXT: add r3, sp, #24 +; CHECK-NEXT: strd r1, r2, [sp, #40] +; CHECK-NEXT: movs r1, #12 +; CHECK-NEXT: adds r3, #16 +; CHECK-NEXT: and.w r1, r1, r0, lsr #3 +; CHECK-NEXT: strd r2, r2, [sp, #48] +; CHECK-NEXT: strd r2, r2, [sp, #32] +; CHECK-NEXT: subs r3, r3, r1 +; CHECK-NEXT: strd r2, r2, [sp, #24] +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: ldrd r6, r1, [r3] +; CHECK-NEXT: str.w r8, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: ldr.w r8, [r3, #8] +; CHECK-NEXT: str.w r11, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: ldr.w r11, [r3, #12] +; CHECK-NEXT: str.w r9, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: and r9, r0, #31 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: lsrl r6, r1, #1 +; CHECK-NEXT: lsll r0, r7, r9 +; CHECK-NEXT: lsll r8, r11, r9 +; CHECK-NEXT: umull r3, r5, r0, r10 +; CHECK-NEXT: umull r2, r12, r0, r4 +; CHECK-NEXT: umlal r5, lr, r7, r10 +; CHECK-NEXT: strd r12, r2, [sp] @ 8-byte Folded Spill +; CHECK-NEXT: adds r5, r5, r2 +; CHECK-NEXT: mov.w r2, #0 +; CHECK-NEXT: adcs.w lr, lr, r12 +; CHECK-NEXT: adc r12, r2, #0 +; CHECK-NEXT: eor r2, r9, #63 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: umlal lr, r12, r7, r4 +; CHECK-NEXT: lsll r6, r1, r2 +; CHECK-NEXT: ldr.w r9, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: orr.w r2, r8, r6 +; CHECK-NEXT: orr.w r1, r1, r11 +; CHECK-NEXT: ldr.w r11, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: umull r8, r6, r10, r2 +; CHECK-NEXT: mla r1, r10, r1, r6 +; CHECK-NEXT: mla r1, r4, r2, r1 +; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload +; CHECK-NEXT: mla r2, r4, r7, r2 +; CHECK-NEXT: mla r0, r4, r0, r2 +; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: adds.w r2, r2, r8 +; CHECK-NEXT: ldr.w r8, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: adds.w r2, r2, lr +; CHECK-NEXT: adc.w r4, r12, r0 +; CHECK-NEXT: .LBB48_10: @ %fp-to-i-cleanup +; CHECK-NEXT: subs.w r1, r3, #-1 +; CHECK-NEXT: mvn r0, #-2147483648 +; CHECK-NEXT: sbcs.w r1, r5, r0 +; CHECK-NEXT: mov.w r6, #-1 +; CHECK-NEXT: sbcs r1, r2, #0 +; CHECK-NEXT: sbcs r1, r4, #0 +; CHECK-NEXT: cset r1, lt +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csel r3, r3, r6, ne +; CHECK-NEXT: csel r7, r4, r1, ne +; CHECK-NEXT: csel r1, r2, r1, ne +; CHECK-NEXT: csel r2, r5, r0, ne +; CHECK-NEXT: rsbs r4, r3, #0 +; CHECK-NEXT: mov.w r5, #-2147483648 +; CHECK-NEXT: sbcs.w r4, r5, r2 +; CHECK-NEXT: sbcs.w r1, r6, r1 +; CHECK-NEXT: sbcs.w r1, r6, r7 +; CHECK-NEXT: cset r1, lt +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csel r12, r2, r5, ne +; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: csel r1, r3, r1, ne +; CHECK-NEXT: subs.w r3, r2, #-1 +; CHECK-NEXT: sbcs.w r3, r8, r0 +; CHECK-NEXT: sbcs r3, r9, #0 +; CHECK-NEXT: sbcs r3, r11, #0 +; CHECK-NEXT: cset r3, lt +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: csel r4, r2, r6, ne +; CHECK-NEXT: csel r7, r11, r3, ne +; CHECK-NEXT: csel r3, r9, r3, ne +; CHECK-NEXT: csel r0, r8, r0, ne +; CHECK-NEXT: rsbs r2, r4, #0 +; CHECK-NEXT: sbcs.w r2, r5, r0 +; CHECK-NEXT: sbcs.w r2, r6, r3 +; CHECK-NEXT: sbcs.w r2, r6, r7 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r6, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r10 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r9 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: csel r2, r4, r2, ne +; CHECK-NEXT: csel r0, r0, r5, ne +; CHECK-NEXT: vmov q0[2], q0[0], r2, r1 +; CHECK-NEXT: vmov q0[3], q0[1], r0, r12 +; CHECK-NEXT: add sp, #92 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -1938,28 +3502,121 @@ entry: define arm_aapcs_vfpcc <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-LABEL: utest_f32i64_mm: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: vmov r4, r0, d0 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: subs r2, #1 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r6, r0, r2, ne -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: csel r5, r1, r2, ne -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: subs r2, #1 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 -; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: .pad #68 +; CHECK-NEXT: sub sp, #68 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: ubfx r0, r1, #23, #8 +; CHECK-NEXT: cmp r0, #127 +; CHECK-NEXT: bhs .LBB49_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: b .LBB49_4 +; CHECK-NEXT: .LBB49_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: movs r2, #1 +; CHECK-NEXT: cmp r0, #149 +; CHECK-NEXT: bfi r1, r2, #23, #9 +; CHECK-NEXT: bhi .LBB49_5 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: rsb.w r0, r0, #150 +; CHECK-NEXT: lsr.w r8, r1, r0 +; CHECK-NEXT: .LBB49_4: @ %fp-to-i-cleanup1 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: b .LBB49_6 +; CHECK-NEXT: .LBB49_5: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: strd r1, r2, [sp, #48] +; CHECK-NEXT: add r1, sp, #32 +; CHECK-NEXT: strd r2, r2, [sp, #56] +; CHECK-NEXT: adds r1, #16 +; CHECK-NEXT: strd r2, r2, [sp, #40] +; CHECK-NEXT: strd r2, r2, [sp, #32] +; CHECK-NEXT: sub.w r2, r0, #150 +; CHECK-NEXT: movs r0, #12 +; CHECK-NEXT: and.w r0, r0, r2, lsr #3 +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: subs r3, r1, r0 +; CHECK-NEXT: eor r6, r2, #63 +; CHECK-NEXT: ldrd r8, r9, [r3] +; CHECK-NEXT: rsbs r6, r6, #0 +; CHECK-NEXT: ldrd r4, r3, [r3, #8] +; CHECK-NEXT: mov r5, r9 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: lsll r4, r3, r2 +; CHECK-NEXT: lsrl r0, r5, #1 +; CHECK-NEXT: lsll r8, r9, r2 +; CHECK-NEXT: lsll r0, r5, r6 +; CHECK-NEXT: orr.w lr, r3, r5 +; CHECK-NEXT: orr.w r12, r4, r0 +; CHECK-NEXT: .LBB49_6: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r3, s1 +; CHECK-NEXT: ubfx r2, r3, #23, #8 +; CHECK-NEXT: cmp r2, #127 +; CHECK-NEXT: bhs .LBB49_8 +; CHECK-NEXT: @ %bb.7: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: b .LBB49_10 +; CHECK-NEXT: .LBB49_8: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: movw r0, #65535 +; CHECK-NEXT: cmp r2, #149 +; CHECK-NEXT: movt r0, #127 +; CHECK-NEXT: and.w r0, r0, r3 +; CHECK-NEXT: add.w r3, r0, #8388608 +; CHECK-NEXT: bhi .LBB49_11 +; CHECK-NEXT: @ %bb.9: @ %fp-to-i-if-exp.small +; CHECK-NEXT: rsb.w r0, r2, #150 +; CHECK-NEXT: lsr.w r0, r3, r0 +; CHECK-NEXT: .LBB49_10: @ %fp-to-i-cleanup +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: b .LBB49_12 +; CHECK-NEXT: .LBB49_11: @ %fp-to-i-if-exp.large +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: sub.w r4, r2, #150 +; CHECK-NEXT: movs r2, #12 +; CHECK-NEXT: strd r3, r0, [sp, #16] +; CHECK-NEXT: mov r3, sp +; CHECK-NEXT: and.w r2, r2, r4, lsr #3 +; CHECK-NEXT: adds r3, #16 +; CHECK-NEXT: strd r0, r0, [sp, #24] +; CHECK-NEXT: subs r3, r3, r2 +; CHECK-NEXT: strd r0, r0, [sp, #8] +; CHECK-NEXT: strd r0, r0, [sp] +; CHECK-NEXT: and r1, r4, #31 +; CHECK-NEXT: ldm.w r3, {r0, r5, r6} +; CHECK-NEXT: eor r4, r1, #63 +; CHECK-NEXT: mov r7, r5 +; CHECK-NEXT: ldr r3, [r3, #12] +; CHECK-NEXT: rsbs r4, r4, #0 +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: lsll r0, r5, r1 +; CHECK-NEXT: lsrl r2, r7, #1 +; CHECK-NEXT: lsll r6, r3, r1 +; CHECK-NEXT: lsll r2, r7, r4 +; CHECK-NEXT: orr.w r4, r3, r7 +; CHECK-NEXT: orr.w r3, r6, r2 +; CHECK-NEXT: .LBB49_12: @ %fp-to-i-cleanup +; CHECK-NEXT: subs r1, r3, #1 +; CHECK-NEXT: sbcs r1, r4, #0 +; CHECK-NEXT: cset r1, lo +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csel r2, r5, r1, ne +; CHECK-NEXT: csel r0, r0, r1, ne +; CHECK-NEXT: subs.w r1, r12, #1 +; CHECK-NEXT: sbcs r1, lr, #0 +; CHECK-NEXT: cset r1, lo +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csel r3, r8, r1, ne +; CHECK-NEXT: csel r1, r9, r1, ne +; CHECK-NEXT: vmov q0[2], q0[0], r3, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r2 +; CHECK-NEXT: add sp, #68 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} entry: %conv = fptoui <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -1969,38 +3626,215 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-LABEL: ustest_f32i64_mm: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: vmov r4, r0, d0 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r2, #1 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r6, r0, r2, ne -; CHECK-NEXT: csel r0, r3, r2, ne -; CHECK-NEXT: csel r5, r1, r2, ne +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #100 +; CHECK-NEXT: sub sp, #100 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: mov.w r9, #-1 +; CHECK-NEXT: ubfx r1, r0, #23, #8 +; CHECK-NEXT: cmp r1, #127 +; CHECK-NEXT: bhs .LBB50_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: b .LBB50_5 +; CHECK-NEXT: .LBB50_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: movs r3, #1 +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: orr.w r8, r3, r0, asr #31 +; CHECK-NEXT: bfi r2, r3, #23, #9 +; CHECK-NEXT: asrs r0, r0, #31 +; CHECK-NEXT: cmp r1, #149 +; CHECK-NEXT: bhi .LBB50_4 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: rsb.w r1, r1, #150 +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: lsr.w r1, r2, r1 +; CHECK-NEXT: umull r8, r4, r1, r8 +; CHECK-NEXT: umull r2, r3, r1, r0 +; CHECK-NEXT: adds r6, r2, r4 +; CHECK-NEXT: adcs r6, r3, #0 +; CHECK-NEXT: adc r6, r7, #0 +; CHECK-NEXT: adds r5, r2, r4 +; CHECK-NEXT: mla r5, r0, r1, r3 +; CHECK-NEXT: adcs.w r10, r2, r3 +; CHECK-NEXT: umlal r4, r7, r1, r0 +; CHECK-NEXT: adc.w lr, r6, r5 +; CHECK-NEXT: b .LBB50_5 +; CHECK-NEXT: .LBB50_4: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: subs r1, #150 +; CHECK-NEXT: add r7, sp, #64 +; CHECK-NEXT: strd r2, r11, [sp, #80] +; CHECK-NEXT: movs r2, #12 +; CHECK-NEXT: adds r7, #16 +; CHECK-NEXT: and.w r2, r2, r1, lsr #3 +; CHECK-NEXT: strd r11, r11, [sp, #88] +; CHECK-NEXT: strd r11, r11, [sp, #72] +; CHECK-NEXT: subs r4, r7, r2 +; CHECK-NEXT: strd r11, r11, [sp, #64] +; CHECK-NEXT: and lr, r1, #31 +; CHECK-NEXT: ldrd r6, r7, [r4] +; CHECK-NEXT: ldr r2, [r4, #8] +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: ldr r2, [r4, #12] +; CHECK-NEXT: mov r4, r6 +; CHECK-NEXT: lsll r4, r1, lr +; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: umull r5, r2, r4, r8 +; CHECK-NEXT: lsrl r6, r7, #1 +; CHECK-NEXT: umull r3, r10, r4, r0 +; CHECK-NEXT: str r5, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: umlal r2, r5, r1, r8 +; CHECK-NEXT: adds r2, r2, r3 +; CHECK-NEXT: str r2, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: eor r2, lr, #63 +; CHECK-NEXT: adcs.w r12, r5, r10 +; CHECK-NEXT: rsb.w r2, r2, #0 +; CHECK-NEXT: adc r11, r11, #0 +; CHECK-NEXT: lsll r6, r7, r2 +; CHECK-NEXT: ldrd r5, r2, [sp, #16] @ 8-byte Folded Reload +; CHECK-NEXT: umlal r12, r11, r1, r0 +; CHECK-NEXT: lsll r2, r5, lr +; CHECK-NEXT: orrs r5, r7 +; CHECK-NEXT: orr.w r7, r2, r6 +; CHECK-NEXT: mla r1, r0, r1, r10 +; CHECK-NEXT: umull lr, r6, r8, r7 +; CHECK-NEXT: mla r2, r8, r5, r6 +; CHECK-NEXT: ldr.w r8, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mla r2, r0, r7, r2 +; CHECK-NEXT: mla r0, r0, r4, r1 +; CHECK-NEXT: adds.w r1, r3, lr +; CHECK-NEXT: ldr r4, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: adcs r0, r2 +; CHECK-NEXT: adds.w r10, r12, r1 +; CHECK-NEXT: adc.w lr, r11, r0 +; CHECK-NEXT: .LBB50_5: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r2, s1 +; CHECK-NEXT: mov.w r0, #-1 +; CHECK-NEXT: cmp.w r2, #-1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt r0, #0 +; CHECK-NEXT: movgt.w r9, #1 +; CHECK-NEXT: ubfx r1, r2, #23, #8 +; CHECK-NEXT: cmp r1, #127 +; CHECK-NEXT: bhs .LBB50_7 +; CHECK-NEXT: @ %bb.6: +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: b .LBB50_10 +; CHECK-NEXT: .LBB50_7: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: movw r3, #65535 +; CHECK-NEXT: cmp r1, #149 +; CHECK-NEXT: movt r3, #127 +; CHECK-NEXT: and.w r2, r2, r3 +; CHECK-NEXT: add.w r2, r2, #8388608 +; CHECK-NEXT: bhi .LBB50_9 +; CHECK-NEXT: @ %bb.8: @ %fp-to-i-if-exp.small +; CHECK-NEXT: rsb.w r1, r1, #150 +; CHECK-NEXT: lsr.w r1, r2, r1 +; CHECK-NEXT: umull r3, r7, r1, r0 +; CHECK-NEXT: umull r11, r2, r1, r9 +; CHECK-NEXT: muls r0, r1, r0 +; CHECK-NEXT: adds r5, r3, r2 +; CHECK-NEXT: adcs.w r1, r3, r7 +; CHECK-NEXT: adcs r0, r7 +; CHECK-NEXT: b .LBB50_10 +; CHECK-NEXT: .LBB50_9: @ %fp-to-i-if-exp.large +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: add r3, sp, #32 +; CHECK-NEXT: strd r2, r7, [sp, #48] +; CHECK-NEXT: sub.w r2, r1, #150 +; CHECK-NEXT: movs r1, #12 +; CHECK-NEXT: adds r3, #16 +; CHECK-NEXT: and.w r1, r1, r2, lsr #3 +; CHECK-NEXT: strd r7, r7, [sp, #56] +; CHECK-NEXT: strd r7, r7, [sp, #40] +; CHECK-NEXT: subs r3, r3, r1 +; CHECK-NEXT: strd r7, r7, [sp, #32] +; CHECK-NEXT: mov r12, r9 +; CHECK-NEXT: ldrd r6, r1, [r3] +; CHECK-NEXT: str.w r8, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: and r8, r2, #31 +; CHECK-NEXT: ldr r5, [r3, #8] +; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: ldr r3, [r3, #12] +; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: mov r3, r1 +; CHECK-NEXT: lsll r2, r3, r8 +; CHECK-NEXT: strd lr, r4, [sp, #20] @ 8-byte Folded Spill +; CHECK-NEXT: umull r11, r5, r2, r9 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: lsrl r6, r1, #1 +; CHECK-NEXT: umlal r5, lr, r3, r9 +; CHECK-NEXT: umull r4, r9, r2, r0 +; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: adds r5, r5, r4 +; CHECK-NEXT: str.w r9, [sp] @ 4-byte Spill +; CHECK-NEXT: adcs.w lr, lr, r9 +; CHECK-NEXT: eor r9, r8, #63 +; CHECK-NEXT: adc r4, r7, #0 +; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: umlal lr, r4, r3, r0 +; CHECK-NEXT: rsb.w r9, r9, #0 +; CHECK-NEXT: lsll r6, r1, r9 +; CHECK-NEXT: str r4, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r4, r10 +; CHECK-NEXT: ldr.w r10, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: lsll r10, r7, r8 +; CHECK-NEXT: orr.w r6, r6, r10 +; CHECK-NEXT: orr.w r8, r7, r1 +; CHECK-NEXT: ldr r7, [sp] @ 4-byte Reload +; CHECK-NEXT: mla r3, r0, r3, r7 +; CHECK-NEXT: mov r10, r4 +; CHECK-NEXT: umull r9, r1, r12, r6 +; CHECK-NEXT: mla r1, r12, r8, r1 +; CHECK-NEXT: ldr.w r8, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mla r1, r0, r6, r1 +; CHECK-NEXT: mla r0, r0, r2, r3 +; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: adds.w r2, r2, r9 +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: adds.w r1, lr, r2 +; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: ldrd lr, r4, [sp, #20] @ 8-byte Folded Reload +; CHECK-NEXT: adcs r0, r2 +; CHECK-NEXT: .LBB50_10: @ %fp-to-i-cleanup +; CHECK-NEXT: subs r1, #1 +; CHECK-NEXT: sbcs r1, r0, #0 +; CHECK-NEXT: cset r1, lt +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csel r0, r0, r1, ne +; CHECK-NEXT: csel r2, r11, r1, ne +; CHECK-NEXT: csel r3, r5, r1, ne ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: itt mi -; CHECK-NEXT: movmi r5, #0 -; CHECK-NEXT: movmi r6, #0 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r2, #1 -; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r2, r3, r2, ne -; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: movmi r3, #0 +; CHECK-NEXT: movmi r2, #0 +; CHECK-NEXT: subs.w r0, r10, #1 +; CHECK-NEXT: sbcs r0, lr, #0 +; CHECK-NEXT: cset r0, lt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csel r1, r4, r0, ne +; CHECK-NEXT: csel r7, r8, r0, ne +; CHECK-NEXT: csel r0, lr, r0, ne +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: itt mi -; CHECK-NEXT: movmi r0, #0 +; CHECK-NEXT: movmi r7, #0 ; CHECK-NEXT: movmi r1, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 -; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEXT: vmov q0[2], q0[0], r7, r2 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 +; CHECK-NEXT: add sp, #100 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -2012,21 +3846,16 @@ entry: define arm_aapcs_vfpcc <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-LABEL: stest_f16i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov.u16 r0, q4[0] -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: vmov q0[2], q0[0], r0, r4 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: vcvt.s32.f16 s2, s0 +; CHECK-NEXT: vmovx.f16 s0, s0 +; CHECK-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-NEXT: vmov r2, s2 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: vmov q0[2], q0[0], r2, r0 +; CHECK-NEXT: asrs r3, r2, #31 +; CHECK-NEXT: asrs r1, r0, #31 +; CHECK-NEXT: vmov q0[3], q0[1], r3, r1 +; CHECK-NEXT: bx lr entry: %conv = fptosi <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -2038,21 +3867,17 @@ entry: define arm_aapcs_vfpcc <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-LABEL: utesth_f16i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: bl __fixunshfti -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov.u16 r0, q4[0] -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: bl __fixunshfti -; CHECK-NEXT: vmov q0[2], q0[0], r0, r4 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: vldr s5, .LCPI52_0 +; CHECK-NEXT: vcvt.u32.f16 s4, s0 +; CHECK-NEXT: vmovx.f16 s0, s0 +; CHECK-NEXT: vcvt.u32.f16 s6, s0 +; CHECK-NEXT: vmov.f32 s7, s5 +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI52_0: +; CHECK-NEXT: .long 0x00000000 @ float 0 entry: %conv = fptoui <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -2063,29 +3888,41 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-LABEL: ustest_f16i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov.u16 r0, q4[0] -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: itt mi -; CHECK-NEXT: movmi r5, #0 +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: vcvt.s32.f16 s2, s0 +; CHECK-NEXT: vmovx.f16 s0, s0 +; CHECK-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-NEXT: mov.w r12, #1 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: rsbs r3, r12, r0, asr #31 +; CHECK-NEXT: asr.w r1, r0, #31 +; CHECK-NEXT: sbcs r1, r1, #0 +; CHECK-NEXT: cset r1, lt +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csel r4, r0, r1, ne +; CHECK-NEXT: it ne +; CHECK-NEXT: asrne r1, r0, #31 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: it mi ; CHECK-NEXT: movmi r4, #0 -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: itt mi -; CHECK-NEXT: movmi r0, #0 -; CHECK-NEXT: movmi r1, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r4 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: rsbs r2, r12, r0, asr #31 +; CHECK-NEXT: asr.w lr, r0, #31 +; CHECK-NEXT: sbcs r2, lr, #0 +; CHECK-NEXT: cset r2, lt +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r3, r0, r2, ne +; CHECK-NEXT: it ne +; CHECK-NEXT: asrne r2, r0, #31 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: it mi +; CHECK-NEXT: movmi r3, #0 +; CHECK-NEXT: bic.w r0, r1, r1, asr #31 +; CHECK-NEXT: bic.w r1, r2, r2, asr #31 +; CHECK-NEXT: vmov q0[2], q0[0], r3, r4 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 +; CHECK-NEXT: pop {r4, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) diff --git a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll index 77548b49d77f2..2941db5480406 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll @@ -1984,327 +1984,1024 @@ define arm_aapcs_vfpcc <4 x i64> @test_signed_v4f32_v4i64(<4 x float> %f) { define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i100: -; CHECK: @ %bb.0: +; CHECK: @ %bb.0: @ %fp-to-i-entryfp-to-i-entryfp-to-i-entryfp-to-i-entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vmov r6, s17 -; CHECK-NEXT: vldr s22, .LCPI30_0 -; CHECK-NEXT: vldr s20, .LCPI30_1 -; CHECK-NEXT: vmov r7, s19 -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: mvnlt r3, #7 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt r3, #7 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str.w r2, [r4, #33] -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: str.w r1, [r4, #29] -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r4, #25] -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: vcmp.f32 s17, s22 -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: mov r6, r2 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: mvnlt r3, #7 -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: movgt r3, #7 -; CHECK-NEXT: vcmp.f32 s17, s17 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: str r3, [sp] @ 4-byte Spill -; CHECK-NEXT: ittt vs -; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: movvs.w r10, #0 -; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: vcmp.f32 s19, s22 -; CHECK-NEXT: mov r11, r1 -; CHECK-NEXT: mov r8, r2 -; CHECK-NEXT: mov r9, r3 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: mvnlt r9, #7 -; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: movlt.w r11, #0 -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: movgt.w r11, #-1 -; CHECK-NEXT: movgt.w r8, #-1 -; CHECK-NEXT: movgt.w r9, #7 -; CHECK-NEXT: vcmp.f32 s19, s19 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt vs -; CHECK-NEXT: movvs.w r9, #0 -; CHECK-NEXT: movvs.w r8, #0 -; CHECK-NEXT: movvs.w r11, #0 -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s16, s22 +; CHECK-NEXT: .pad #204 +; CHECK-NEXT: sub sp, #204 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: str r2, [sp, #64] @ 4-byte Spill +; CHECK-NEXT: movs r2, #15 +; CHECK-NEXT: mov.w r12, #7 +; CHECK-NEXT: mov.w r11, #-1 +; CHECK-NEXT: mov.w r9, #8 +; CHECK-NEXT: str r2, [sp, #68] @ 4-byte Spill +; CHECK-NEXT: ubfx r3, r1, #23, #8 +; CHECK-NEXT: cmp r3, #127 +; CHECK-NEXT: blo .LBB30_4 +; CHECK-NEXT: @ %bb.1: @ %fp-to-i-entryfp-to-i-entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: vcmp.f32 s0, s0 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: mov.w r5, #0 +; CHECK-NEXT: mov.w r2, #0 +; CHECK-NEXT: bvs.w .LBB30_8 +; CHECK-NEXT: @ %bb.2: @ %fp-to-i-if-check.saturate28 +; CHECK-NEXT: cmp r3, #226 +; CHECK-NEXT: blo .LBB30_5 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-saturate29 +; CHECK-NEXT: bic.w r5, r11, r1, asr #31 +; CHECK-NEXT: cmp.w r1, #-1 +; CHECK-NEXT: csel r2, r12, r9, gt +; CHECK-NEXT: str r5, [sp, #64] @ 4-byte Spill +; CHECK-NEXT: mov r8, r5 +; CHECK-NEXT: b .LBB30_8 +; CHECK-NEXT: .LBB30_4: +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: b .LBB30_8 +; CHECK-NEXT: .LBB30_5: @ %fp-to-i-if-check.exp.size30 +; CHECK-NEXT: movs r2, #1 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: bfi r6, r2, #23, #9 +; CHECK-NEXT: orr.w r10, r2, r1, asr #31 +; CHECK-NEXT: mov.w lr, #7 +; CHECK-NEXT: asr.w r9, r1, #31 +; CHECK-NEXT: cmp r3, #149 +; CHECK-NEXT: bhi .LBB30_7 +; CHECK-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small31 +; CHECK-NEXT: rsb.w r3, r3, #150 +; CHECK-NEXT: lsr.w r3, r6, r3 +; CHECK-NEXT: umull r1, r8, r3, r10 +; CHECK-NEXT: umull r2, r7, r3, r9 +; CHECK-NEXT: str r1, [sp, #64] @ 4-byte Spill +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: adds.w r5, r2, r8 +; CHECK-NEXT: adcs r5, r7, #0 +; CHECK-NEXT: adc r12, r1, #0 +; CHECK-NEXT: adds.w r4, r2, r8 +; CHECK-NEXT: mla r4, r9, r3, r7 +; CHECK-NEXT: adcs.w r5, r2, r7 +; CHECK-NEXT: umlal r8, r1, r3, r9 +; CHECK-NEXT: mov.w r9, #8 +; CHECK-NEXT: adc.w r2, r12, r4 +; CHECK-NEXT: mov r12, lr +; CHECK-NEXT: b .LBB30_8 +; CHECK-NEXT: .LBB30_7: @ %fp-to-i-if-exp.large32 +; CHECK-NEXT: subs r3, #150 +; CHECK-NEXT: add r5, sp, #168 +; CHECK-NEXT: strd r6, r8, [sp, #184] +; CHECK-NEXT: movs r6, #12 +; CHECK-NEXT: and.w r6, r6, r3, lsr #3 +; CHECK-NEXT: adds r5, #16 +; CHECK-NEXT: strd r8, r8, [sp, #192] +; CHECK-NEXT: subs r4, r5, r6 +; CHECK-NEXT: strd r8, r8, [sp, #176] +; CHECK-NEXT: and lr, r3, #31 +; CHECK-NEXT: strd r8, r8, [sp, #168] +; CHECK-NEXT: ldrd r6, r5, [r4] +; CHECK-NEXT: ldr r1, [r4, #8] +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: str r1, [sp, #60] @ 4-byte Spill +; CHECK-NEXT: ldr r1, [r4, #12] +; CHECK-NEXT: mov r4, r6 +; CHECK-NEXT: lsll r4, r3, lr +; CHECK-NEXT: lsrl r6, r5, #1 +; CHECK-NEXT: umull r2, r7, r4, r10 +; CHECK-NEXT: umull r12, r11, r4, r9 +; CHECK-NEXT: str r2, [sp, #64] @ 4-byte Spill +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: umlal r7, r2, r3, r10 +; CHECK-NEXT: str.w r12, [sp, #56] @ 4-byte Spill +; CHECK-NEXT: adds.w r7, r7, r12 +; CHECK-NEXT: str r7, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: adcs.w r12, r2, r11 +; CHECK-NEXT: eor r2, lr, #63 +; CHECK-NEXT: rsb.w r2, r2, #0 +; CHECK-NEXT: adc r7, r8, #0 +; CHECK-NEXT: lsll r6, r5, r2 +; CHECK-NEXT: ldr r2, [sp, #60] @ 4-byte Reload +; CHECK-NEXT: umlal r12, r7, r3, r9 +; CHECK-NEXT: ldr.w r8, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: lsll r2, r1, lr +; CHECK-NEXT: orrs r6, r2 +; CHECK-NEXT: orrs r1, r5 +; CHECK-NEXT: mla r3, r9, r3, r11 +; CHECK-NEXT: mov.w r11, #-1 +; CHECK-NEXT: umull lr, r5, r10, r6 +; CHECK-NEXT: mla r2, r10, r1, r5 +; CHECK-NEXT: mla r1, r9, r4, r3 +; CHECK-NEXT: ldr r3, [sp, #56] @ 4-byte Reload +; CHECK-NEXT: mla r2, r9, r6, r2 +; CHECK-NEXT: adds.w r3, r3, lr +; CHECK-NEXT: mov.w r9, #8 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: adds.w r5, r12, r3 +; CHECK-NEXT: adc.w r2, r7, r1 +; CHECK-NEXT: mov.w r12, #7 +; CHECK-NEXT: .LBB30_8: @ %fp-to-i-cleanup27 +; CHECK-NEXT: strd r5, r2, [sp, #36] @ 8-byte Folded Spill +; CHECK-NEXT: vmov r5, s1 +; CHECK-NEXT: movs r1, #15 +; CHECK-NEXT: mov.w r3, #-1 +; CHECK-NEXT: mov.w r2, #-1 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: cmp.w r5, #-1 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r1, #0 +; CHECK-NEXT: ubfx r7, r5, #23, #8 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r3, #0 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r2, #1 +; CHECK-NEXT: cmp r7, #127 +; CHECK-NEXT: blo .LBB30_12 +; CHECK-NEXT: @ %bb.9: @ %fp-to-i-cleanup27 +; CHECK-NEXT: vcmp.f32 s1, s1 +; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: mvnlt r3, #7 -; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: str.w r10, [sp, #60] @ 4-byte Spill +; CHECK-NEXT: str r4, [sp, #56] @ 4-byte Spill +; CHECK-NEXT: bvs.w .LBB30_17 +; CHECK-NEXT: @ %bb.10: @ %fp-to-i-if-check.saturate15 +; CHECK-NEXT: cmp r7, #226 +; CHECK-NEXT: blo .LBB30_13 +; CHECK-NEXT: @ %bb.11: @ %fp-to-i-if-saturate16 +; CHECK-NEXT: bic.w r10, r11, r5, asr #31 +; CHECK-NEXT: cmp.w r5, #-1 +; CHECK-NEXT: csel r4, r12, r9, gt +; CHECK-NEXT: strd r10, r10, [sp, #56] @ 8-byte Folded Spill +; CHECK-NEXT: b .LBB30_17 +; CHECK-NEXT: .LBB30_12: +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: str.w r10, [sp, #60] @ 4-byte Spill +; CHECK-NEXT: str r1, [sp, #56] @ 4-byte Spill +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: b .LBB30_17 +; CHECK-NEXT: .LBB30_13: @ %fp-to-i-if-check.exp.size17 +; CHECK-NEXT: movw r6, #65535 +; CHECK-NEXT: str.w r8, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: movt r6, #127 +; CHECK-NEXT: cmp r7, #149 +; CHECK-NEXT: and.w r6, r6, r5 +; CHECK-NEXT: add.w r6, r6, #8388608 +; CHECK-NEXT: bhi .LBB30_15 +; CHECK-NEXT: @ %bb.14: @ %fp-to-i-if-exp.small18 +; CHECK-NEXT: rsb.w r7, r7, #150 +; CHECK-NEXT: lsr.w r7, r6, r7 +; CHECK-NEXT: umull r10, r2, r7, r2 +; CHECK-NEXT: umull r3, r6, r7, r3 +; CHECK-NEXT: muls r1, r7, r1 +; CHECK-NEXT: adds r2, r2, r3 +; CHECK-NEXT: str r2, [sp, #60] @ 4-byte Spill +; CHECK-NEXT: adcs.w r2, r3, r6 +; CHECK-NEXT: str r2, [sp, #56] @ 4-byte Spill +; CHECK-NEXT: adc.w r4, r6, r1 +; CHECK-NEXT: b .LBB30_16 +; CHECK-NEXT: .LBB30_15: @ %fp-to-i-if-exp.large19 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: sub.w r12, r7, #150 +; CHECK-NEXT: movs r7, #12 +; CHECK-NEXT: strd r6, r9, [sp, #152] +; CHECK-NEXT: add r6, sp, #136 +; CHECK-NEXT: and.w r7, r7, r12, lsr #3 +; CHECK-NEXT: adds r6, #16 +; CHECK-NEXT: strd r9, r9, [sp, #160] +; CHECK-NEXT: subs r4, r6, r7 +; CHECK-NEXT: strd r9, r9, [sp, #144] +; CHECK-NEXT: strd r9, r9, [sp, #136] +; CHECK-NEXT: ldrd r6, r7, [r4] +; CHECK-NEXT: ldr r5, [r4, #8] +; CHECK-NEXT: str r5, [sp, #56] @ 4-byte Spill +; CHECK-NEXT: mov r5, r7 +; CHECK-NEXT: ldr r4, [r4, #12] +; CHECK-NEXT: mov r8, r6 +; CHECK-NEXT: str r4, [sp, #52] @ 4-byte Spill +; CHECK-NEXT: and r4, r12, #31 +; CHECK-NEXT: lsll r8, r5, r4 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: umull r10, r11, r8, r2 +; CHECK-NEXT: str r4, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: mov r4, r5 +; CHECK-NEXT: lsrl r6, r7, #1 +; CHECK-NEXT: umlal r11, r12, r5, r2 +; CHECK-NEXT: umull r5, lr, r8, r3 +; CHECK-NEXT: str r5, [sp, #48] @ 4-byte Spill +; CHECK-NEXT: adds.w r5, r5, r11 +; CHECK-NEXT: str r5, [sp, #60] @ 4-byte Spill +; CHECK-NEXT: adcs.w r11, r12, lr +; CHECK-NEXT: str.w lr, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: adc r5, r9, #0 +; CHECK-NEXT: mov lr, r4 +; CHECK-NEXT: umlal r11, r5, r4, r3 +; CHECK-NEXT: ldr r4, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: eor r9, r4, #63 +; CHECK-NEXT: rsb.w r9, r9, #0 +; CHECK-NEXT: str r5, [sp, #44] @ 4-byte Spill +; CHECK-NEXT: lsll r6, r7, r9 +; CHECK-NEXT: ldrd r5, r12, [sp, #52] @ 8-byte Folded Reload +; CHECK-NEXT: lsll r12, r5, r4 +; CHECK-NEXT: orr.w r6, r6, r12 +; CHECK-NEXT: orr.w r4, r5, r7 +; CHECK-NEXT: mov.w r12, #7 +; CHECK-NEXT: umull r9, r7, r2, r6 +; CHECK-NEXT: mla r2, r2, r4, r7 +; CHECK-NEXT: ldr r7, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: mla r2, r3, r6, r2 +; CHECK-NEXT: mla r3, r3, lr, r7 +; CHECK-NEXT: mla r1, r1, r8, r3 +; CHECK-NEXT: ldr r3, [sp, #48] @ 4-byte Reload +; CHECK-NEXT: adds.w r3, r3, r9 +; CHECK-NEXT: mov.w r9, #8 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: adds.w r2, r11, r3 +; CHECK-NEXT: str r2, [sp, #56] @ 4-byte Spill +; CHECK-NEXT: mov.w r11, #-1 +; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: adc.w r4, r2, r1 +; CHECK-NEXT: .LBB30_16: @ %fp-to-i-cleanup14 +; CHECK-NEXT: ldr.w r8, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: .LBB30_17: @ %fp-to-i-cleanup14 +; CHECK-NEXT: vmov r5, s2 +; CHECK-NEXT: mov.w lr, #15 +; CHECK-NEXT: mov.w r3, #-1 +; CHECK-NEXT: mov.w r2, #-1 +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: str r4, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: cmp.w r5, #-1 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w lr, #0 +; CHECK-NEXT: ubfx r7, r5, #23, #8 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r3, #0 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r2, #1 +; CHECK-NEXT: cmp r7, #127 +; CHECK-NEXT: blo .LBB30_21 +; CHECK-NEXT: @ %bb.18: @ %fp-to-i-cleanup14 +; CHECK-NEXT: vcmp.f32 s2, s2 +; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt r3, #7 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: mov.w r4, #0 +; CHECK-NEXT: str r1, [sp, #52] @ 4-byte Spill +; CHECK-NEXT: bvs.w .LBB30_26 +; CHECK-NEXT: @ %bb.19: @ %fp-to-i-if-check.saturate2 +; CHECK-NEXT: cmp r7, #226 +; CHECK-NEXT: blo .LBB30_22 +; CHECK-NEXT: @ %bb.20: @ %fp-to-i-if-saturate3 +; CHECK-NEXT: bic.w r6, r11, r5, asr #31 +; CHECK-NEXT: cmp.w r5, #-1 +; CHECK-NEXT: csel r1, r12, r9, gt +; CHECK-NEXT: str r6, [sp, #52] @ 4-byte Spill +; CHECK-NEXT: mov r4, r6 +; CHECK-NEXT: b .LBB30_26 +; CHECK-NEXT: .LBB30_21: +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: str r1, [sp, #52] @ 4-byte Spill +; CHECK-NEXT: b .LBB30_26 +; CHECK-NEXT: .LBB30_22: @ %fp-to-i-if-check.exp.size4 +; CHECK-NEXT: movw r6, #65535 +; CHECK-NEXT: str.w r8, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: movt r6, #127 +; CHECK-NEXT: cmp r7, #149 +; CHECK-NEXT: and.w r6, r6, r5 +; CHECK-NEXT: add.w r6, r6, #8388608 +; CHECK-NEXT: bhi .LBB30_24 +; CHECK-NEXT: @ %bb.23: @ %fp-to-i-if-exp.small5 +; CHECK-NEXT: rsb.w r7, r7, #150 +; CHECK-NEXT: lsr.w r7, r6, r7 +; CHECK-NEXT: umull r6, r2, r7, r2 +; CHECK-NEXT: umull r3, r5, r7, r3 +; CHECK-NEXT: mul r1, lr, r7 +; CHECK-NEXT: adds r4, r3, r2 +; CHECK-NEXT: adcs.w r2, r3, r5 +; CHECK-NEXT: str r2, [sp, #52] @ 4-byte Spill +; CHECK-NEXT: adcs r1, r5 +; CHECK-NEXT: b .LBB30_25 +; CHECK-NEXT: .LBB30_24: @ %fp-to-i-if-exp.large6 +; CHECK-NEXT: sub.w r12, r7, #150 +; CHECK-NEXT: movs r7, #12 +; CHECK-NEXT: strd r6, r1, [sp, #120] +; CHECK-NEXT: add r6, sp, #104 +; CHECK-NEXT: and.w r7, r7, r12, lsr #3 +; CHECK-NEXT: adds r6, #16 +; CHECK-NEXT: subs r5, r6, r7 +; CHECK-NEXT: strd r1, r1, [sp, #128] +; CHECK-NEXT: strd r1, r1, [sp, #112] +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: strd r1, r1, [sp, #104] +; CHECK-NEXT: ldrd r6, r7, [r5] +; CHECK-NEXT: ldr r4, [r5, #8] +; CHECK-NEXT: mov r9, r7 +; CHECK-NEXT: str r4, [sp, #52] @ 4-byte Spill +; CHECK-NEXT: ldr r4, [r5, #12] +; CHECK-NEXT: and r5, r12, #31 +; CHECK-NEXT: str r4, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: mov r4, r6 +; CHECK-NEXT: lsll r4, r9, r5 +; CHECK-NEXT: str r5, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: umull r5, r8, r4, r2 +; CHECK-NEXT: lsrl r6, r7, #1 +; CHECK-NEXT: umlal r8, r11, r9, r2 +; CHECK-NEXT: str r5, [sp, #44] @ 4-byte Spill +; CHECK-NEXT: umull r5, r12, r4, r3 +; CHECK-NEXT: strd r12, r5, [sp, #16] @ 8-byte Folded Spill +; CHECK-NEXT: adds.w r5, r5, r8 +; CHECK-NEXT: adcs.w r8, r11, r12 +; CHECK-NEXT: ldr.w r11, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: str r5, [sp, #48] @ 4-byte Spill +; CHECK-NEXT: adc r1, r1, #0 +; CHECK-NEXT: eor r5, r11, #63 +; CHECK-NEXT: ldr.w r12, [sp, #52] @ 4-byte Reload +; CHECK-NEXT: rsbs r5, r5, #0 +; CHECK-NEXT: umlal r8, r1, r9, r3 +; CHECK-NEXT: lsll r6, r7, r5 +; CHECK-NEXT: ldr r5, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: lsll r12, r5, r11 +; CHECK-NEXT: orr.w r6, r6, r12 +; CHECK-NEXT: orrs r5, r7 +; CHECK-NEXT: umull r11, r7, r2, r6 +; CHECK-NEXT: mla r2, r2, r5, r7 +; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mla r2, r3, r6, r2 +; CHECK-NEXT: mla r3, r3, r9, r7 +; CHECK-NEXT: mla r6, lr, r4, r3 +; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: adds.w r3, r3, r11 +; CHECK-NEXT: adcs r2, r6 +; CHECK-NEXT: adds.w r3, r3, r8 +; CHECK-NEXT: ldrd r6, r4, [sp, #44] @ 8-byte Folded Reload +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: str r3, [sp, #52] @ 4-byte Spill +; CHECK-NEXT: .LBB30_25: @ %fp-to-i-cleanup1 +; CHECK-NEXT: ldr.w r8, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: .LBB30_26: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r5, s3 +; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: ldr r1, [sp, #68] @ 4-byte Reload +; CHECK-NEXT: mov.w r2, #-1 +; CHECK-NEXT: strd r6, r4, [sp, #44] @ 8-byte Folded Spill +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: cmp.w r5, #-1 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r1, #0 +; CHECK-NEXT: ubfx r9, r5, #23, #8 +; CHECK-NEXT: str r1, [sp, #68] @ 4-byte Spill +; CHECK-NEXT: mov.w r1, #-1 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r1, #0 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r2, #1 +; CHECK-NEXT: cmp.w r9, #127 +; CHECK-NEXT: blo .LBB30_30 +; CHECK-NEXT: @ %bb.27: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vcmp.f32 s3, s3 +; CHECK-NEXT: movs r4, #8 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str r2, [r4, #8] -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: str r1, [r4, #4] -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str r0, [r4] -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: lsrl r0, r11, #28 -; CHECK-NEXT: and r1, r9, #15 -; CHECK-NEXT: str.w r0, [r4, #41] -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: lsrl r0, r5, #28 -; CHECK-NEXT: str r0, [r4, #16] -; CHECK-NEXT: orr.w r0, r11, r8, lsl #4 -; CHECK-NEXT: lsrl r8, r1, #28 -; CHECK-NEXT: str.w r0, [r4, #45] -; CHECK-NEXT: strb.w r8, [r4, #49] -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 -; CHECK-NEXT: str.w r0, [r4, #37] -; CHECK-NEXT: orr.w r0, r5, r6, lsl #4 -; CHECK-NEXT: str r0, [r4, #20] -; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-NEXT: and r1, r0, #15 -; CHECK-NEXT: lsrl r6, r1, #28 -; CHECK-NEXT: strb r6, [r4, #24] -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: and r0, r3, #15 -; CHECK-NEXT: orr.w r0, r0, r10, lsl #4 -; CHECK-NEXT: str r0, [r4, #12] -; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: vpop {d8, d9, d10, d11} -; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: mov.w r6, #-1 +; CHECK-NEXT: mov.w r11, #7 +; CHECK-NEXT: mov r3, lr +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: mov.w r7, #0 +; CHECK-NEXT: bvs.w .LBB30_35 +; CHECK-NEXT: @ %bb.28: @ %fp-to-i-if-check.saturate +; CHECK-NEXT: cmp.w r9, #226 +; CHECK-NEXT: blo .LBB30_31 +; CHECK-NEXT: @ %bb.29: @ %fp-to-i-if-saturate +; CHECK-NEXT: bic.w lr, r6, r5, asr #31 +; CHECK-NEXT: cmp.w r5, #-1 +; CHECK-NEXT: csel r7, r11, r4, gt +; CHECK-NEXT: mov r3, lr +; CHECK-NEXT: mov r12, lr +; CHECK-NEXT: b .LBB30_35 +; CHECK-NEXT: .LBB30_30: +; CHECK-NEXT: mov r3, lr +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: b .LBB30_35 +; CHECK-NEXT: .LBB30_31: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: movw r3, #65535 +; CHECK-NEXT: str.w r8, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: movt r3, #127 +; CHECK-NEXT: cmp.w r9, #149 +; CHECK-NEXT: and.w r3, r3, r5 +; CHECK-NEXT: add.w r3, r3, #8388608 +; CHECK-NEXT: bhi .LBB30_33 +; CHECK-NEXT: @ %bb.32: @ %fp-to-i-if-exp.small +; CHECK-NEXT: rsb.w r7, r9, #150 +; CHECK-NEXT: lsrs r3, r7 +; CHECK-NEXT: ldr r7, [sp, #68] @ 4-byte Reload +; CHECK-NEXT: umull lr, r2, r3, r2 +; CHECK-NEXT: umull r1, r5, r3, r1 +; CHECK-NEXT: mul r6, r7, r3 +; CHECK-NEXT: adds r3, r1, r2 +; CHECK-NEXT: adcs.w r12, r1, r5 +; CHECK-NEXT: adc.w r7, r5, r6 +; CHECK-NEXT: b .LBB30_34 +; CHECK-NEXT: .LBB30_33: @ %fp-to-i-if-exp.large +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: add r6, sp, #72 +; CHECK-NEXT: strd r3, r11, [sp, #88] +; CHECK-NEXT: sub.w r3, r9, #150 +; CHECK-NEXT: movs r7, #12 +; CHECK-NEXT: adds r6, #16 +; CHECK-NEXT: and.w r7, r7, r3, lsr #3 +; CHECK-NEXT: strd r11, r11, [sp, #96] +; CHECK-NEXT: strd r11, r11, [sp, #80] +; CHECK-NEXT: subs r5, r6, r7 +; CHECK-NEXT: strd r11, r11, [sp, #72] +; CHECK-NEXT: ldrd r6, r7, [r5] +; CHECK-NEXT: str.w r10, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: and r10, r3, #31 +; CHECK-NEXT: mov r9, r7 +; CHECK-NEXT: ldr r4, [r5, #8] +; CHECK-NEXT: mov r8, r6 +; CHECK-NEXT: str r4, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: lsll r8, r9, r10 +; CHECK-NEXT: ldr r5, [r5, #12] +; CHECK-NEXT: umull lr, r3, r8, r2 +; CHECK-NEXT: str r5, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: lsrl r6, r7, #1 +; CHECK-NEXT: umull r12, r4, r8, r1 +; CHECK-NEXT: umlal r3, r5, r9, r2 +; CHECK-NEXT: str.w r12, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: adds.w r3, r3, r12 +; CHECK-NEXT: mov r12, r10 +; CHECK-NEXT: adcs r5, r4 +; CHECK-NEXT: adc r11, r11, #0 +; CHECK-NEXT: umlal r5, r11, r9, r1 +; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: eor r5, r10, #63 +; CHECK-NEXT: rsbs r5, r5, #0 +; CHECK-NEXT: ldr.w r10, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: lsll r6, r7, r5 +; CHECK-NEXT: ldr r5, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: lsll r10, r5, r12 +; CHECK-NEXT: orr.w r6, r6, r10 +; CHECK-NEXT: orrs r5, r7 +; CHECK-NEXT: ldr.w r10, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: umull r12, r7, r2, r6 +; CHECK-NEXT: mla r2, r2, r5, r7 +; CHECK-NEXT: ldr r5, [sp, #68] @ 4-byte Reload +; CHECK-NEXT: mla r2, r1, r6, r2 +; CHECK-NEXT: mla r1, r1, r9, r4 +; CHECK-NEXT: ldr r4, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: adds.w r7, r4, r12 +; CHECK-NEXT: mla r1, r5, r8, r1 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: adds.w r12, r2, r7 +; CHECK-NEXT: adc.w r7, r11, r1 +; CHECK-NEXT: .LBB30_34: @ %fp-to-i-cleanup +; CHECK-NEXT: ldr.w r8, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: .LBB30_35: @ %fp-to-i-cleanup +; CHECK-NEXT: mov r2, lr +; CHECK-NEXT: ldr r1, [sp, #64] @ 4-byte Reload +; CHECK-NEXT: lsrl r2, r3, #28 +; CHECK-NEXT: str r1, [r0] +; CHECK-NEXT: str.w r2, [r0, #41] +; CHECK-NEXT: orr.w r1, r3, r12, lsl #4 +; CHECK-NEXT: str.w r8, [r0, #4] +; CHECK-NEXT: str.w r1, [r0, #45] +; CHECK-NEXT: ldr r1, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: str r1, [r0, #8] +; CHECK-NEXT: and r1, r7, #15 +; CHECK-NEXT: lsrl r12, r1, #28 +; CHECK-NEXT: strb.w r12, [r0, #49] +; CHECK-NEXT: ldr r1, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: and r1, r1, #15 +; CHECK-NEXT: orr.w r1, r1, r10, lsl #4 +; CHECK-NEXT: str r1, [r0, #12] +; CHECK-NEXT: ldr r1, [sp, #60] @ 4-byte Reload +; CHECK-NEXT: lsrl r10, r1, #28 +; CHECK-NEXT: str.w r10, [r0, #16] +; CHECK-NEXT: ldr r2, [sp, #56] @ 4-byte Reload +; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 +; CHECK-NEXT: str r1, [r0, #20] +; CHECK-NEXT: ldr r1, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: str.w r1, [r0, #25] +; CHECK-NEXT: ldr r1, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: and r1, r1, #15 +; CHECK-NEXT: lsrl r2, r1, #28 +; CHECK-NEXT: strb r2, [r0, #24] +; CHECK-NEXT: ldr r1, [sp, #48] @ 4-byte Reload +; CHECK-NEXT: str.w r1, [r0, #29] +; CHECK-NEXT: ldr r1, [sp, #52] @ 4-byte Reload +; CHECK-NEXT: str.w r1, [r0, #33] +; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: and r1, r1, #15 +; CHECK-NEXT: orr.w r1, r1, lr, lsl #4 +; CHECK-NEXT: str.w r1, [r0, #37] +; CHECK-NEXT: add sp, #204 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI30_0: -; CHECK-NEXT: .long 0xf1000000 @ float -6.338253E+29 -; CHECK-NEXT: .LCPI30_1: -; CHECK-NEXT: .long 0x70ffffff @ float 6.33825262E+29 %x = call <4 x i100> @llvm.fptosi.sat.v4f32.v4i100(<4 x float> %f) ret <4 x i100> %x } define arm_aapcs_vfpcc <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i128: +; CHECK: @ %bb.0: @ %fp-to-i-entryfp-to-i-entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #204 +; CHECK-NEXT: sub sp, #204 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: mvn r12, #-2147483648 +; CHECK-NEXT: mov.w r10, #-1 +; CHECK-NEXT: mov.w r11, #-2147483648 +; CHECK-NEXT: str r2, [sp, #68] @ 4-byte Spill +; CHECK-NEXT: ubfx r3, r1, #23, #8 +; CHECK-NEXT: cmp r3, #127 +; CHECK-NEXT: blo .LBB31_4 +; CHECK-NEXT: @ %bb.1: @ %fp-to-i-entryfp-to-i-entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: vcmp.f32 s0, s0 +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: str r2, [sp, #64] @ 4-byte Spill +; CHECK-NEXT: bvs.w .LBB31_9 +; CHECK-NEXT: @ %bb.2: @ %fp-to-i-if-check.saturate28 +; CHECK-NEXT: cmp r3, #254 +; CHECK-NEXT: blo .LBB31_5 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-saturate29 +; CHECK-NEXT: bic.w r6, r10, r1, asr #31 +; CHECK-NEXT: cmp.w r1, #-1 +; CHECK-NEXT: csel r2, r12, r11, gt +; CHECK-NEXT: strd r6, r6, [sp, #64] @ 8-byte Folded Spill +; CHECK-NEXT: b .LBB31_9 +; CHECK-NEXT: .LBB31_4: +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: str r1, [sp, #64] @ 4-byte Spill +; CHECK-NEXT: b .LBB31_9 +; CHECK-NEXT: .LBB31_5: @ %fp-to-i-if-check.exp.size30 +; CHECK-NEXT: movs r2, #1 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: bfi r6, r2, #23, #9 +; CHECK-NEXT: orr.w r10, r2, r1, asr #31 +; CHECK-NEXT: mov.w lr, #-1 +; CHECK-NEXT: asr.w r11, r1, #31 +; CHECK-NEXT: cmp r3, #149 +; CHECK-NEXT: bhi .LBB31_7 +; CHECK-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small31 +; CHECK-NEXT: rsb.w r3, r3, #150 +; CHECK-NEXT: lsr.w r3, r6, r3 +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: umull r2, r1, r3, r10 +; CHECK-NEXT: mov r10, lr +; CHECK-NEXT: str r2, [sp, #68] @ 4-byte Spill +; CHECK-NEXT: umull r2, r7, r3, r11 +; CHECK-NEXT: adds r5, r2, r1 +; CHECK-NEXT: adcs r5, r7, #0 +; CHECK-NEXT: adc r5, r6, #0 +; CHECK-NEXT: adds r4, r2, r1 +; CHECK-NEXT: mla r4, r11, r3, r7 +; CHECK-NEXT: umlal r1, r6, r3, r11 +; CHECK-NEXT: adcs.w r6, r2, r7 +; CHECK-NEXT: adc.w r2, r5, r4 +; CHECK-NEXT: str r1, [sp, #64] @ 4-byte Spill +; CHECK-NEXT: b .LBB31_8 +; CHECK-NEXT: .LBB31_7: @ %fp-to-i-if-exp.large32 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: subs r3, #150 +; CHECK-NEXT: add r5, sp, #168 +; CHECK-NEXT: strd r6, r8, [sp, #184] +; CHECK-NEXT: movs r6, #12 +; CHECK-NEXT: adds r5, #16 +; CHECK-NEXT: and.w r6, r6, r3, lsr #3 +; CHECK-NEXT: strd r8, r8, [sp, #192] +; CHECK-NEXT: strd r8, r8, [sp, #176] +; CHECK-NEXT: subs r4, r5, r6 +; CHECK-NEXT: strd r8, r8, [sp, #168] +; CHECK-NEXT: and lr, r3, #31 +; CHECK-NEXT: ldrd r6, r5, [r4] +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: ldr r1, [r4, #8] +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: str r1, [sp, #60] @ 4-byte Spill +; CHECK-NEXT: ldr.w r9, [r4, #12] +; CHECK-NEXT: mov r4, r6 +; CHECK-NEXT: lsll r4, r3, lr +; CHECK-NEXT: lsrl r6, r5, #1 +; CHECK-NEXT: umull r1, r7, r4, r10 +; CHECK-NEXT: umlal r7, r2, r3, r10 +; CHECK-NEXT: str r1, [sp, #68] @ 4-byte Spill +; CHECK-NEXT: umull r12, r1, r4, r11 +; CHECK-NEXT: str.w r12, [sp, #56] @ 4-byte Spill +; CHECK-NEXT: adds.w r7, r7, r12 +; CHECK-NEXT: str r7, [sp, #64] @ 4-byte Spill +; CHECK-NEXT: adcs.w r12, r2, r1 +; CHECK-NEXT: eor r2, lr, #63 +; CHECK-NEXT: rsb.w r2, r2, #0 +; CHECK-NEXT: adc r7, r8, #0 +; CHECK-NEXT: lsll r6, r5, r2 +; CHECK-NEXT: ldr r2, [sp, #60] @ 4-byte Reload +; CHECK-NEXT: umlal r12, r7, r3, r11 +; CHECK-NEXT: lsll r2, r9, lr +; CHECK-NEXT: orrs r6, r2 +; CHECK-NEXT: orr.w r8, r9, r5 +; CHECK-NEXT: mla r3, r11, r3, r1 +; CHECK-NEXT: umull lr, r5, r10, r6 +; CHECK-NEXT: mla r2, r10, r8, r5 +; CHECK-NEXT: mov.w r10, #-1 +; CHECK-NEXT: mla r1, r11, r4, r3 +; CHECK-NEXT: ldr r3, [sp, #56] @ 4-byte Reload +; CHECK-NEXT: mla r2, r11, r6, r2 +; CHECK-NEXT: adds.w r3, r3, lr +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: adds.w r6, r12, r3 +; CHECK-NEXT: adc.w r2, r7, r1 +; CHECK-NEXT: mvn r12, #-2147483648 +; CHECK-NEXT: .LBB31_8: @ %fp-to-i-cleanup27 +; CHECK-NEXT: mov.w r11, #-2147483648 +; CHECK-NEXT: .LBB31_9: @ %fp-to-i-cleanup27 +; CHECK-NEXT: vmov r7, s1 +; CHECK-NEXT: strd r6, r2, [sp, #44] @ 8-byte Folded Spill +; CHECK-NEXT: mov.w r3, #-1 +; CHECK-NEXT: mov.w r9, #-1 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: cmp.w r7, #-1 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r3, #0 +; CHECK-NEXT: ubfx r2, r7, #23, #8 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r9, #1 +; CHECK-NEXT: cmp r2, #127 +; CHECK-NEXT: blo .LBB31_13 +; CHECK-NEXT: @ %bb.10: @ %fp-to-i-cleanup27 +; CHECK-NEXT: vcmp.f32 s1, s1 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: mov.w r4, #0 +; CHECK-NEXT: mov.w r6, #0 +; CHECK-NEXT: bvs .LBB31_18 +; CHECK-NEXT: @ %bb.11: @ %fp-to-i-if-check.saturate15 +; CHECK-NEXT: cmp r2, #254 +; CHECK-NEXT: blo .LBB31_14 +; CHECK-NEXT: @ %bb.12: @ %fp-to-i-if-saturate16 +; CHECK-NEXT: bic.w r1, r10, r7, asr #31 +; CHECK-NEXT: cmp.w r7, #-1 +; CHECK-NEXT: csel r6, r12, r11, gt +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: b .LBB31_18 +; CHECK-NEXT: .LBB31_13: +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: b .LBB31_18 +; CHECK-NEXT: .LBB31_14: @ %fp-to-i-if-check.exp.size17 +; CHECK-NEXT: movw r1, #65535 +; CHECK-NEXT: cmp r2, #149 +; CHECK-NEXT: movt r1, #127 +; CHECK-NEXT: and.w r7, r7, r1 +; CHECK-NEXT: add.w r7, r7, #8388608 +; CHECK-NEXT: bhi .LBB31_16 +; CHECK-NEXT: @ %bb.15: @ %fp-to-i-if-exp.small18 +; CHECK-NEXT: rsb.w r2, r2, #150 +; CHECK-NEXT: lsr.w r2, r7, r2 +; CHECK-NEXT: umull r1, r5, r2, r9 +; CHECK-NEXT: umull r7, r6, r2, r3 +; CHECK-NEXT: muls r2, r3, r2 +; CHECK-NEXT: adds r5, r5, r7 +; CHECK-NEXT: adcs.w r4, r7, r6 +; CHECK-NEXT: b .LBB31_17 +; CHECK-NEXT: .LBB31_16: @ %fp-to-i-if-exp.large19 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: add r6, sp, #136 +; CHECK-NEXT: strd r7, r8, [sp, #152] +; CHECK-NEXT: sub.w r7, r2, #150 +; CHECK-NEXT: movs r2, #12 +; CHECK-NEXT: adds r6, #16 +; CHECK-NEXT: and.w r2, r2, r7, lsr #3 +; CHECK-NEXT: strd r8, r8, [sp, #160] +; CHECK-NEXT: strd r8, r8, [sp, #144] +; CHECK-NEXT: subs r6, r6, r2 +; CHECK-NEXT: strd r8, r8, [sp, #136] +; CHECK-NEXT: and lr, r7, #31 +; CHECK-NEXT: ldrd r2, r5, [r6] +; CHECK-NEXT: ldr r1, [r6, #8] +; CHECK-NEXT: mov r7, r5 +; CHECK-NEXT: str r1, [sp, #52] @ 4-byte Spill +; CHECK-NEXT: mov r10, r2 +; CHECK-NEXT: ldr r1, [r6, #12] +; CHECK-NEXT: lsll r10, r7, lr +; CHECK-NEXT: lsrl r2, r5, #1 +; CHECK-NEXT: umull r6, r4, r10, r9 +; CHECK-NEXT: umull r12, r11, r10, r3 +; CHECK-NEXT: str r6, [sp, #60] @ 4-byte Spill +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: umlal r4, r6, r7, r9 +; CHECK-NEXT: str.w r12, [sp, #40] @ 4-byte Spill +; CHECK-NEXT: adds.w r4, r4, r12 +; CHECK-NEXT: str r4, [sp, #56] @ 4-byte Spill +; CHECK-NEXT: eor r4, lr, #63 +; CHECK-NEXT: adcs.w r12, r6, r11 +; CHECK-NEXT: rsb.w r4, r4, #0 +; CHECK-NEXT: adc r6, r8, #0 +; CHECK-NEXT: lsll r2, r5, r4 +; CHECK-NEXT: ldr r4, [sp, #52] @ 4-byte Reload +; CHECK-NEXT: umlal r12, r6, r7, r3 +; CHECK-NEXT: lsll r4, r1, lr +; CHECK-NEXT: orrs r2, r4 +; CHECK-NEXT: orrs r5, r1 +; CHECK-NEXT: umull lr, r4, r9, r2 +; CHECK-NEXT: mla r1, r9, r5, r4 +; CHECK-NEXT: mla r4, r3, r2, r1 +; CHECK-NEXT: ldr r1, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: mla r2, r3, r7, r11 +; CHECK-NEXT: mov.w r11, #-2147483648 +; CHECK-NEXT: mla r2, r3, r10, r2 +; CHECK-NEXT: adds.w r3, r1, lr +; CHECK-NEXT: ldrd r5, r1, [sp, #56] @ 8-byte Folded Reload +; CHECK-NEXT: mov.w r10, #-1 +; CHECK-NEXT: adcs r2, r4 +; CHECK-NEXT: adds.w r4, r12, r3 +; CHECK-NEXT: mvn r12, #-2147483648 +; CHECK-NEXT: .LBB31_17: @ %fp-to-i-cleanup14 +; CHECK-NEXT: adcs r6, r2 +; CHECK-NEXT: .LBB31_18: @ %fp-to-i-cleanup14 +; CHECK-NEXT: strd r4, r6, [sp, #36] @ 8-byte Folded Spill +; CHECK-NEXT: vmov r6, s2 +; CHECK-NEXT: strd r5, r1, [sp, #56] @ 8-byte Folded Spill +; CHECK-NEXT: mov.w r1, #-1 +; CHECK-NEXT: mov.w r2, #-1 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: cmp.w r6, #-1 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r1, #0 +; CHECK-NEXT: ubfx r7, r6, #23, #8 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r2, #1 +; CHECK-NEXT: cmp r7, #127 +; CHECK-NEXT: blo .LBB31_22 +; CHECK-NEXT: @ %bb.19: @ %fp-to-i-cleanup14 +; CHECK-NEXT: vcmp.f32 s2, s2 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: bvs .LBB31_26 +; CHECK-NEXT: @ %bb.20: @ %fp-to-i-if-check.saturate2 +; CHECK-NEXT: cmp r7, #254 +; CHECK-NEXT: blo .LBB31_23 +; CHECK-NEXT: @ %bb.21: @ %fp-to-i-if-saturate3 +; CHECK-NEXT: bic.w r5, r10, r6, asr #31 +; CHECK-NEXT: cmp.w r6, #-1 +; CHECK-NEXT: csel r8, r12, r11, gt +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: mov lr, r5 +; CHECK-NEXT: b .LBB31_26 +; CHECK-NEXT: .LBB31_22: +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: b .LBB31_26 +; CHECK-NEXT: .LBB31_23: @ %fp-to-i-if-check.exp.size4 +; CHECK-NEXT: movw r3, #65535 +; CHECK-NEXT: cmp r7, #149 +; CHECK-NEXT: movt r3, #127 +; CHECK-NEXT: and.w r3, r3, r6 +; CHECK-NEXT: add.w r6, r3, #8388608 +; CHECK-NEXT: bhi .LBB31_25 +; CHECK-NEXT: @ %bb.24: @ %fp-to-i-if-exp.small5 +; CHECK-NEXT: rsb.w r3, r7, #150 +; CHECK-NEXT: lsr.w r3, r6, r3 +; CHECK-NEXT: umull r7, r6, r3, r1 +; CHECK-NEXT: umull r5, r2, r3, r2 +; CHECK-NEXT: muls r1, r3, r1 +; CHECK-NEXT: adds r3, r7, r2 +; CHECK-NEXT: adcs.w lr, r7, r6 +; CHECK-NEXT: adc.w r8, r6, r1 +; CHECK-NEXT: b .LBB31_26 +; CHECK-NEXT: .LBB31_25: @ %fp-to-i-if-exp.large6 +; CHECK-NEXT: strd r6, r8, [sp, #120] +; CHECK-NEXT: sub.w r6, r7, #150 +; CHECK-NEXT: movs r7, #12 +; CHECK-NEXT: add r5, sp, #104 +; CHECK-NEXT: and.w r7, r7, r6, lsr #3 +; CHECK-NEXT: adds r5, #16 +; CHECK-NEXT: strd r8, r8, [sp, #128] +; CHECK-NEXT: subs r5, r5, r7 +; CHECK-NEXT: strd r8, r8, [sp, #112] +; CHECK-NEXT: and r11, r6, #31 +; CHECK-NEXT: strd r8, r8, [sp, #104] +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: ldrd r4, r7, [r5] +; CHECK-NEXT: ldr r3, [r5, #8] +; CHECK-NEXT: mov r9, r7 +; CHECK-NEXT: str r3, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: mov r10, r4 +; CHECK-NEXT: ldr r3, [r5, #12] +; CHECK-NEXT: lsll r10, r9, r11 +; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: umull r5, r3, r10, r2 +; CHECK-NEXT: lsrl r4, r7, #1 +; CHECK-NEXT: umull r12, lr, r10, r1 +; CHECK-NEXT: umlal r3, r6, r9, r2 +; CHECK-NEXT: str.w r12, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: str.w lr, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: adds.w r3, r3, r12 +; CHECK-NEXT: str r3, [sp, #52] @ 4-byte Spill +; CHECK-NEXT: eor r3, r11, #63 +; CHECK-NEXT: adcs.w r12, r6, lr +; CHECK-NEXT: rsb.w r3, r3, #0 +; CHECK-NEXT: ldr r6, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: lsll r4, r7, r3 +; CHECK-NEXT: ldr r3, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: adc r8, r8, #0 +; CHECK-NEXT: lsll r6, r3, r11 +; CHECK-NEXT: umlal r12, r8, r9, r1 +; CHECK-NEXT: orrs r3, r7 +; CHECK-NEXT: orr.w r7, r6, r4 +; CHECK-NEXT: umull lr, r4, r2, r7 +; CHECK-NEXT: mla r2, r2, r3, r4 +; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mla r3, r1, r9, r3 +; CHECK-NEXT: mla r2, r1, r7, r2 +; CHECK-NEXT: mla r1, r1, r10, r3 +; CHECK-NEXT: ldr r3, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: adds.w r3, r3, lr +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: adds.w lr, r12, r3 +; CHECK-NEXT: ldr r3, [sp, #52] @ 4-byte Reload +; CHECK-NEXT: adc.w r8, r8, r1 +; CHECK-NEXT: .LBB31_26: @ %fp-to-i-cleanup1 +; CHECK-NEXT: str r5, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: vmov r5, s3 +; CHECK-NEXT: mov.w r1, #-1 +; CHECK-NEXT: mov.w r2, #-1 +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: str r3, [sp, #52] @ 4-byte Spill +; CHECK-NEXT: cmp.w r5, #-1 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r1, #0 +; CHECK-NEXT: ubfx r4, r5, #23, #8 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r2, #1 +; CHECK-NEXT: cmp r4, #127 +; CHECK-NEXT: blo .LBB31_30 +; CHECK-NEXT: @ %bb.27: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vcmp.f32 s3, s3 +; CHECK-NEXT: mov.w r9, #-2147483648 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: mov.w r6, #-1 +; CHECK-NEXT: mvn r3, #-2147483648 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: bvs.w .LBB31_34 +; CHECK-NEXT: @ %bb.28: @ %fp-to-i-if-check.saturate +; CHECK-NEXT: cmp r4, #254 +; CHECK-NEXT: blo .LBB31_31 +; CHECK-NEXT: @ %bb.29: @ %fp-to-i-if-saturate +; CHECK-NEXT: bic.w r7, r6, r5, asr #31 +; CHECK-NEXT: cmp.w r5, #-1 +; CHECK-NEXT: csel r11, r3, r9, gt +; CHECK-NEXT: mov r10, r7 +; CHECK-NEXT: mov r12, r7 +; CHECK-NEXT: b .LBB31_34 +; CHECK-NEXT: .LBB31_30: +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: b .LBB31_34 +; CHECK-NEXT: .LBB31_31: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: movw r3, #65535 +; CHECK-NEXT: cmp r4, #149 +; CHECK-NEXT: movt r3, #127 +; CHECK-NEXT: and.w r3, r3, r5 +; CHECK-NEXT: add.w r3, r3, #8388608 +; CHECK-NEXT: bhi .LBB31_33 +; CHECK-NEXT: @ %bb.32: @ %fp-to-i-if-exp.small +; CHECK-NEXT: rsb.w r7, r4, #150 +; CHECK-NEXT: lsrs r3, r7 +; CHECK-NEXT: umull r6, r4, r3, r1 +; CHECK-NEXT: umull r7, r2, r3, r2 +; CHECK-NEXT: muls r1, r3, r1 +; CHECK-NEXT: adds.w r10, r6, r2 +; CHECK-NEXT: adcs.w r12, r6, r4 +; CHECK-NEXT: adc.w r11, r4, r1 +; CHECK-NEXT: b .LBB31_34 +; CHECK-NEXT: .LBB31_33: @ %fp-to-i-if-exp.large +; CHECK-NEXT: strd r3, r7, [sp, #88] +; CHECK-NEXT: sub.w r3, r4, #150 +; CHECK-NEXT: movs r4, #12 +; CHECK-NEXT: add r5, sp, #72 +; CHECK-NEXT: and.w r4, r4, r3, lsr #3 +; CHECK-NEXT: adds r5, #16 +; CHECK-NEXT: strd r7, r7, [sp, #96] +; CHECK-NEXT: subs r4, r5, r4 +; CHECK-NEXT: strd r7, r7, [sp, #80] +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: strd r7, r7, [sp, #72] +; CHECK-NEXT: ldrd r6, r5, [r4] +; CHECK-NEXT: str.w r8, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: and r8, r3, #31 +; CHECK-NEXT: ldr r7, [r4, #8] +; CHECK-NEXT: mov r11, r5 +; CHECK-NEXT: mov r12, r6 +; CHECK-NEXT: str r7, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: ldr r7, [r4, #12] +; CHECK-NEXT: lsll r12, r11, r8 +; CHECK-NEXT: str r7, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: umull r7, r10, r12, r2 +; CHECK-NEXT: str.w r8, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: lsrl r6, r5, #1 +; CHECK-NEXT: umull r3, r4, r12, r1 +; CHECK-NEXT: umlal r10, r9, r11, r2 +; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: str r4, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: adds.w r10, r10, r3 +; CHECK-NEXT: mov.w r3, #0 +; CHECK-NEXT: adcs.w r9, r9, r4 +; CHECK-NEXT: eor r4, r8, #63 +; CHECK-NEXT: adc r3, r3, #0 +; CHECK-NEXT: rsbs r4, r4, #0 +; CHECK-NEXT: umlal r9, r3, r11, r1 +; CHECK-NEXT: lsll r6, r5, r4 +; CHECK-NEXT: ldr.w r8, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr r4, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: ldr r3, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: lsll r8, r3, r4 +; CHECK-NEXT: orr.w r4, r3, r5 +; CHECK-NEXT: orr.w r5, r8, r6 +; CHECK-NEXT: ldr.w r8, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: umull r3, r6, r2, r5 +; CHECK-NEXT: mla r2, r2, r4, r6 +; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mla r3, r1, r11, r3 +; CHECK-NEXT: mla r2, r1, r5, r2 +; CHECK-NEXT: mla r1, r1, r12, r3 +; CHECK-NEXT: ldrd r3, r6, [sp, #20] @ 8-byte Folded Reload +; CHECK-NEXT: adds r3, r3, r6 +; CHECK-NEXT: adcs r1, r2 +; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: adds.w r12, r9, r3 +; CHECK-NEXT: adc.w r11, r2, r1 +; CHECK-NEXT: .LBB31_34: @ %fp-to-i-cleanup +; CHECK-NEXT: str r7, [r0, #48] +; CHECK-NEXT: ldr r1, [sp, #68] @ 4-byte Reload +; CHECK-NEXT: str r1, [r0] +; CHECK-NEXT: str.w r10, [r0, #52] +; CHECK-NEXT: ldr r1, [sp, #64] @ 4-byte Reload +; CHECK-NEXT: str r1, [r0, #4] +; CHECK-NEXT: str.w r12, [r0, #56] +; CHECK-NEXT: ldr r1, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: str r1, [r0, #8] +; CHECK-NEXT: str.w r11, [r0, #60] +; CHECK-NEXT: ldr r1, [sp, #48] @ 4-byte Reload +; CHECK-NEXT: str r1, [r0, #12] +; CHECK-NEXT: ldr r1, [sp, #60] @ 4-byte Reload +; CHECK-NEXT: str r1, [r0, #16] +; CHECK-NEXT: ldr r1, [sp, #56] @ 4-byte Reload +; CHECK-NEXT: str r1, [r0, #20] +; CHECK-NEXT: ldr r1, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: str r1, [r0, #24] +; CHECK-NEXT: ldr r1, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: str r1, [r0, #28] +; CHECK-NEXT: ldr r1, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: str r1, [r0, #32] +; CHECK-NEXT: ldr r1, [sp, #52] @ 4-byte Reload +; CHECK-NEXT: strd r1, lr, [r0, #36] +; CHECK-NEXT: str.w r8, [r0, #44] +; CHECK-NEXT: add sp, #204 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %x = call <4 x i128> @llvm.fptosi.sat.v4f32.v4i128(<4 x float> %f) + ret <4 x i128> %x +} + +; +; 2-Vector double to signed integer -- result size variation +; + +declare <2 x i1> @llvm.fptosi.sat.v2f64.v2i1 (<2 x double>) +declare <2 x i8> @llvm.fptosi.sat.v2f64.v2i8 (<2 x double>) +declare <2 x i13> @llvm.fptosi.sat.v2f64.v2i13 (<2 x double>) +declare <2 x i16> @llvm.fptosi.sat.v2f64.v2i16 (<2 x double>) +declare <2 x i19> @llvm.fptosi.sat.v2f64.v2i19 (<2 x double>) +declare <2 x i50> @llvm.fptosi.sat.v2f64.v2i50 (<2 x double>) +declare <2 x i64> @llvm.fptosi.sat.v2f64.v2i64 (<2 x double>) +declare <2 x i100> @llvm.fptosi.sat.v2f64.v2i100(<2 x double>) +declare <2 x i128> @llvm.fptosi.sat.v2f64.v2i128(<2 x double>) + +define arm_aapcs_vfpcc <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) { +; CHECK-LABEL: test_signed_v2f64_v2i1: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, s19 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vmov r5, s18 -; CHECK-NEXT: vldr s22, .LCPI31_0 -; CHECK-NEXT: vldr s20, .LCPI31_1 -; CHECK-NEXT: add.w r12, r4, #48 -; CHECK-NEXT: vcmp.f32 s19, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vcmp.f32 s19, s19 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: ittt vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: vmov r7, s16 -; CHECK-NEXT: vmov r6, s17 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: add.w r12, r4, #32 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s17, s22 -; CHECK-NEXT: add.w r12, r4, #16 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vcmp.f32 s17, s17 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm r4!, {r0, r1, r2, r3} -; CHECK-NEXT: vpop {d8, d9, d10, d11} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI31_0: -; CHECK-NEXT: .long 0xff000000 @ float -1.70141183E+38 -; CHECK-NEXT: .LCPI31_1: -; CHECK-NEXT: .long 0x7effffff @ float 1.70141173E+38 - %x = call <4 x i128> @llvm.fptosi.sat.v4f32.v4i128(<4 x float> %f) - ret <4 x i128> %x -} - -; -; 2-Vector double to signed integer -- result size variation -; - -declare <2 x i1> @llvm.fptosi.sat.v2f64.v2i1 (<2 x double>) -declare <2 x i8> @llvm.fptosi.sat.v2f64.v2i8 (<2 x double>) -declare <2 x i13> @llvm.fptosi.sat.v2f64.v2i13 (<2 x double>) -declare <2 x i16> @llvm.fptosi.sat.v2f64.v2i16 (<2 x double>) -declare <2 x i19> @llvm.fptosi.sat.v2f64.v2i19 (<2 x double>) -declare <2 x i50> @llvm.fptosi.sat.v2f64.v2i50 (<2 x double>) -declare <2 x i64> @llvm.fptosi.sat.v2f64.v2i64 (<2 x double>) -declare <2 x i100> @llvm.fptosi.sat.v2f64.v2i100(<2 x double>) -declare <2 x i128> @llvm.fptosi.sat.v2f64.v2i128(<2 x double>) - -define arm_aapcs_vfpcc <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) { -; CHECK-LABEL: test_signed_v2f64_v2i1: -; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI32_0 ; CHECK-NEXT: vmov r8, r7, d8 @@ -3119,556 +3816,573 @@ define arm_aapcs_vfpcc <2 x i64> @test_signed_v2f64_v2i64(<2 x double> %f) { define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) { ; CHECK-LABEL: test_signed_v2f64_v2i100: -; CHECK: @ %bb.0: +; CHECK: @ %bb.0: @ %fp-to-i-entryfp-to-i-entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #48 -; CHECK-NEXT: sub sp, #48 +; CHECK-NEXT: .pad #104 +; CHECK-NEXT: sub sp, #104 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vldr d0, .LCPI40_0 -; CHECK-NEXT: vmov r7, r6, d8 ; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: vmov r10, r9, d0 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: vldr d0, .LCPI40_1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: strd r1, r0, [sp, #16] @ 8-byte Folded Spill -; CHECK-NEXT: csel r4, r2, r4, ne -; CHECK-NEXT: vmov r5, r11, d0 -; CHECK-NEXT: str r3, [sp, #32] @ 4-byte Spill -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: vmov r6, r5, d8 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: mov.w r9, #15 +; CHECK-NEXT: mov.w r10, #-1 +; CHECK-NEXT: movw r1, #1023 +; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: ubfx r7, r5, #20, #11 +; CHECK-NEXT: cmp r7, r1 +; CHECK-NEXT: blo .LBB40_4 +; CHECK-NEXT: @ %bb.1: @ %fp-to-i-entryfp-to-i-entry +; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpun +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str.w r8, [sp, #44] @ 4-byte Spill -; CHECK-NEXT: str.w r4, [r8, #8] -; CHECK-NEXT: str.w r9, [sp, #36] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: str.w r11, [sp, #40] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: bne.w .LBB40_9 +; CHECK-NEXT: @ %bb.2: @ %fp-to-i-if-check.saturate2 +; CHECK-NEXT: movw r0, #1122 +; CHECK-NEXT: cmp r7, r0 +; CHECK-NEXT: blo .LBB40_5 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-saturate3 +; CHECK-NEXT: mvns r0, r5 +; CHECK-NEXT: lsrs r0, r0, #31 +; CHECK-NEXT: mov.w r0, #7 +; CHECK-NEXT: cinv r12, r0, eq +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: mov lr, r0 +; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: b .LBB40_9 +; CHECK-NEXT: .LBB40_4: +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: b .LBB40_9 +; CHECK-NEXT: .LBB40_5: @ %fp-to-i-if-check.exp.size4 +; CHECK-NEXT: movs r0, #1 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: bfi r3, r0, #20, #12 +; CHECK-NEXT: orr.w r4, r0, r5, asr #31 +; CHECK-NEXT: asr.w r11, r5, #31 +; CHECK-NEXT: movw r2, #1074 +; CHECK-NEXT: cmp r7, r2 +; CHECK-NEXT: bhi .LBB40_7 +; CHECK-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small5 +; CHECK-NEXT: subw r2, r7, #1075 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: lsll r6, r3, r2 +; CHECK-NEXT: umull r1, r2, r6, r4 +; CHECK-NEXT: umlal r2, r5, r3, r4 +; CHECK-NEXT: str r1, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: umull r1, r7, r6, r11 +; CHECK-NEXT: adds.w lr, r1, r2 +; CHECK-NEXT: adcs.w r2, r5, r7 +; CHECK-NEXT: mla r7, r11, r3, r7 +; CHECK-NEXT: adc r5, r12, #0 +; CHECK-NEXT: umlal r2, r5, r3, r11 +; CHECK-NEXT: mla r0, r11, r6, r7 +; CHECK-NEXT: adds r4, r2, r1 +; CHECK-NEXT: b .LBB40_8 +; CHECK-NEXT: .LBB40_7: @ %fp-to-i-if-exp.large6 +; CHECK-NEXT: strd r6, r3, [sp, #88] +; CHECK-NEXT: subw r3, r7, #1075 +; CHECK-NEXT: movs r2, #12 +; CHECK-NEXT: add r7, sp, #72 +; CHECK-NEXT: and.w r2, r2, r3, lsr #3 +; CHECK-NEXT: adds r7, #16 +; CHECK-NEXT: strd r12, r12, [sp, #96] +; CHECK-NEXT: subs r5, r7, r2 +; CHECK-NEXT: strd r12, r12, [sp, #80] +; CHECK-NEXT: and r10, r3, #31 +; CHECK-NEXT: strd r12, r12, [sp, #72] +; CHECK-NEXT: ldrd r2, r7, [r5] +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: ldrd r0, r9, [r5, #8] +; CHECK-NEXT: str r4, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: mov r6, r2 +; CHECK-NEXT: lsrl r2, r7, #1 +; CHECK-NEXT: lsll r6, r3, r10 +; CHECK-NEXT: lsll r0, r9, r10 +; CHECK-NEXT: umull r1, r5, r6, r4 +; CHECK-NEXT: str r1, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: umlal r5, r1, r3, r4 +; CHECK-NEXT: umull lr, r4, r6, r11 +; CHECK-NEXT: strd r4, lr, [sp, #28] @ 8-byte Folded Spill +; CHECK-NEXT: adds.w lr, lr, r5 +; CHECK-NEXT: adcs r1, r4 +; CHECK-NEXT: adc r5, r12, #0 +; CHECK-NEXT: eor r12, r10, #63 +; CHECK-NEXT: rsb.w r12, r12, #0 +; CHECK-NEXT: umlal r1, r5, r3, r11 +; CHECK-NEXT: lsll r2, r7, r12 +; CHECK-NEXT: orrs r2, r0 +; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: orr.w r10, r9, r7 +; CHECK-NEXT: mov.w r9, #15 +; CHECK-NEXT: umull r12, r7, r0, r2 +; CHECK-NEXT: mla r7, r0, r10, r7 +; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mov.w r10, #-1 +; CHECK-NEXT: mla r3, r11, r3, r0 +; CHECK-NEXT: mla r2, r11, r2, r7 +; CHECK-NEXT: mla r0, r11, r6, r3 +; CHECK-NEXT: ldr r3, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: adds.w r3, r3, r12 +; CHECK-NEXT: adcs r0, r2 +; CHECK-NEXT: adds r4, r1, r3 +; CHECK-NEXT: .LBB40_8: @ %fp-to-i-cleanup1 +; CHECK-NEXT: adc.w r12, r5, r0 +; CHECK-NEXT: movw r1, #1023 +; CHECK-NEXT: .LBB40_9: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r6, r5, d9 +; CHECK-NEXT: mov.w r11, #-1 +; CHECK-NEXT: cmp.w r5, #-1 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r9, #0 +; CHECK-NEXT: str.w r9, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r11, #0 +; CHECK-NEXT: movgt.w r10, #1 +; CHECK-NEXT: ubfx r7, r5, #20, #11 +; CHECK-NEXT: mov r9, r10 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: cmp r7, r1 +; CHECK-NEXT: blo .LBB40_13 +; CHECK-NEXT: @ %bb.10: @ %fp-to-i-cleanup1 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: str.w r12, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: strd lr, r4, [sp, #24] @ 8-byte Folded Spill ; CHECK-NEXT: bl __aeabi_dcmpun +; CHECK-NEXT: ldr.w r12, [sp, #32] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: ldrd lr, r4, [sp, #24] @ 8-byte Folded Reload +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov.w r0, #0 +; CHECK-NEXT: mov.w r2, #0 +; CHECK-NEXT: bne.w .LBB40_18 +; CHECK-NEXT: @ %bb.11: @ %fp-to-i-if-check.saturate +; CHECK-NEXT: movw r0, #1122 +; CHECK-NEXT: cmp r7, r0 +; CHECK-NEXT: blo .LBB40_14 +; CHECK-NEXT: @ %bb.12: @ %fp-to-i-if-saturate +; CHECK-NEXT: mvns r0, r5 +; CHECK-NEXT: lsrs r0, r0, #31 +; CHECK-NEXT: mov.w r0, #7 +; CHECK-NEXT: csetm r10, ne +; CHECK-NEXT: cinv r2, r0, eq +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: b .LBB40_18 +; CHECK-NEXT: .LBB40_13: +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: b .LBB40_18 +; CHECK-NEXT: .LBB40_14: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: movs r0, #1 +; CHECK-NEXT: bfi r5, r0, #20, #12 +; CHECK-NEXT: movw r0, #1074 +; CHECK-NEXT: cmp r7, r0 +; CHECK-NEXT: bhi .LBB40_16 +; CHECK-NEXT: @ %bb.15: @ %fp-to-i-if-exp.small +; CHECK-NEXT: subw r0, r7, #1075 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: lsll r6, r5, r0 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: umull r10, r0, r6, r9 +; CHECK-NEXT: umlal r0, r3, r5, r9 +; CHECK-NEXT: umull r7, r2, r6, r11 +; CHECK-NEXT: adds r1, r7, r0 +; CHECK-NEXT: adcs.w r0, r3, r2 +; CHECK-NEXT: mla r2, r11, r5, r2 +; CHECK-NEXT: adc r3, r12, #0 +; CHECK-NEXT: umlal r0, r3, r5, r11 +; CHECK-NEXT: ldr r5, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mla r2, r5, r6, r2 +; CHECK-NEXT: adds r0, r0, r7 +; CHECK-NEXT: adcs r2, r3 +; CHECK-NEXT: b .LBB40_17 +; CHECK-NEXT: .LBB40_16: @ %fp-to-i-if-exp.large +; CHECK-NEXT: subw r0, r7, #1075 +; CHECK-NEXT: movs r1, #12 +; CHECK-NEXT: add r2, sp, #40 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: and.w r1, r1, r0, lsr #3 +; CHECK-NEXT: adds r2, #16 +; CHECK-NEXT: strd r6, r5, [sp, #56] +; CHECK-NEXT: subs r1, r2, r1 +; CHECK-NEXT: strd r12, r12, [sp, #64] +; CHECK-NEXT: and r6, r0, #31 +; CHECK-NEXT: strd r12, r12, [sp, #48] +; CHECK-NEXT: strd r12, r12, [sp, #40] +; CHECK-NEXT: ldrd r2, r7, [r1] +; CHECK-NEXT: ldr r3, [r1, #8] +; CHECK-NEXT: mov r5, r7 +; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str.w r4, [r8, #4] -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: str r7, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: str r6, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: ldr r1, [r1, #12] +; CHECK-NEXT: lsll r0, r5, r6 +; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: umull r10, r1, r0, r9 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: lsrl r2, r7, #1 +; CHECK-NEXT: umull r4, lr, r0, r11 +; CHECK-NEXT: umlal r1, r9, r5, r3 +; CHECK-NEXT: strd lr, r4, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: adds r1, r1, r4 +; CHECK-NEXT: adcs.w r4, r9, lr +; CHECK-NEXT: eor lr, r6, #63 +; CHECK-NEXT: rsb.w lr, lr, #0 +; CHECK-NEXT: adc r12, r12, #0 +; CHECK-NEXT: lsll r2, r7, lr +; CHECK-NEXT: ldrd r9, lr, [sp, #12] @ 8-byte Folded Reload +; CHECK-NEXT: umlal r4, r12, r5, r11 +; CHECK-NEXT: lsll lr, r9, r6 +; CHECK-NEXT: orr.w r2, r2, lr +; CHECK-NEXT: orr.w r7, r7, r9 +; CHECK-NEXT: umull lr, r6, r3, r2 +; CHECK-NEXT: mla r3, r3, r7, r6 +; CHECK-NEXT: mla r2, r11, r2, r3 +; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mla r3, r11, r5, r3 +; CHECK-NEXT: ldr r5, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mla r0, r5, r0, r3 +; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: adds.w r3, r3, lr +; CHECK-NEXT: ldr.w lr, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: adcs r2, r0 +; CHECK-NEXT: adds r0, r4, r3 +; CHECK-NEXT: adc.w r2, r2, r12 +; CHECK-NEXT: .LBB40_17: @ %fp-to-i-cleanup +; CHECK-NEXT: ldrd r4, r12, [sp, #28] @ 8-byte Folded Reload +; CHECK-NEXT: .LBB40_18: @ %fp-to-i-cleanup +; CHECK-NEXT: mov r6, r10 +; CHECK-NEXT: ldr r3, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: lsrl r6, r1, #28 +; CHECK-NEXT: str.w r3, [r8] +; CHECK-NEXT: orr.w r1, r1, r0, lsl #4 +; CHECK-NEXT: str.w r6, [r8, #16] +; CHECK-NEXT: str.w lr, [r8, #4] +; CHECK-NEXT: str.w r1, [r8, #20] +; CHECK-NEXT: and r1, r2, #15 +; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: str.w r4, [r8, #8] +; CHECK-NEXT: strb.w r0, [r8, #24] +; CHECK-NEXT: and r0, r12, #15 +; CHECK-NEXT: orr.w r0, r0, r10, lsl #4 +; CHECK-NEXT: str.w r0, [r8, #12] +; CHECK-NEXT: add sp, #104 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %x = call <2 x i100> @llvm.fptosi.sat.v2f64.v2i100(<2 x double> %f) + ret <2 x i100> %x +} + +define arm_aapcs_vfpcc <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) { +; CHECK-LABEL: test_signed_v2f64_v2i128: +; CHECK: @ %bb.0: @ %fp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: .pad #104 +; CHECK-NEXT: sub sp, #104 +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: vmov r6, r5, d8 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: mov.w r10, #-1 +; CHECK-NEXT: movw r2, #1023 +; CHECK-NEXT: ubfx r7, r5, #20, #11 +; CHECK-NEXT: cmp r7, r2 +; CHECK-NEXT: blo .LBB41_4 +; CHECK-NEXT: @ %bb.1: @ %fp-to-i-entryfp-to-i-entry +; CHECK-NEXT: mov r8, r2 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: vmov r9, r8, d9 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: str r4, [r0] -; CHECK-NEXT: ldr.w r11, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: strd r2, r3, [sp, #12] @ 8-byte Folded Spill -; CHECK-NEXT: csel r7, r1, r4, ne -; CHECK-NEXT: mov r4, r5 -; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: ldr r5, [sp, #40] @ 4-byte Reload -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: bne.w .LBB41_8 +; CHECK-NEXT: @ %bb.2: @ %fp-to-i-if-check.saturate2 +; CHECK-NEXT: movw r0, #1150 +; CHECK-NEXT: cmp r7, r0 +; CHECK-NEXT: blo .LBB41_5 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-saturate3 +; CHECK-NEXT: mvns r0, r5 +; CHECK-NEXT: lsrs r0, r0, #31 +; CHECK-NEXT: csetm r4, ne +; CHECK-NEXT: mvn r0, #-2147483648 +; CHECK-NEXT: cinv r12, r0, eq +; CHECK-NEXT: mov lr, r4 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: b .LBB41_8 +; CHECK-NEXT: .LBB41_4: +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: b .LBB41_8 +; CHECK-NEXT: .LBB41_5: @ %fp-to-i-if-check.exp.size4 +; CHECK-NEXT: movs r0, #1 ; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: bfi r3, r0, #20, #12 +; CHECK-NEXT: orr.w r8, r0, r5, asr #31 +; CHECK-NEXT: movw r9, #1023 +; CHECK-NEXT: asr.w r12, r5, #31 +; CHECK-NEXT: movw r2, #1074 +; CHECK-NEXT: cmp r7, r2 +; CHECK-NEXT: bhi .LBB41_7 +; CHECK-NEXT: @ %bb.6: @ %fp-to-i-if-exp.small5 +; CHECK-NEXT: subw r2, r7, #1075 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: lsll r6, r3, r2 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: umull r4, r2, r6, r8 +; CHECK-NEXT: umlal r2, r5, r3, r8 +; CHECK-NEXT: umull r1, r7, r6, r12 +; CHECK-NEXT: adds.w lr, r1, r2 +; CHECK-NEXT: adcs.w r2, r5, r7 +; CHECK-NEXT: mla r7, r12, r3, r7 +; CHECK-NEXT: adc r5, r0, #0 +; CHECK-NEXT: umlal r2, r5, r3, r12 +; CHECK-NEXT: mla r0, r12, r6, r7 +; CHECK-NEXT: adds r1, r1, r2 ; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r7, #-1 +; CHECK-NEXT: adc.w r12, r5, r0 +; CHECK-NEXT: b .LBB41_8 +; CHECK-NEXT: .LBB41_7: @ %fp-to-i-if-exp.large6 +; CHECK-NEXT: strd r6, r3, [sp, #88] +; CHECK-NEXT: subw r3, r7, #1075 +; CHECK-NEXT: movs r2, #12 +; CHECK-NEXT: add r7, sp, #72 +; CHECK-NEXT: and.w r2, r2, r3, lsr #3 +; CHECK-NEXT: adds r7, #16 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: subs r5, r7, r2 +; CHECK-NEXT: strd r0, r0, [sp, #96] +; CHECK-NEXT: and r4, r3, #31 +; CHECK-NEXT: strd r0, r0, [sp, #80] +; CHECK-NEXT: strd r0, r0, [sp, #72] +; CHECK-NEXT: ldm.w r5, {r2, r7, lr} +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: ldr r1, [r5, #12] +; CHECK-NEXT: mov r6, r2 +; CHECK-NEXT: str r1, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: lsll r6, r3, r4 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: umull r0, r5, r6, r8 +; CHECK-NEXT: lsrl r2, r7, #1 +; CHECK-NEXT: umlal r5, r1, r3, r8 +; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: umull r0, r9, r6, r12 +; CHECK-NEXT: strd r9, r0, [sp, #20] @ 8-byte Folded Spill +; CHECK-NEXT: adds r0, r0, r5 +; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: adcs.w r0, r1, r9 +; CHECK-NEXT: mov.w r1, #0 +; CHECK-NEXT: adc r5, r1, #0 +; CHECK-NEXT: eor r1, r4, #63 +; CHECK-NEXT: umlal r0, r5, r3, r12 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: lsll r2, r7, r1 +; CHECK-NEXT: ldr r1, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: lsll lr, r1, r4 +; CHECK-NEXT: ldr r4, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: orr.w r2, r2, lr +; CHECK-NEXT: orrs r1, r7 +; CHECK-NEXT: ldr.w lr, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: umull r9, r7, r8, r2 +; CHECK-NEXT: mla r1, r8, r1, r7 +; CHECK-NEXT: mla r1, r12, r2, r1 +; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mla r2, r12, r3, r2 +; CHECK-NEXT: mla r3, r12, r6, r2 +; CHECK-NEXT: ldr r2, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: adds.w r2, r2, r9 +; CHECK-NEXT: adcs r3, r1 +; CHECK-NEXT: adds r1, r0, r2 +; CHECK-NEXT: adc.w r12, r5, r3 +; CHECK-NEXT: movw r2, #1023 +; CHECK-NEXT: .LBB41_8: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r6, r5, d9 +; CHECK-NEXT: mov.w r9, #-1 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: strd r1, r4, [sp, #32] @ 8-byte Folded Spill +; CHECK-NEXT: cmp.w r5, #-1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r9, #0 +; CHECK-NEXT: movgt.w r10, #1 +; CHECK-NEXT: ubfx r7, r5, #20, #11 +; CHECK-NEXT: cmp r7, r2 +; CHECK-NEXT: blo .LBB41_12 +; CHECK-NEXT: @ %bb.9: @ %fp-to-i-cleanup1 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: str.w r10, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: mov r10, r12 +; CHECK-NEXT: str.w lr, [sp, #28] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpun +; CHECK-NEXT: ldr.w lr, [sp, #28] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: csel r6, r6, r0, ne -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: mov r12, r10 +; CHECK-NEXT: mov.w r0, #0 +; CHECK-NEXT: mov.w r2, #0 +; CHECK-NEXT: mov.w r1, #0 +; CHECK-NEXT: bne.w .LBB41_16 +; CHECK-NEXT: @ %bb.10: @ %fp-to-i-if-check.saturate +; CHECK-NEXT: movw r0, #1150 +; CHECK-NEXT: cmp r7, r0 +; CHECK-NEXT: blo .LBB41_13 +; CHECK-NEXT: @ %bb.11: @ %fp-to-i-if-saturate +; CHECK-NEXT: mvns r0, r5 +; CHECK-NEXT: lsrs r0, r0, #31 +; CHECK-NEXT: mvn r0, #-2147483648 +; CHECK-NEXT: csetm r8, ne +; CHECK-NEXT: cinv r1, r0, eq +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: b .LBB41_16 +; CHECK-NEXT: .LBB41_12: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: b .LBB41_16 +; CHECK-NEXT: .LBB41_13: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: movs r0, #1 +; CHECK-NEXT: bfi r5, r0, #20, #12 +; CHECK-NEXT: movw r0, #1074 +; CHECK-NEXT: cmp r7, r0 +; CHECK-NEXT: bhi .LBB41_15 +; CHECK-NEXT: @ %bb.14: @ %fp-to-i-if-exp.small +; CHECK-NEXT: ldr r3, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: subw r0, r7, #1075 +; CHECK-NEXT: lsll r6, r5, r0 +; CHECK-NEXT: umull r8, r0, r6, r3 +; CHECK-NEXT: umlal r0, r2, r5, r3 +; CHECK-NEXT: umull r3, r7, r6, r9 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: adcs r2, r7 +; CHECK-NEXT: mla r7, r9, r5, r7 +; CHECK-NEXT: adc r1, r1, #0 +; CHECK-NEXT: umlal r2, r1, r5, r9 +; CHECK-NEXT: mla r7, r9, r6, r7 +; CHECK-NEXT: adds r2, r2, r3 +; CHECK-NEXT: adcs r1, r7 +; CHECK-NEXT: b .LBB41_16 +; CHECK-NEXT: .LBB41_15: @ %fp-to-i-if-exp.large +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: add r3, sp, #40 +; CHECK-NEXT: strd r0, r0, [sp, #64] +; CHECK-NEXT: movs r2, #12 +; CHECK-NEXT: strd r0, r0, [sp, #48] +; CHECK-NEXT: adds r3, #16 +; CHECK-NEXT: strd r0, r0, [sp, #40] +; CHECK-NEXT: subw r0, r7, #1075 +; CHECK-NEXT: strd r6, r5, [sp, #56] +; CHECK-NEXT: and r4, r0, #31 +; CHECK-NEXT: and.w r2, r2, r0, lsr #3 +; CHECK-NEXT: ldr.w lr, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: subs r3, r3, r2 +; CHECK-NEXT: ldrd r2, r5, [r3] +; CHECK-NEXT: ldr r1, [r3, #8] +; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: ldr r1, [r3, #12] +; CHECK-NEXT: mov r6, r2 ; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r6, #-1 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #0 -; CHECK-NEXT: ldr r1, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: lsrl r0, r7, #28 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: str r0, [r1, #16] -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: str.w r10, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: mov r6, r11 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ldr.w r11, [sp, #40] @ 4-byte Reload -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: orr.w r0, r7, r4, lsl #4 -; CHECK-NEXT: ldr r7, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: str r0, [r7, #20] -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: it eq -; CHECK-NEXT: mvneq r6, #7 -; CHECK-NEXT: mov r10, r5 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #7 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #0 -; CHECK-NEXT: and r1, r0, #15 -; CHECK-NEXT: lsrl r4, r1, #28 -; CHECK-NEXT: strb r4, [r7, #24] -; CHECK-NEXT: ldr r5, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: ldr r6, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: lsll r6, r3, r4 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: umull r8, r0, r6, lr +; CHECK-NEXT: lsrl r2, r5, #1 +; CHECK-NEXT: umull r7, r12, r6, r9 +; CHECK-NEXT: umlal r0, r1, r3, lr +; CHECK-NEXT: str r7, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: str.w r12, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: adds r0, r0, r7 +; CHECK-NEXT: mov.w r7, #0 +; CHECK-NEXT: adcs.w r1, r1, r12 +; CHECK-NEXT: mov r12, r4 +; CHECK-NEXT: adc r7, r7, #0 +; CHECK-NEXT: umlal r1, r7, r3, r9 +; CHECK-NEXT: str r7, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: eor r7, r4, #63 +; CHECK-NEXT: rsbs r7, r7, #0 +; CHECK-NEXT: ldr r4, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: lsll r2, r5, r7 +; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: lsll r4, r7, r12 +; CHECK-NEXT: mov r12, r10 +; CHECK-NEXT: orrs r2, r4 +; CHECK-NEXT: orrs r7, r5 +; CHECK-NEXT: umull r4, r5, lr, r2 +; CHECK-NEXT: mla r5, lr, r7, r5 +; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: ldr.w lr, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mla r3, r9, r3, r7 +; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mla r2, r9, r2, r5 +; CHECK-NEXT: adds r7, r7, r4 +; CHECK-NEXT: mla r3, r9, r6, r3 +; CHECK-NEXT: adcs r3, r2 +; CHECK-NEXT: adds r2, r1, r7 +; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: adcs r1, r3 +; CHECK-NEXT: .LBB41_16: @ %fp-to-i-cleanup ; CHECK-NEXT: ldr r3, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ldr r4, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: it eq -; CHECK-NEXT: mvneq r4, #7 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #7 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #0 -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 -; CHECK-NEXT: str r0, [r7, #12] -; CHECK-NEXT: add sp, #48 +; CHECK-NEXT: strd r3, lr, [r11] +; CHECK-NEXT: ldr r3, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: strd r3, r12, [r11, #8] +; CHECK-NEXT: strd r8, r0, [r11, #16] +; CHECK-NEXT: strd r2, r1, [r11, #24] +; CHECK-NEXT: add sp, #104 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; CHECK-NEXT: .p2align 3 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI40_0: -; CHECK-NEXT: .long 0 @ double -6.338253001141147E+29 -; CHECK-NEXT: .long 3323985920 -; CHECK-NEXT: .LCPI40_1: -; CHECK-NEXT: .long 4294967295 @ double 6.3382530011411463E+29 -; CHECK-NEXT: .long 1176502271 - %x = call <2 x i100> @llvm.fptosi.sat.v2f64.v2i100(<2 x double> %f) - ret <2 x i100> %x + %x = call <2 x i128> @llvm.fptosi.sat.v2f64.v2i128(<2 x double> %f) + ret <2 x i128> %x } -define arm_aapcs_vfpcc <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) { -; CHECK-LABEL: test_signed_v2f64_v2i128: -; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #32 -; CHECK-NEXT: sub sp, #32 -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vldr d0, .LCPI41_0 -; CHECK-NEXT: vmov r8, r7, d9 -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r4, r2 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: vldr d0, .LCPI41_1 -; CHECK-NEXT: mov r5, r3 -; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: vmov r10, r11, d0 -; CHECK-NEXT: str r2, [sp] @ 4-byte Spill -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r5, #-2147483648 -; CHECK-NEXT: str.w r11, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r5, #-2147483648 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: str.w r5, [r9, #28] -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: str r4, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: csel r5, r1, r0, ne -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: str.w r10, [sp, #28] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r5, #-1 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: str.w r5, [r9, #24] -; CHECK-NEXT: mov r11, r6 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str.w r4, [r9, #20] -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: ldr r2, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: vmov r6, r5, d8 -; CHECK-NEXT: mov r10, r9 -; CHECK-NEXT: str.w r9, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r9, r11 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr.w r11, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str.w r4, [r10, #16] -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: ldr.w r8, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov r9, r3 -; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r9, #-2147483648 -; CHECK-NEXT: ldr.w r10, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: mov r4, r2 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r9, #-2147483648 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r9, #0 -; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: str.w r9, [r7, #12] -; CHECK-NEXT: ldr.w r9, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: csel r4, r4, r0, ne -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str r4, [r7, #8] -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str r4, [r7, #4] -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str r4, [r7] -; CHECK-NEXT: add sp, #32 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; CHECK-NEXT: .p2align 3 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI41_0: -; CHECK-NEXT: .long 0 @ double -1.7014118346046923E+38 -; CHECK-NEXT: .long 3353346048 -; CHECK-NEXT: .LCPI41_1: -; CHECK-NEXT: .long 4294967295 @ double 1.7014118346046921E+38 -; CHECK-NEXT: .long 1205862399 - %x = call <2 x i128> @llvm.fptosi.sat.v2f64.v2i128(<2 x double> %f) - ret <2 x i128> %x -} - -; -; 4-Vector half to signed integer -- result size variation -; - -declare <8 x i1> @llvm.fptosi.sat.v8f16.v8i1 (<8 x half>) -declare <8 x i8> @llvm.fptosi.sat.v8f16.v8i8 (<8 x half>) -declare <8 x i13> @llvm.fptosi.sat.v8f16.v8i13 (<8 x half>) -declare <8 x i16> @llvm.fptosi.sat.v8f16.v8i16 (<8 x half>) -declare <8 x i19> @llvm.fptosi.sat.v8f16.v8i19 (<8 x half>) -declare <8 x i50> @llvm.fptosi.sat.v8f16.v8i50 (<8 x half>) -declare <8 x i64> @llvm.fptosi.sat.v8f16.v8i64 (<8 x half>) -declare <8 x i100> @llvm.fptosi.sat.v8f16.v8i100(<8 x half>) -declare <8 x i128> @llvm.fptosi.sat.v8f16.v8i128(<8 x half>) - -define arm_aapcs_vfpcc <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i1: +; +; 4-Vector half to signed integer -- result size variation +; + +declare <8 x i1> @llvm.fptosi.sat.v8f16.v8i1 (<8 x half>) +declare <8 x i8> @llvm.fptosi.sat.v8f16.v8i8 (<8 x half>) +declare <8 x i13> @llvm.fptosi.sat.v8f16.v8i13 (<8 x half>) +declare <8 x i16> @llvm.fptosi.sat.v8f16.v8i16 (<8 x half>) +declare <8 x i19> @llvm.fptosi.sat.v8f16.v8i19 (<8 x half>) +declare <8 x i50> @llvm.fptosi.sat.v8f16.v8i50 (<8 x half>) +declare <8 x i64> @llvm.fptosi.sat.v8f16.v8i64 (<8 x half>) +declare <8 x i100> @llvm.fptosi.sat.v8f16.v8i100(<8 x half>) +declare <8 x i128> @llvm.fptosi.sat.v8f16.v8i128(<8 x half>) + +define arm_aapcs_vfpcc <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) { +; CHECK-LABEL: test_signed_v8f16_v8i1: ; CHECK: @ %bb.0: ; CHECK-NEXT: .vsave {d8} ; CHECK-NEXT: vpush {d8} @@ -4137,600 +4851,143 @@ define arm_aapcs_vfpcc <8 x i19> @test_signed_v8f16_v8i19(<8 x half> %f) { ; CHECK-NEXT: vcvtb.f32.f16 s0, s2 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vmov r1, s14 -; CHECK-NEXT: bfc r2, #19, #13 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vcmp.f32 s8, s8 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: lsll r2, r7, #19 -; CHECK-NEXT: bfc r1, #19, #13 -; CHECK-NEXT: vmov r12, s10 -; CHECK-NEXT: vcmp.f32 s1, s1 -; CHECK-NEXT: vmaxnm.f32 s8, s0, s6 -; CHECK-NEXT: orr.w r1, r1, r2 -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r12, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s0, s0 -; CHECK-NEXT: vcvtt.f32.f16 s0, s2 -; CHECK-NEXT: vmaxnm.f32 s2, s0, s6 -; CHECK-NEXT: vminnm.f32 s8, s8, s4 -; CHECK-NEXT: vminnm.f32 s2, s2, s4 -; CHECK-NEXT: vmov r3, s7 -; CHECK-NEXT: vcvt.s32.f32 s2, s2 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: vcvt.s32.f32 s8, s8 -; CHECK-NEXT: bfc r3, #19, #13 -; CHECK-NEXT: mov r2, r12 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: bfc r2, #19, #13 -; CHECK-NEXT: mov r4, r3 -; CHECK-NEXT: mov.w r9, #0 -; CHECK-NEXT: lsrl r2, r1, #7 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s0, s0 -; CHECK-NEXT: lsrl r4, r9, #26 -; CHECK-NEXT: vcvtt.f32.f16 s0, s3 -; CHECK-NEXT: mov lr, r1 -; CHECK-NEXT: orr.w r1, r4, r2 -; CHECK-NEXT: vmov r4, s2 -; CHECK-NEXT: vmaxnm.f32 s2, s0, s6 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vminnm.f32 s2, s2, s4 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vcvt.s32.f32 s2, s2 -; CHECK-NEXT: bfc r2, #19, #13 -; CHECK-NEXT: movs r5, #0 -; CHECK-NEXT: lsll r2, r5, #12 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: orrs r2, r1 -; CHECK-NEXT: bfc r4, #19, #13 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: lsll r4, r1, #31 -; CHECK-NEXT: vcmp.f32 s0, s0 -; CHECK-NEXT: orrs r2, r4 -; CHECK-NEXT: str r2, [r0, #8] -; CHECK-NEXT: orr.w r2, r7, r3, lsl #6 -; CHECK-NEXT: vcvtb.f32.f16 s0, s3 -; CHECK-NEXT: orr.w r3, r2, r12, lsl #25 -; CHECK-NEXT: vmov r2, s2 -; CHECK-NEXT: vmaxnm.f32 s2, s0, s6 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vminnm.f32 s2, s2, s4 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vcvt.s32.f32 s2, s2 -; CHECK-NEXT: bfc r2, #19, #13 -; CHECK-NEXT: movs r7, #0 -; CHECK-NEXT: vcmp.f32 s0, s0 -; CHECK-NEXT: lsll r2, r7, #5 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov.w r11, #0 -; CHECK-NEXT: vmov r7, s2 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: mov r4, r7 -; CHECK-NEXT: bfc r4, #19, #13 -; CHECK-NEXT: lsrl r4, r11, #14 -; CHECK-NEXT: orrs r2, r4 -; CHECK-NEXT: strh r2, [r0, #16] -; CHECK-NEXT: str r3, [r0, #4] -; CHECK-NEXT: lsrs r2, r2, #16 -; CHECK-NEXT: strb r2, [r0, #18] -; CHECK-NEXT: orr.w r2, r9, lr -; CHECK-NEXT: orrs r2, r5 -; CHECK-NEXT: orrs r1, r2 -; CHECK-NEXT: orr.w r1, r1, r7, lsl #18 -; CHECK-NEXT: str r1, [r0, #12] -; CHECK-NEXT: pop.w {r4, r5, r7, r9, r11, pc} -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI46_0: -; CHECK-NEXT: .long 0x487fffc0 @ float 262143 -; CHECK-NEXT: .LCPI46_1: -; CHECK-NEXT: .long 0xc8800000 @ float -262144 - %x = call <8 x i19> @llvm.fptosi.sat.v8f16.v8i19(<8 x half> %f) - ret <8 x i19> %x -} - -define arm_aapcs_vfpcc <8 x i32> @test_signed_v8f16_v8i32_duplicate(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i32_duplicate: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmovx.f16 s4, s3 -; CHECK-NEXT: vmovx.f16 s6, s0 -; CHECK-NEXT: vcvt.s32.f16 s8, s4 -; CHECK-NEXT: vmovx.f16 s4, s2 -; CHECK-NEXT: vcvt.s32.f16 s10, s4 -; CHECK-NEXT: vmovx.f16 s4, s1 -; CHECK-NEXT: vcvt.s32.f16 s14, s2 -; CHECK-NEXT: vcvt.s32.f16 s2, s1 -; CHECK-NEXT: vcvt.s32.f16 s0, s0 -; CHECK-NEXT: vcvt.s32.f16 s4, s4 -; CHECK-NEXT: vcvt.s32.f16 s6, s6 -; CHECK-NEXT: vmov r0, s2 -; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: vcvt.s32.f16 s12, s3 -; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmov r1, s6 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmov r1, s14 -; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov r1, s10 -; CHECK-NEXT: vmov q1[3], q1[1], r1, r0 -; CHECK-NEXT: bx lr - %x = call <8 x i32> @llvm.fptosi.sat.v8f16.v8i32(<8 x half> %f) - ret <8 x i32> %x -} - -define arm_aapcs_vfpcc <8 x i50> @test_signed_v8f16_v8i50(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i50: -; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vcvtt.f32.f16 s24, s16 -; CHECK-NEXT: vmov r0, s24 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtb.f32.f16 s26, s17 -; CHECK-NEXT: mov r2, r0 -; CHECK-NEXT: vmov r0, s26 -; CHECK-NEXT: vldr s22, .LCPI48_0 -; CHECK-NEXT: vldr s20, .LCPI48_1 -; CHECK-NEXT: vcmp.f32 s24, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movtlt r1, #65534 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vcmp.f32 s24, s24 -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r1, #65535 -; CHECK-NEXT: movtgt r1, #1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtt.f32.f16 s24, s17 -; CHECK-NEXT: mov r2, r0 -; CHECK-NEXT: vmov r0, s24 -; CHECK-NEXT: vcmp.f32 s26, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movtlt r1, #65534 -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r1, #65535 -; CHECK-NEXT: movtgt r1, #1 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vcmp.f32 s26, s26 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtb.f32.f16 s26, s18 -; CHECK-NEXT: mov r2, r0 -; CHECK-NEXT: vmov r0, s26 -; CHECK-NEXT: vcmp.f32 s24, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movtlt r1, #65534 -; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r1, #65535 -; CHECK-NEXT: movtgt r1, #1 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vcmp.f32 s24, s24 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s26, s22 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movtlt r6, #65534 -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: vcvtt.f32.f16 s18, s18 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movwgt r6, #65535 -; CHECK-NEXT: movtgt r6, #1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s26, s26 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r4, #25] -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r9, r1 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: ittt lt -; CHECK-NEXT: movwlt r9, #0 -; CHECK-NEXT: movtlt r9, #65534 -; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: vcvtb.f32.f16 s18, s19 -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r8, #-1 -; CHECK-NEXT: movwgt r9, #65535 -; CHECK-NEXT: movtgt r9, #1 -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs.w r8, #0 -; CHECK-NEXT: movvs.w r9, #0 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: ittt lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movtlt r5, #65534 -; CHECK-NEXT: movlt.w r11, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: vcvtt.f32.f16 s18, s19 -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r11, #-1 -; CHECK-NEXT: movwgt r5, #65535 -; CHECK-NEXT: movtgt r5, #1 -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs.w r11, #0 -; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtb.f32.f16 s16, s16 -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movtlt r7, #65534 -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: movwgt r7, #65535 -; CHECK-NEXT: movtgt r7, #1 -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs.w r10, #0 -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: bfc r5, #18, #14 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movtlt r1, #65534 -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movwgt r1, #65535 -; CHECK-NEXT: movtgt r1, #1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: lsrl r2, r5, #28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str r0, [r4] -; CHECK-NEXT: lsr.w r0, r7, #10 -; CHECK-NEXT: bfc r7, #18, #14 -; CHECK-NEXT: bfc r9, #18, #14 -; CHECK-NEXT: lsll r10, r7, #22 -; CHECK-NEXT: bfc r6, #18, #14 -; CHECK-NEXT: orr.w r3, r5, r7 -; CHECK-NEXT: str.w r3, [r4, #45] -; CHECK-NEXT: orr.w r2, r2, r10 -; CHECK-NEXT: str.w r2, [r4, #41] -; CHECK-NEXT: strb.w r0, [r4, #49] -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: lsrl r0, r9, #14 -; CHECK-NEXT: orr.w r2, r9, r11, lsl #4 -; CHECK-NEXT: str.w r2, [r4, #37] -; CHECK-NEXT: str.w r0, [r4, #33] -; CHECK-NEXT: orr.w r0, r6, r8, lsl #18 -; CHECK-NEXT: str.w r0, [r4, #29] -; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: lsr.w r5, r3, #10 -; CHECK-NEXT: bfc r3, #18, #14 -; CHECK-NEXT: lsll r0, r3, #22 -; CHECK-NEXT: mov r7, r3 -; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: bfc r3, #18, #14 -; CHECK-NEXT: lsrl r2, r3, #28 -; CHECK-NEXT: orr.w r3, r3, r7 -; CHECK-NEXT: str r3, [r4, #20] -; CHECK-NEXT: orr.w r2, r2, r0 -; CHECK-NEXT: str r2, [r4, #16] -; CHECK-NEXT: strb r5, [r4, #24] -; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: bfc r3, #18, #14 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: lsrl r0, r3, #14 -; CHECK-NEXT: orr.w r2, r3, r6, lsl #4 -; CHECK-NEXT: strd r0, r2, [r4, #8] -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: bfc r1, #18, #14 -; CHECK-NEXT: orr.w r0, r1, r7, lsl #18 -; CHECK-NEXT: str r0, [r4, #4] -; CHECK-NEXT: add sp, #24 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI48_0: -; CHECK-NEXT: .long 0xd8000000 @ float -5.62949953E+14 -; CHECK-NEXT: .LCPI48_1: -; CHECK-NEXT: .long 0x57ffffff @ float 5.6294992E+14 - %x = call <8 x i50> @llvm.fptosi.sat.v8f16.v8i50(<8 x half> %f) - ret <8 x i50> %x -} - -define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i64: -; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vcvtt.f32.f16 s24, s19 -; CHECK-NEXT: vmov r0, s24 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtb.f32.f16 s26, s19 -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: vmov r0, s26 -; CHECK-NEXT: vldr s28, .LCPI49_0 -; CHECK-NEXT: vldr s30, .LCPI49_1 -; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: vcmp.f32 s24, s28 -; CHECK-NEXT: vcvtt.f32.f16 s20, s16 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt.w r8, #-2147483648 -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: vcmp.f32 s24, s30 -; CHECK-NEXT: vcvtt.f32.f16 s22, s18 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: mvngt r8, #-2147483648 -; CHECK-NEXT: vcmp.f32 s24, s24 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vmov r6, s20 -; CHECK-NEXT: vmov r4, s22 -; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs.w r8, #0 -; CHECK-NEXT: movvs.w r9, #0 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: vcmp.f32 s26, s28 -; CHECK-NEXT: mov r11, r1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: movlt.w r11, #-2147483648 -; CHECK-NEXT: vcmp.f32 s26, s30 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: itt gt -; CHECK-NEXT: mvngt r11, #-2147483648 -; CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: vcmp.f32 s26, s26 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs.w r10, #0 -; CHECK-NEXT: movvs.w r11, #0 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vcmp.f32 s22, s28 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt.w r4, #-2147483648 -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: vcmp.f32 s22, s30 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: mvngt r4, #-2147483648 -; CHECK-NEXT: vcmp.f32 s22, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtb.f32.f16 s16, s16 -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: vcmp.f32 s20, s28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt.w r6, #-2147483648 -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vcmp.f32 s20, s30 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: mvngt r6, #-2147483648 -; CHECK-NEXT: vcmp.f32 s20, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s16, s28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt.w r1, #-2147483648 -; CHECK-NEXT: vcmp.f32 s16, s30 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: itt gt -; CHECK-NEXT: mvngt r1, #-2147483648 -; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: vmov r1, s14 +; CHECK-NEXT: bfc r2, #19, #13 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: vcvtt.f32.f16 s16, s17 -; CHECK-NEXT: vmov q5[2], q5[0], r0, r7 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmov q5[3], q5[1], r1, r6 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s16, s28 -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: vcmp.f32 s16, s30 -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt.w r6, #-2147483648 -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: vcvtb.f32.f16 s16, s17 -; CHECK-NEXT: itt gt -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: mvngt r6, #-2147483648 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s16, s28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt.w r1, #-2147483648 -; CHECK-NEXT: vcmp.f32 s16, s30 +; CHECK-NEXT: vcmp.f32 s8, s8 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: itt gt -; CHECK-NEXT: mvngt r1, #-2147483648 -; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: lsll r2, r7, #19 +; CHECK-NEXT: bfc r1, #19, #13 +; CHECK-NEXT: vmov r12, s10 +; CHECK-NEXT: vcmp.f32 s1, s1 +; CHECK-NEXT: vmaxnm.f32 s8, s0, s6 +; CHECK-NEXT: orr.w r1, r1, r2 +; CHECK-NEXT: str r1, [r0] +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs.w r12, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s0, s0 +; CHECK-NEXT: vcvtt.f32.f16 s0, s2 +; CHECK-NEXT: vmaxnm.f32 s2, s0, s6 +; CHECK-NEXT: vminnm.f32 s8, s8, s4 +; CHECK-NEXT: vminnm.f32 s2, s2, s4 +; CHECK-NEXT: vmov r3, s7 +; CHECK-NEXT: vcvt.s32.f32 s2, s2 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: vcvtb.f32.f16 s16, s18 -; CHECK-NEXT: vmov q6[2], q6[0], r0, r7 -; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: vcvt.s32.f32 s8, s8 +; CHECK-NEXT: bfc r3, #19, #13 +; CHECK-NEXT: mov r2, r12 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: bfc r2, #19, #13 +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: lsrl r2, r1, #7 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s0, s0 +; CHECK-NEXT: lsrl r4, r9, #26 +; CHECK-NEXT: vcvtt.f32.f16 s0, s3 +; CHECK-NEXT: mov lr, r1 +; CHECK-NEXT: orr.w r1, r4, r2 +; CHECK-NEXT: vmov r4, s2 +; CHECK-NEXT: vmaxnm.f32 s2, s0, s6 +; CHECK-NEXT: vmov r2, s8 +; CHECK-NEXT: vminnm.f32 s2, s2, s4 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmov q6[3], q6[1], r1, r6 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s16, s28 -; CHECK-NEXT: vmov q3[2], q3[0], r10, r9 +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: vcvt.s32.f32 s2, s2 +; CHECK-NEXT: bfc r2, #19, #13 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: lsll r2, r5, #12 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt.w r1, #-2147483648 -; CHECK-NEXT: vcmp.f32 s16, s30 -; CHECK-NEXT: vmov q3[3], q3[1], r11, r8 +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r4, #0 +; CHECK-NEXT: orrs r2, r1 +; CHECK-NEXT: bfc r4, #19, #13 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: lsll r4, r1, #31 +; CHECK-NEXT: vcmp.f32 s0, s0 +; CHECK-NEXT: orrs r2, r4 +; CHECK-NEXT: str r2, [r0, #8] +; CHECK-NEXT: orr.w r2, r7, r3, lsl #6 +; CHECK-NEXT: vcvtb.f32.f16 s0, s3 +; CHECK-NEXT: orr.w r3, r2, r12, lsl #25 +; CHECK-NEXT: vmov r2, s2 +; CHECK-NEXT: vmaxnm.f32 s2, s0, s6 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: mvngt r1, #-2147483648 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: vmov q0, q5 +; CHECK-NEXT: vminnm.f32 s2, s2, s4 +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: vcvt.s32.f32 s2, s2 +; CHECK-NEXT: bfc r2, #19, #13 +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: vcmp.f32 s0, s0 +; CHECK-NEXT: lsll r2, r7, #5 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmov q2[2], q2[0], r0, r5 -; CHECK-NEXT: vmov q1, q6 -; CHECK-NEXT: vmov q2[3], q2[1], r1, r4 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: vmov r7, s2 +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: mov r4, r7 +; CHECK-NEXT: bfc r4, #19, #13 +; CHECK-NEXT: lsrl r4, r11, #14 +; CHECK-NEXT: orrs r2, r4 +; CHECK-NEXT: strh r2, [r0, #16] +; CHECK-NEXT: str r3, [r0, #4] +; CHECK-NEXT: lsrs r2, r2, #16 +; CHECK-NEXT: strb r2, [r0, #18] +; CHECK-NEXT: orr.w r2, r9, lr +; CHECK-NEXT: orrs r2, r5 +; CHECK-NEXT: orrs r1, r2 +; CHECK-NEXT: orr.w r1, r1, r7, lsl #18 +; CHECK-NEXT: str r1, [r0, #12] +; CHECK-NEXT: pop.w {r4, r5, r7, r9, r11, pc} ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI49_0: -; CHECK-NEXT: .long 0xdf000000 @ float -9.22337203E+18 -; CHECK-NEXT: .LCPI49_1: -; CHECK-NEXT: .long 0x5effffff @ float 9.22337149E+18 - %x = call <8 x i64> @llvm.fptosi.sat.v8f16.v8i64(<8 x half> %f) - ret <8 x i64> %x +; CHECK-NEXT: .LCPI46_0: +; CHECK-NEXT: .long 0x487fffc0 @ float 262143 +; CHECK-NEXT: .LCPI46_1: +; CHECK-NEXT: .long 0xc8800000 @ float -262144 + %x = call <8 x i19> @llvm.fptosi.sat.v8f16.v8i19(<8 x half> %f) + ret <8 x i19> %x } -define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i100: +define arm_aapcs_vfpcc <8 x i32> @test_signed_v8f16_v8i32_duplicate(<8 x half> %f) { +; CHECK-LABEL: test_signed_v8f16_v8i32_duplicate: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmovx.f16 s4, s3 +; CHECK-NEXT: vmovx.f16 s6, s0 +; CHECK-NEXT: vcvt.s32.f16 s8, s4 +; CHECK-NEXT: vmovx.f16 s4, s2 +; CHECK-NEXT: vcvt.s32.f16 s10, s4 +; CHECK-NEXT: vmovx.f16 s4, s1 +; CHECK-NEXT: vcvt.s32.f16 s14, s2 +; CHECK-NEXT: vcvt.s32.f16 s2, s1 +; CHECK-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-NEXT: vcvt.s32.f16 s4, s4 +; CHECK-NEXT: vcvt.s32.f16 s6, s6 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: vcvt.s32.f16 s12, s3 +; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 +; CHECK-NEXT: vmov r0, s4 +; CHECK-NEXT: vmov r1, s6 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 +; CHECK-NEXT: vmov r0, s12 +; CHECK-NEXT: vmov r1, s14 +; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 +; CHECK-NEXT: vmov r0, s8 +; CHECK-NEXT: vmov r1, s10 +; CHECK-NEXT: vmov q1[3], q1[1], r1, r0 +; CHECK-NEXT: bx lr + %x = call <8 x i32> @llvm.fptosi.sat.v8f16.v8i32(<8 x half> %f) + ret <8 x i32> %x +} + +define arm_aapcs_vfpcc <8 x i50> @test_signed_v8f16_v8i50(<8 x half> %f) { +; CHECK-LABEL: test_signed_v8f16_v8i50: ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -4738,569 +4995,603 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: .pad #48 -; CHECK-NEXT: sub sp, #48 +; CHECK-NEXT: .pad #24 +; CHECK-NEXT: sub sp, #24 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vcvtb.f32.f16 s24, s17 +; CHECK-NEXT: vcvtt.f32.f16 s24, s16 ; CHECK-NEXT: vmov r0, s24 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcvtb.f32.f16 s26, s18 -; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcvtb.f32.f16 s26, s17 +; CHECK-NEXT: mov r2, r0 ; CHECK-NEXT: vmov r0, s26 -; CHECK-NEXT: vldr s22, .LCPI50_0 -; CHECK-NEXT: vldr s20, .LCPI50_1 -; CHECK-NEXT: mov r9, r1 -; CHECK-NEXT: vcmp.f32 s24, s22 -; CHECK-NEXT: mov r10, r2 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: mvnlt r3, #7 -; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: movgt.w r8, #-1 -; CHECK-NEXT: movgt r3, #7 -; CHECK-NEXT: vcmp.f32 s24, s24 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill -; CHECK-NEXT: ittt vs -; CHECK-NEXT: movvs.w r8, #0 -; CHECK-NEXT: movvs.w r9, #0 -; CHECK-NEXT: movvs.w r10, #0 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcvtb.f32.f16 s24, s19 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vmov r0, s24 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: vcmp.f32 s26, s22 -; CHECK-NEXT: mov r7, r2 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: mvnlt r3, #7 -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: movgt r3, #7 -; CHECK-NEXT: vcmp.f32 s26, s26 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: str r3, [sp, #32] @ 4-byte Spill -; CHECK-NEXT: ittt vs -; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: vldr s22, .LCPI48_0 +; CHECK-NEXT: vldr s20, .LCPI48_1 ; CHECK-NEXT: vcmp.f32 s24, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: mvnlt r3, #7 +; CHECK-NEXT: movtlt r1, #65534 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s24 -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt r3, #7 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: vcmp.f32 s24, s24 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movwgt r1, #65535 +; CHECK-NEXT: movtgt r1, #1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str.w r2, [r4, #83] +; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: str.w r1, [r4, #79] -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: vcvtt.f32.f16 s24, s16 -; CHECK-NEXT: str.w r0, [r4, #75] +; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcvtt.f32.f16 s24, s17 +; CHECK-NEXT: mov r2, r0 ; CHECK-NEXT: vmov r0, s24 -; CHECK-NEXT: str.w r7, [r4, #58] -; CHECK-NEXT: str.w r6, [r4, #54] -; CHECK-NEXT: str.w r5, [r4, #50] -; CHECK-NEXT: str.w r10, [r4, #33] -; CHECK-NEXT: str.w r9, [r4, #29] -; CHECK-NEXT: str.w r8, [r4, #25] -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s24, s22 +; CHECK-NEXT: vcmp.f32 s26, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: mvnlt r3, #7 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vcmp.f32 s24, s20 +; CHECK-NEXT: movtlt r1, #65534 +; CHECK-NEXT: vcmp.f32 s26, s20 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s24 -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movwgt r1, #65535 +; CHECK-NEXT: movtgt r1, #1 +; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt r3, #7 +; CHECK-NEXT: vcmp.f32 s26, s26 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: vcvtt.f32.f16 s24, s17 -; CHECK-NEXT: str r0, [sp, #40] @ 4-byte Spill -; CHECK-NEXT: vmov r0, s24 +; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: str r1, [sp, #44] @ 4-byte Spill -; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcvtb.f32.f16 s26, s18 +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: vmov r0, s26 ; CHECK-NEXT: vcmp.f32 s24, s22 -; CHECK-NEXT: vcvtt.f32.f16 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: mvnlt r3, #7 -; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movtlt r1, #65534 ; CHECK-NEXT: vcmp.f32 s24, s20 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movwgt r1, #65535 +; CHECK-NEXT: movtgt r1, #1 +; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt r3, #7 ; CHECK-NEXT: vcmp.f32 s24, s24 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: str r1, [sp] @ 4-byte Spill +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcmp.f32 s26, s22 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: ittt lt +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movtlt r6, #65534 +; CHECK-NEXT: vcmp.f32 s26, s20 +; CHECK-NEXT: vcvtt.f32.f16 s18, s18 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movwgt r6, #65535 +; CHECK-NEXT: movtgt r6, #1 +; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: vcmp.f32 s26, s26 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: str.w r0, [r4, #25] ; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r6, #0 +; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: mov r8, r2 +; CHECK-NEXT: mov r9, r1 ; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: itttt lt -; CHECK-NEXT: mvnlt r3, #7 +; CHECK-NEXT: ittt lt +; CHECK-NEXT: movwlt r9, #0 +; CHECK-NEXT: movtlt r9, #65534 ; CHECK-NEXT: movlt.w r8, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: vcvtb.f32.f16 s18, s19 +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movgt.w r8, #-1 +; CHECK-NEXT: movwgt r9, #65535 +; CHECK-NEXT: movtgt r9, #1 +; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs.w r8, #0 +; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: ittt lt ; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movtlt r5, #65534 +; CHECK-NEXT: movlt.w r11, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: vcvtt.f32.f16 s18, s19 +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movgt.w r11, #-1 +; CHECK-NEXT: movwgt r5, #65535 +; CHECK-NEXT: movtgt r5, #1 ; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: movgt.w r8, #-1 -; CHECK-NEXT: movgt r3, #7 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: str r3, [sp] @ 4-byte Spill -; CHECK-NEXT: ittt vs -; CHECK-NEXT: movvs.w r8, #0 +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs.w r11, #0 ; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcvtb.f32.f16 s16, s16 -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: mov r7, r1 ; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: mov r10, r2 -; CHECK-NEXT: mov r11, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: mvnlt r11, #7 +; CHECK-NEXT: ittt lt +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movtlt r7, #65534 ; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: movlt r6, #0 ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: movgt.w r9, #-1 +; CHECK-NEXT: ittt gt ; CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: movgt.w r11, #7 +; CHECK-NEXT: movwgt r7, #65535 +; CHECK-NEXT: movtgt r7, #1 ; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt vs -; CHECK-NEXT: movvs.w r11, #0 +; CHECK-NEXT: itt vs ; CHECK-NEXT: movvs.w r10, #0 -; CHECK-NEXT: movvs.w r9, #0 -; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: mov r12, r3 +; CHECK-NEXT: bfc r5, #18, #14 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: ittt lt ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: mvnlt r12, #7 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movtlt r1, #65534 ; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r12, #7 +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movwgt r1, #65535 +; CHECK-NEXT: movtgt r1, #1 ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: lsrl r2, r5, #28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str r2, [r4, #8] -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: str r1, [r4, #4] -; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: str r0, [r4] -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: lsrl r0, r9, #28 -; CHECK-NEXT: str.w r0, [r4, #91] -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: lsrl r0, r7, #28 -; CHECK-NEXT: str.w r0, [r4, #66] -; CHECK-NEXT: ldr.w lr, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: lsr.w r0, r7, #10 +; CHECK-NEXT: bfc r7, #18, #14 +; CHECK-NEXT: bfc r9, #18, #14 +; CHECK-NEXT: lsll r10, r7, #22 +; CHECK-NEXT: bfc r6, #18, #14 +; CHECK-NEXT: orr.w r3, r5, r7 +; CHECK-NEXT: str.w r3, [r4, #45] +; CHECK-NEXT: orr.w r2, r2, r10 +; CHECK-NEXT: str.w r2, [r4, #41] +; CHECK-NEXT: strb.w r0, [r4, #49] +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: lsrl r0, r9, #14 +; CHECK-NEXT: orr.w r2, r9, r11, lsl #4 +; CHECK-NEXT: str.w r2, [r4, #37] +; CHECK-NEXT: str.w r0, [r4, #33] +; CHECK-NEXT: orr.w r0, r6, r8, lsl #18 +; CHECK-NEXT: str.w r0, [r4, #29] +; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: lsr.w r5, r3, #10 +; CHECK-NEXT: bfc r3, #18, #14 +; CHECK-NEXT: lsll r0, r3, #22 +; CHECK-NEXT: mov r7, r3 ; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r0, lr -; CHECK-NEXT: lsrl r0, r3, #28 -; CHECK-NEXT: str.w r0, [r4, #41] -; CHECK-NEXT: ldrd r0, r1, [sp, #40] @ 8-byte Folded Reload -; CHECK-NEXT: lsrl r0, r1, #28 -; CHECK-NEXT: str r1, [sp, #44] @ 4-byte Spill -; CHECK-NEXT: and r1, r11, #15 -; CHECK-NEXT: str r0, [r4, #16] -; CHECK-NEXT: orr.w r0, r9, r10, lsl #4 -; CHECK-NEXT: lsrl r10, r1, #28 -; CHECK-NEXT: str.w r0, [r4, #95] -; CHECK-NEXT: strb.w r10, [r4, #99] -; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, r6, lsl #4 -; CHECK-NEXT: str.w r0, [r4, #87] -; CHECK-NEXT: orr.w r0, r7, r8, lsl #4 -; CHECK-NEXT: str.w r0, [r4, #70] -; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-NEXT: and r1, r0, #15 -; CHECK-NEXT: lsrl r8, r1, #28 -; CHECK-NEXT: strb.w r8, [r4, #74] -; CHECK-NEXT: ldr r0, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, r5, lsl #4 -; CHECK-NEXT: str.w r0, [r4, #62] -; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: orr.w r0, r3, r2, lsl #4 -; CHECK-NEXT: str.w r0, [r4, #45] -; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: and r1, r0, #15 -; CHECK-NEXT: lsrl r2, r1, #28 -; CHECK-NEXT: strb.w r2, [r4, #49] -; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, lr, lsl #4 -; CHECK-NEXT: str.w r0, [r4, #37] -; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: orr.w r0, r0, r2, lsl #4 -; CHECK-NEXT: str r0, [r4, #20] -; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: and r1, r0, #15 -; CHECK-NEXT: lsrl r2, r1, #28 -; CHECK-NEXT: strb r2, [r4, #24] +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: bfc r3, #18, #14 +; CHECK-NEXT: lsrl r2, r3, #28 +; CHECK-NEXT: orr.w r3, r3, r7 +; CHECK-NEXT: str r3, [r4, #20] +; CHECK-NEXT: orr.w r2, r2, r0 +; CHECK-NEXT: str r2, [r4, #16] +; CHECK-NEXT: strb r5, [r4, #24] +; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: bfc r3, #18, #14 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: lsrl r0, r3, #14 +; CHECK-NEXT: orr.w r2, r3, r6, lsl #4 +; CHECK-NEXT: strd r0, r2, [r4, #8] ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r12, #0 -; CHECK-NEXT: ldr r1, [sp, #40] @ 4-byte Reload -; CHECK-NEXT: and r0, r12, #15 -; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 -; CHECK-NEXT: str r0, [r4, #12] -; CHECK-NEXT: add sp, #48 +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: bfc r1, #18, #14 +; CHECK-NEXT: orr.w r0, r1, r7, lsl #18 +; CHECK-NEXT: str r0, [r4, #4] +; CHECK-NEXT: add sp, #24 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI50_0: -; CHECK-NEXT: .long 0xf1000000 @ float -6.338253E+29 -; CHECK-NEXT: .LCPI50_1: -; CHECK-NEXT: .long 0x70ffffff @ float 6.33825262E+29 - %x = call <8 x i100> @llvm.fptosi.sat.v8f16.v8i100(<8 x half> %f) - ret <8 x i100> %x +; CHECK-NEXT: .LCPI48_0: +; CHECK-NEXT: .long 0xd8000000 @ float -5.62949953E+14 +; CHECK-NEXT: .LCPI48_1: +; CHECK-NEXT: .long 0x57ffffff @ float 5.6294992E+14 + %x = call <8 x i50> @llvm.fptosi.sat.v8f16.v8i50(<8 x half> %f) + ret <8 x i50> %x } -define arm_aapcs_vfpcc <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i128: +define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { +; CHECK-LABEL: test_signed_v8f16_v8i64: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vcvtt.f32.f16 s30, s19 -; CHECK-NEXT: vcvtb.f32.f16 s20, s16 -; CHECK-NEXT: vmov r0, s30 +; CHECK-NEXT: vcvtt.f32.f16 s24, s19 +; CHECK-NEXT: vmov r0, s24 +; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcvtb.f32.f16 s26, s19 -; CHECK-NEXT: vldr s22, .LCPI51_0 -; CHECK-NEXT: vmov r5, s20 -; CHECK-NEXT: vmov r7, s26 -; CHECK-NEXT: vcvtt.f32.f16 s28, s18 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vldr s24, .LCPI51_1 -; CHECK-NEXT: add.w r12, r4, #112 -; CHECK-NEXT: vmov r6, s28 -; CHECK-NEXT: vcvtb.f32.f16 s18, s18 -; CHECK-NEXT: vcmp.f32 s30, s24 +; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: vmov r0, s26 +; CHECK-NEXT: vldr s28, .LCPI49_0 +; CHECK-NEXT: vldr s30, .LCPI49_1 +; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: vcmp.f32 s24, s28 +; CHECK-NEXT: vcvtt.f32.f16 s20, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s30, s22 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt.w r8, #-2147483648 +; CHECK-NEXT: movlt.w r9, #0 +; CHECK-NEXT: vcmp.f32 s24, s30 +; CHECK-NEXT: vcvtt.f32.f16 s22, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vcmp.f32 s30, s30 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r9, #-1 +; CHECK-NEXT: mvngt r8, #-2147483648 +; CHECK-NEXT: vcmp.f32 s24, s24 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: ittt vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s26, s24 -; CHECK-NEXT: add.w r12, r4, #96 +; CHECK-NEXT: vmov r6, s20 +; CHECK-NEXT: vmov r4, s22 +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs.w r8, #0 +; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: vcmp.f32 s26, s28 +; CHECK-NEXT: mov r11, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s26, s22 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt.w r10, #0 +; CHECK-NEXT: movlt.w r11, #-2147483648 +; CHECK-NEXT: vcmp.f32 s26, s30 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: itt gt +; CHECK-NEXT: mvngt r11, #-2147483648 +; CHECK-NEXT: movgt.w r10, #-1 ; CHECK-NEXT: vcmp.f32 s26, s26 -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: mvngt r3, #-2147483648 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: ittt vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: vmov r7, s18 -; CHECK-NEXT: vcvtt.f32.f16 s26, s17 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s28, s24 -; CHECK-NEXT: add.w r12, r4, #80 +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs.w r10, #0 +; CHECK-NEXT: movvs.w r11, #0 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: vcmp.f32 s22, s28 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s28, s22 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt.w r4, #-2147483648 +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: vcmp.f32 s22, s30 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s28 -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: mvngt r3, #-2147483648 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: mvngt r4, #-2147483648 +; CHECK-NEXT: vcmp.f32 s22, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: ittt vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: vmov r6, s26 -; CHECK-NEXT: vcvtb.f32.f16 s28, s17 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s18, s24 -; CHECK-NEXT: add.w r12, r4, #64 +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r4, #0 +; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcvtb.f32.f16 s16, s16 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: vcmp.f32 s20, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: vcvtt.f32.f16 s16, s16 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt.w r6, #-2147483648 +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: vcmp.f32 s20, s30 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: mvngt r6, #-2147483648 +; CHECK-NEXT: vcmp.f32 s20, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: ittt vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: vmov r7, s28 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s26, s24 -; CHECK-NEXT: add.w r12, r4, #48 +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r6, #0 +; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcmp.f32 s16, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s26, s22 +; CHECK-NEXT: movlt.w r1, #-2147483648 +; CHECK-NEXT: vcmp.f32 s16, s30 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt +; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: itt gt +; CHECK-NEXT: mvngt r1, #-2147483648 ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vcmp.f32 s26, s26 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: ittt vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: vmov r6, s16 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s28, s24 -; CHECK-NEXT: add.w r12, r4, #32 +; CHECK-NEXT: vcvtt.f32.f16 s16, s17 +; CHECK-NEXT: vmov q5[2], q5[0], r0, r7 +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: vmov q5[3], q5[1], r1, r6 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcmp.f32 s16, s28 +; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s28, s22 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: vcmp.f32 s16, s30 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt.w r6, #-2147483648 +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vcmp.f32 s28, s28 +; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: vcvtb.f32.f16 s16, s17 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: mvngt r6, #-2147483648 +; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s16, s24 -; CHECK-NEXT: add.w r12, r4, #16 +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r6, #0 +; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcmp.f32 s16, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s16, s22 +; CHECK-NEXT: movlt.w r1, #-2147483648 +; CHECK-NEXT: vcmp.f32 s16, s30 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: mvngt r3, #-2147483648 ; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: itt gt +; CHECK-NEXT: mvngt r1, #-2147483648 +; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s20, s24 +; CHECK-NEXT: vcvtb.f32.f16 s16, s18 +; CHECK-NEXT: vmov q6[2], q6[0], r0, r7 +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: vmov q6[3], q6[1], r1, r6 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcmp.f32 s16, s28 +; CHECK-NEXT: vmov q3[2], q3[0], r10, r9 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s20, s22 +; CHECK-NEXT: movlt.w r1, #-2147483648 +; CHECK-NEXT: vcmp.f32 s16, s30 +; CHECK-NEXT: vmov q3[3], q3[1], r11, r8 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt +; CHECK-NEXT: itt gt +; CHECK-NEXT: mvngt r1, #-2147483648 ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vcmp.f32 s20, s20 +; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: vmov q0, q5 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: itt vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: stm r4!, {r0, r1, r2, r3} +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: vmov q2[2], q2[0], r0, r5 +; CHECK-NEXT: vmov q1, q6 +; CHECK-NEXT: vmov q2[3], q2[1], r1, r4 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI51_0: -; CHECK-NEXT: .long 0x7effffff @ float 1.70141173E+38 -; CHECK-NEXT: .LCPI51_1: -; CHECK-NEXT: .long 0xff000000 @ float -1.70141183E+38 +; CHECK-NEXT: .LCPI49_0: +; CHECK-NEXT: .long 0xdf000000 @ float -9.22337203E+18 +; CHECK-NEXT: .LCPI49_1: +; CHECK-NEXT: .long 0x5effffff @ float 9.22337149E+18 + %x = call <8 x i64> @llvm.fptosi.sat.v8f16.v8i64(<8 x half> %f) + ret <8 x i64> %x +} + +define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { +; CHECK-LABEL: test_signed_v8f16_v8i100: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r5, lr} +; CHECK-NEXT: push {r5, lr} +; CHECK-NEXT: vmovx.f16 s14, s3 +; CHECK-NEXT: vcvt.s32.f16 s12, s3 +; CHECK-NEXT: vcvt.s32.f16 s14, s14 +; CHECK-NEXT: vcvt.s32.f16 s10, s2 +; CHECK-NEXT: vmov r12, s14 +; CHECK-NEXT: vmovx.f16 s2, s2 +; CHECK-NEXT: vcvt.s32.f16 s2, s2 +; CHECK-NEXT: vmovx.f16 s8, s1 +; CHECK-NEXT: vcvt.s32.f16 s8, s8 +; CHECK-NEXT: vcvt.s32.f16 s6, s1 +; CHECK-NEXT: vcvt.s32.f16 s4, s0 +; CHECK-NEXT: vmovx.f16 s0, s0 +; CHECK-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-NEXT: asr.w r3, r12, #31 +; CHECK-NEXT: mov r2, r12 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: lsrl r2, r1, #28 +; CHECK-NEXT: orr.w r1, r1, r3, lsl #4 +; CHECK-NEXT: str.w r1, [r0, #95] +; CHECK-NEXT: str.w r2, [r0, #91] +; CHECK-NEXT: movs r1, #15 +; CHECK-NEXT: and.w r5, r1, r12, asr #31 +; CHECK-NEXT: mov r2, r3 +; CHECK-NEXT: lsrl r2, r5, #28 +; CHECK-NEXT: strb.w r2, [r0, #99] +; CHECK-NEXT: vmov r2, s12 +; CHECK-NEXT: and.w r3, r1, r2, asr #31 +; CHECK-NEXT: orr.w r3, r3, r12, lsl #4 +; CHECK-NEXT: vmov r12, s2 +; CHECK-NEXT: str.w r3, [r0, #87] +; CHECK-NEXT: asrs r3, r2, #31 +; CHECK-NEXT: str.w r3, [r0, #83] +; CHECK-NEXT: str.w r3, [r0, #79] +; CHECK-NEXT: str.w r2, [r0, #75] +; CHECK-NEXT: asr.w r3, r12, #31 +; CHECK-NEXT: mov r2, r12 +; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: lsrl r2, r5, #28 +; CHECK-NEXT: orr.w r5, r5, r3, lsl #4 +; CHECK-NEXT: str.w r5, [r0, #70] +; CHECK-NEXT: str.w r2, [r0, #66] +; CHECK-NEXT: and.w r5, r1, r12, asr #31 +; CHECK-NEXT: mov r2, r3 +; CHECK-NEXT: lsrl r2, r5, #28 +; CHECK-NEXT: strb.w r2, [r0, #74] +; CHECK-NEXT: vmov r2, s10 +; CHECK-NEXT: and.w r3, r1, r2, asr #31 +; CHECK-NEXT: orr.w r3, r3, r12, lsl #4 +; CHECK-NEXT: vmov r12, s8 +; CHECK-NEXT: str.w r3, [r0, #62] +; CHECK-NEXT: asrs r3, r2, #31 +; CHECK-NEXT: str.w r3, [r0, #58] +; CHECK-NEXT: str.w r3, [r0, #54] +; CHECK-NEXT: str.w r2, [r0, #50] +; CHECK-NEXT: asr.w r3, r12, #31 +; CHECK-NEXT: mov r2, r12 +; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: lsrl r2, r5, #28 +; CHECK-NEXT: orr.w r5, r5, r3, lsl #4 +; CHECK-NEXT: str.w r5, [r0, #45] +; CHECK-NEXT: str.w r2, [r0, #41] +; CHECK-NEXT: and.w r5, r1, r12, asr #31 +; CHECK-NEXT: mov r2, r3 +; CHECK-NEXT: lsrl r2, r5, #28 +; CHECK-NEXT: strb.w r2, [r0, #49] +; CHECK-NEXT: vmov r2, s6 +; CHECK-NEXT: and.w r3, r1, r2, asr #31 +; CHECK-NEXT: orr.w r3, r3, r12, lsl #4 +; CHECK-NEXT: vmov r12, s0 +; CHECK-NEXT: str.w r3, [r0, #37] +; CHECK-NEXT: asrs r3, r2, #31 +; CHECK-NEXT: str.w r3, [r0, #33] +; CHECK-NEXT: str.w r3, [r0, #29] +; CHECK-NEXT: str.w r2, [r0, #25] +; CHECK-NEXT: asr.w r3, r12, #31 +; CHECK-NEXT: mov r2, r12 +; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: lsrl r2, r5, #28 +; CHECK-NEXT: orr.w r5, r5, r3, lsl #4 +; CHECK-NEXT: strd r2, r5, [r0, #16] +; CHECK-NEXT: and.w r5, r1, r12, asr #31 +; CHECK-NEXT: mov r2, r3 +; CHECK-NEXT: lsrl r2, r5, #28 +; CHECK-NEXT: strb r2, [r0, #24] +; CHECK-NEXT: vmov r2, s4 +; CHECK-NEXT: and.w r1, r1, r2, asr #31 +; CHECK-NEXT: orr.w r1, r1, r12, lsl #4 +; CHECK-NEXT: str r1, [r0, #12] +; CHECK-NEXT: asrs r1, r2, #31 +; CHECK-NEXT: str r1, [r0, #8] +; CHECK-NEXT: strd r2, r1, [r0] +; CHECK-NEXT: pop {r5, pc} + %x = call <8 x i100> @llvm.fptosi.sat.v8f16.v8i100(<8 x half> %f) + ret <8 x i100> %x +} + +define arm_aapcs_vfpcc <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) { +; CHECK-LABEL: test_signed_v8f16_v8i128: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmovx.f16 s14, s3 +; CHECK-NEXT: vcvt.s32.f16 s12, s3 +; CHECK-NEXT: vcvt.s32.f16 s14, s14 +; CHECK-NEXT: vmov r3, s12 +; CHECK-NEXT: vmov r12, s14 +; CHECK-NEXT: vcvt.s32.f16 s10, s2 +; CHECK-NEXT: vmovx.f16 s2, s2 +; CHECK-NEXT: vmovx.f16 s8, s1 +; CHECK-NEXT: vcvt.s32.f16 s2, s2 +; CHECK-NEXT: vcvt.s32.f16 s6, s1 +; CHECK-NEXT: vcvt.s32.f16 s8, s8 +; CHECK-NEXT: vcvt.s32.f16 s4, s0 +; CHECK-NEXT: vmovx.f16 s0, s0 +; CHECK-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-NEXT: asrs r1, r3, #31 +; CHECK-NEXT: asr.w r2, r12, #31 +; CHECK-NEXT: strd r3, r1, [r0, #96] +; CHECK-NEXT: strd r1, r1, [r0, #104] +; CHECK-NEXT: vmov r3, s10 +; CHECK-NEXT: strd r12, r2, [r0, #112] +; CHECK-NEXT: vmov r12, s2 +; CHECK-NEXT: strd r2, r2, [r0, #120] +; CHECK-NEXT: asrs r1, r3, #31 +; CHECK-NEXT: asr.w r2, r12, #31 +; CHECK-NEXT: strd r3, r1, [r0, #64] +; CHECK-NEXT: strd r1, r1, [r0, #72] +; CHECK-NEXT: vmov r3, s6 +; CHECK-NEXT: strd r12, r2, [r0, #80] +; CHECK-NEXT: vmov r12, s8 +; CHECK-NEXT: strd r2, r2, [r0, #88] +; CHECK-NEXT: asrs r1, r3, #31 +; CHECK-NEXT: asr.w r2, r12, #31 +; CHECK-NEXT: strd r3, r1, [r0, #32] +; CHECK-NEXT: strd r1, r1, [r0, #40] +; CHECK-NEXT: vmov r3, s4 +; CHECK-NEXT: strd r12, r2, [r0, #48] +; CHECK-NEXT: vmov r12, s0 +; CHECK-NEXT: strd r2, r2, [r0, #56] +; CHECK-NEXT: asrs r1, r3, #31 +; CHECK-NEXT: asr.w r2, r12, #31 +; CHECK-NEXT: strd r3, r1, [r0] +; CHECK-NEXT: strd r1, r1, [r0, #8] +; CHECK-NEXT: strd r12, r2, [r0, #16] +; CHECK-NEXT: strd r2, r2, [r0, #24] +; CHECK-NEXT: bx lr %x = call <8 x i128> @llvm.fptosi.sat.v8f16.v8i128(<8 x half> %f) ret <8 x i128> %x } diff --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll index 1a22270ea0ebe..da81b78aa4508 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll @@ -1684,236 +1684,621 @@ define arm_aapcs_vfpcc <4 x i64> @test_unsigned_v4f32_v4i64(<4 x float> %f) { define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i100: -; CHECK: @ %bb.0: +; CHECK: @ %bb.0: @ %fp-to-i-entryfp-to-i-entryfp-to-i-entryfp-to-i-entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10} -; CHECK-NEXT: vpush {d8, d9, d10} -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vmov r6, s17 -; CHECK-NEXT: vldr s20, .LCPI30_0 -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: str.w r2, [r4, #33] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: str.w r1, [r4, #29] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str.w r0, [r4, #25] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: vmov r7, s19 -; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: vcmp.f32 s17, #0 -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: mov r6, r2 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: str r3, [sp] @ 4-byte Spill -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: vcmp.f32 s19, #0 -; CHECK-NEXT: mov r9, r1 -; CHECK-NEXT: mov r8, r2 -; CHECK-NEXT: mov r11, r3 +; CHECK-NEXT: .pad #164 +; CHECK-NEXT: sub sp, #164 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: ubfx r1, r2, #23, #8 +; CHECK-NEXT: cmp r1, #127 +; CHECK-NEXT: blo .LBB30_6 +; CHECK-NEXT: @ %bb.1: @ %fp-to-i-entryfp-to-i-entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: vcmp.f32 s0, s0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: movlt.w r11, #0 -; CHECK-NEXT: vcmp.f32 s19, s20 +; CHECK-NEXT: bvs .LBB30_6 +; CHECK-NEXT: @ %bb.2: @ %fp-to-i-entryfp-to-i-entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: mov r5, r8 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: bmi .LBB30_9 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-check.saturate24 +; CHECK-NEXT: mov.w r8, #-1 +; CHECK-NEXT: mov.w r5, #-1 +; CHECK-NEXT: mov.w r3, #-1 +; CHECK-NEXT: mov.w r10, #15 +; CHECK-NEXT: cmp r1, #226 +; CHECK-NEXT: bhi .LBB30_9 +; CHECK-NEXT: @ %bb.4: @ %fp-to-i-if-check.exp.size26 +; CHECK-NEXT: movs r3, #1 +; CHECK-NEXT: cmp r1, #149 +; CHECK-NEXT: bfi r2, r3, #23, #9 +; CHECK-NEXT: bhi .LBB30_8 +; CHECK-NEXT: @ %bb.5: @ %fp-to-i-if-exp.small27 +; CHECK-NEXT: rsb.w r1, r1, #150 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: lsr.w r8, r2, r1 +; CHECK-NEXT: b .LBB30_7 +; CHECK-NEXT: .LBB30_6: +; CHECK-NEXT: mov r5, r8 +; CHECK-NEXT: .LBB30_7: @ %fp-to-i-cleanup23 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: b .LBB30_9 +; CHECK-NEXT: .LBB30_8: @ %fp-to-i-if-exp.large28 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: strd r2, r3, [sp, #144] +; CHECK-NEXT: sub.w r2, r1, #150 +; CHECK-NEXT: movs r1, #12 +; CHECK-NEXT: strd r3, r3, [sp, #152] +; CHECK-NEXT: strd r3, r3, [sp, #136] +; CHECK-NEXT: and.w r1, r1, r2, lsr #3 +; CHECK-NEXT: strd r3, r3, [sp, #128] +; CHECK-NEXT: add r3, sp, #128 +; CHECK-NEXT: adds r3, #16 +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: subs r1, r3, r1 +; CHECK-NEXT: eor r7, r2, #63 +; CHECK-NEXT: ldrd r8, r5, [r1] +; CHECK-NEXT: rsbs r7, r7, #0 +; CHECK-NEXT: ldrd r6, r1, [r1, #8] +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: mov r4, r8 +; CHECK-NEXT: lsll r6, r1, r2 +; CHECK-NEXT: lsrl r4, r3, #1 +; CHECK-NEXT: lsll r8, r5, r2 +; CHECK-NEXT: lsll r4, r3, r7 +; CHECK-NEXT: orr.w r10, r1, r3 +; CHECK-NEXT: orr.w r3, r6, r4 +; CHECK-NEXT: .LBB30_9: @ %fp-to-i-cleanup23 +; CHECK-NEXT: vmov r7, s1 +; CHECK-NEXT: movw r11, #65535 +; CHECK-NEXT: movt r11, #127 +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: strd r3, r5, [sp, #16] @ 8-byte Folded Spill +; CHECK-NEXT: ubfx r2, r7, #23, #8 +; CHECK-NEXT: cmp r2, #127 +; CHECK-NEXT: blo .LBB30_15 +; CHECK-NEXT: @ %bb.10: @ %fp-to-i-cleanup23 +; CHECK-NEXT: vcmp.f32 s1, s1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r11, #15 -; CHECK-NEXT: movgt.w r8, #-1 -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s16, #0 +; CHECK-NEXT: bvs .LBB30_15 +; CHECK-NEXT: @ %bb.11: @ %fp-to-i-cleanup23 +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: bmi .LBB30_17 +; CHECK-NEXT: @ %bb.12: @ %fp-to-i-if-check.saturate13 +; CHECK-NEXT: mov.w r6, #-1 +; CHECK-NEXT: mov.w r3, #-1 +; CHECK-NEXT: mov.w lr, #-1 +; CHECK-NEXT: movs r1, #15 +; CHECK-NEXT: cmp r2, #226 +; CHECK-NEXT: bhi .LBB30_17 +; CHECK-NEXT: @ %bb.13: @ %fp-to-i-if-check.exp.size15 +; CHECK-NEXT: and.w r1, r7, r11 +; CHECK-NEXT: cmp r2, #149 +; CHECK-NEXT: add.w r1, r1, #8388608 +; CHECK-NEXT: bhi .LBB30_16 +; CHECK-NEXT: @ %bb.14: @ %fp-to-i-if-exp.small16 +; CHECK-NEXT: rsb.w r2, r2, #150 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: mov lr, r3 +; CHECK-NEXT: lsr.w r6, r1, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: b .LBB30_17 +; CHECK-NEXT: .LBB30_15: +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: b .LBB30_17 +; CHECK-NEXT: .LBB30_16: @ %fp-to-i-if-exp.large17 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: sub.w r7, r2, #150 +; CHECK-NEXT: strd r1, r3, [sp, #112] +; CHECK-NEXT: movs r1, #12 +; CHECK-NEXT: strd r3, r3, [sp, #120] +; CHECK-NEXT: and.w r1, r1, r7, lsr #3 +; CHECK-NEXT: strd r3, r3, [sp, #104] +; CHECK-NEXT: and r7, r7, #31 +; CHECK-NEXT: strd r3, r3, [sp, #96] +; CHECK-NEXT: add r3, sp, #96 +; CHECK-NEXT: adds r3, #16 +; CHECK-NEXT: eor r4, r7, #63 +; CHECK-NEXT: subs r1, r3, r1 +; CHECK-NEXT: rsbs r4, r4, #0 +; CHECK-NEXT: ldrd r6, r3, [r1] +; CHECK-NEXT: ldrd r12, r1, [r1, #8] +; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: lsll r12, r1, r7 +; CHECK-NEXT: lsrl r2, r5, #1 +; CHECK-NEXT: lsll r6, r3, r7 +; CHECK-NEXT: lsll r2, r5, r4 +; CHECK-NEXT: orrs r1, r5 +; CHECK-NEXT: orr.w lr, r12, r2 +; CHECK-NEXT: .LBB30_17: @ %fp-to-i-cleanup12 +; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: str.w r8, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: ubfx r5, r1, #23, #8 +; CHECK-NEXT: cmp r5, #127 +; CHECK-NEXT: blo .LBB30_23 +; CHECK-NEXT: @ %bb.18: @ %fp-to-i-cleanup12 +; CHECK-NEXT: vcmp.f32 s2, s2 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: bvs .LBB30_23 +; CHECK-NEXT: @ %bb.19: @ %fp-to-i-cleanup12 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: bmi .LBB30_26 +; CHECK-NEXT: @ %bb.20: @ %fp-to-i-if-check.saturate2 +; CHECK-NEXT: mov.w r2, #-1 +; CHECK-NEXT: mov.w r4, #-1 +; CHECK-NEXT: movs r7, #15 +; CHECK-NEXT: cmp r5, #226 +; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: bhi .LBB30_26 +; CHECK-NEXT: @ %bb.21: @ %fp-to-i-if-check.exp.size4 +; CHECK-NEXT: and.w r1, r1, r11 +; CHECK-NEXT: cmp r5, #149 +; CHECK-NEXT: add.w r1, r1, #8388608 +; CHECK-NEXT: bhi .LBB30_25 +; CHECK-NEXT: @ %bb.22: @ %fp-to-i-if-exp.small5 +; CHECK-NEXT: rsb.w r2, r5, #150 +; CHECK-NEXT: lsr.w r4, r1, r2 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: b .LBB30_24 +; CHECK-NEXT: .LBB30_23: +; CHECK-NEXT: str r4, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: .LBB30_24: @ %fp-to-i-cleanup1 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: b .LBB30_26 +; CHECK-NEXT: .LBB30_25: @ %fp-to-i-if-exp.large6 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: subs r5, #150 +; CHECK-NEXT: strd r1, r2, [sp, #80] +; CHECK-NEXT: movs r1, #12 +; CHECK-NEXT: strd r2, r2, [sp, #88] +; CHECK-NEXT: and.w r1, r1, r5, lsr #3 +; CHECK-NEXT: strd r2, r2, [sp, #72] +; CHECK-NEXT: and r5, r5, #31 +; CHECK-NEXT: strd r2, r2, [sp, #64] +; CHECK-NEXT: add r2, sp, #64 +; CHECK-NEXT: adds r2, #16 +; CHECK-NEXT: subs r1, r2, r1 +; CHECK-NEXT: ldrd r4, r12, [r1] +; CHECK-NEXT: ldrd r8, r9, [r1, #8] +; CHECK-NEXT: eor r1, r5, #63 +; CHECK-NEXT: mov r7, r12 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: lsll r8, r9, r5 +; CHECK-NEXT: lsrl r2, r7, #1 +; CHECK-NEXT: lsll r2, r7, r1 +; CHECK-NEXT: mov r1, r12 +; CHECK-NEXT: orr.w r7, r7, r9 +; CHECK-NEXT: orr.w r2, r2, r8 +; CHECK-NEXT: lsll r4, r1, r5 +; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: .LBB30_26: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r1, s3 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: mov r12, r10 +; CHECK-NEXT: strd r4, r7, [sp] @ 8-byte Folded Spill +; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: ubfx r9, r1, #23, #8 +; CHECK-NEXT: cmp.w r9, #127 +; CHECK-NEXT: blo .LBB30_32 +; CHECK-NEXT: @ %bb.27: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vcmp.f32 s3, s3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: str r2, [r4, #8] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: str r1, [r4, #4] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str r0, [r4] -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: lsrl r0, r9, #28 +; CHECK-NEXT: bvs .LBB30_32 +; CHECK-NEXT: @ %bb.28: @ %fp-to-i-cleanup1 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r5, r8 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bmi .LBB30_34 +; CHECK-NEXT: @ %bb.29: @ %fp-to-i-if-check.saturate +; CHECK-NEXT: mov.w r8, #-1 +; CHECK-NEXT: mov.w r5, #-1 +; CHECK-NEXT: mov.w r10, #-1 +; CHECK-NEXT: mov.w r11, #15 +; CHECK-NEXT: cmp.w r9, #226 +; CHECK-NEXT: bhi .LBB30_34 +; CHECK-NEXT: @ %bb.30: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: ands r1, r2 +; CHECK-NEXT: cmp.w r9, #149 +; CHECK-NEXT: add.w r1, r1, #8388608 +; CHECK-NEXT: bhi .LBB30_33 +; CHECK-NEXT: @ %bb.31: @ %fp-to-i-if-exp.small +; CHECK-NEXT: rsb.w r5, r9, #150 +; CHECK-NEXT: lsr.w r8, r1, r5 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: mov r10, r5 +; CHECK-NEXT: mov r11, r5 +; CHECK-NEXT: b .LBB30_34 +; CHECK-NEXT: .LBB30_32: +; CHECK-NEXT: mov r5, r8 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: b .LBB30_34 +; CHECK-NEXT: .LBB30_33: @ %fp-to-i-if-exp.large +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: sub.w r9, r9, #150 +; CHECK-NEXT: strd r1, r5, [sp, #48] +; CHECK-NEXT: movs r1, #12 +; CHECK-NEXT: strd r5, r5, [sp, #56] +; CHECK-NEXT: and.w r1, r1, r9, lsr #3 +; CHECK-NEXT: strd r5, r5, [sp, #40] +; CHECK-NEXT: and r9, r9, #31 +; CHECK-NEXT: strd r5, r5, [sp, #32] +; CHECK-NEXT: add r5, sp, #32 +; CHECK-NEXT: adds r5, #16 +; CHECK-NEXT: subs r1, r5, r1 +; CHECK-NEXT: ldrd r8, r5, [r1] +; CHECK-NEXT: ldrd r10, r11, [r1, #8] +; CHECK-NEXT: eor r1, r9, #63 +; CHECK-NEXT: mov r7, r5 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: lsll r10, r11, r9 +; CHECK-NEXT: lsrl r2, r7, #1 +; CHECK-NEXT: lsll r8, r5, r9 +; CHECK-NEXT: lsll r2, r7, r1 +; CHECK-NEXT: orr.w r11, r11, r7 +; CHECK-NEXT: orr.w r10, r10, r2 +; CHECK-NEXT: .LBB30_34: @ %fp-to-i-cleanup +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: lsrl r2, r5, #28 +; CHECK-NEXT: str r1, [r0] +; CHECK-NEXT: str.w r2, [r0, #41] +; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: str r1, [r0, #4] +; CHECK-NEXT: orr.w r1, r5, r10, lsl #4 +; CHECK-NEXT: str.w r1, [r0, #45] +; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: str r1, [r0, #8] ; CHECK-NEXT: and r1, r11, #15 -; CHECK-NEXT: str.w r0, [r4, #41] -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: lsrl r0, r5, #28 -; CHECK-NEXT: str r0, [r4, #16] -; CHECK-NEXT: orr.w r0, r9, r8, lsl #4 -; CHECK-NEXT: lsrl r8, r1, #28 -; CHECK-NEXT: str.w r0, [r4, #45] -; CHECK-NEXT: strb.w r8, [r4, #49] -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 -; CHECK-NEXT: str.w r0, [r4, #37] -; CHECK-NEXT: orr.w r0, r5, r6, lsl #4 -; CHECK-NEXT: str r0, [r4, #20] -; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-NEXT: and r1, r0, #15 -; CHECK-NEXT: lsrl r6, r1, #28 -; CHECK-NEXT: strb r6, [r4, #24] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: and r0, r3, #15 -; CHECK-NEXT: orr.w r0, r0, r10, lsl #4 -; CHECK-NEXT: str r0, [r4, #12] -; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: vpop {d8, d9, d10} -; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: lsrl r10, r1, #28 +; CHECK-NEXT: and r1, r12, #15 +; CHECK-NEXT: strb.w r10, [r0, #49] +; CHECK-NEXT: orr.w r1, r1, r6, lsl #4 +; CHECK-NEXT: lsrl r6, r3, #28 +; CHECK-NEXT: str r1, [r0, #12] +; CHECK-NEXT: orr.w r1, r3, lr, lsl #4 +; CHECK-NEXT: str r6, [r0, #16] +; CHECK-NEXT: str r1, [r0, #20] +; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload +; CHECK-NEXT: str.w r1, [r0, #25] +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: and r1, r1, #15 +; CHECK-NEXT: lsrl lr, r1, #28 +; CHECK-NEXT: strb.w lr, [r0, #24] +; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: str.w r1, [r0, #29] +; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: str.w r1, [r0, #33] +; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: and r1, r1, #15 +; CHECK-NEXT: orr.w r1, r1, r8, lsl #4 +; CHECK-NEXT: str.w r1, [r0, #37] +; CHECK-NEXT: add sp, #164 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI30_0: -; CHECK-NEXT: .long 0x717fffff @ float 1.26765052E+30 %x = call <4 x i100> @llvm.fptoui.sat.v4f32.v4i100(<4 x float> %f) ret <4 x i100> %x } define arm_aapcs_vfpcc <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i128: -; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10} -; CHECK-NEXT: vpush {d8, d9, d10} -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, s19 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vmov r5, s18 -; CHECK-NEXT: vldr s20, .LCPI31_0 -; CHECK-NEXT: vcmp.f32 s19, #0 -; CHECK-NEXT: add.w r12, r4, #48 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: vmov r7, s16 -; CHECK-NEXT: vmov r6, s17 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: add.w r12, r4, #32 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s17, #0 -; CHECK-NEXT: add.w r12, r4, #16 +; CHECK: @ %bb.0: @ %fp-to-i-entryfp-to-i-entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #164 +; CHECK-NEXT: sub sp, #164 +; CHECK-NEXT: vmov r7, s0 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: str.w r9, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: ubfx r3, r7, #23, #8 +; CHECK-NEXT: cmp r3, #127 +; CHECK-NEXT: blo .LBB31_6 +; CHECK-NEXT: @ %bb.1: @ %fp-to-i-entryfp-to-i-entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: vcmp.f32 s0, s0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 +; CHECK-NEXT: bvs .LBB31_6 +; CHECK-NEXT: @ %bb.2: @ %fp-to-i-entryfp-to-i-entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: bmi .LBB31_8 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-check.saturate24 +; CHECK-NEXT: mov.w r1, #-1 +; CHECK-NEXT: mov.w r9, #-1 +; CHECK-NEXT: mov.w r4, #-1 +; CHECK-NEXT: mov.w r8, #-1 +; CHECK-NEXT: cmp r3, #254 +; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: bhi .LBB31_8 +; CHECK-NEXT: @ %bb.4: @ %fp-to-i-if-check.exp.size26 +; CHECK-NEXT: movs r1, #1 +; CHECK-NEXT: cmp r3, #149 +; CHECK-NEXT: bfi r7, r1, #23, #9 +; CHECK-NEXT: bhi .LBB31_7 +; CHECK-NEXT: @ %bb.5: @ %fp-to-i-if-exp.small27 +; CHECK-NEXT: rsb.w r1, r3, #150 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: lsr.w r1, r7, r1 +; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: .LBB31_6: +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: b .LBB31_8 +; CHECK-NEXT: .LBB31_7: @ %fp-to-i-if-exp.large28 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: sub.w r2, r3, #150 +; CHECK-NEXT: add r3, sp, #128 +; CHECK-NEXT: strd r1, r1, [sp, #152] +; CHECK-NEXT: strd r7, r1, [sp, #144] +; CHECK-NEXT: adds r3, #16 +; CHECK-NEXT: strd r1, r1, [sp, #136] +; CHECK-NEXT: strd r1, r1, [sp, #128] +; CHECK-NEXT: movs r1, #12 +; CHECK-NEXT: and.w r1, r1, r2, lsr #3 +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: subs r1, r3, r1 +; CHECK-NEXT: eor r7, r2, #63 +; CHECK-NEXT: ldrd r5, r9, [r1] +; CHECK-NEXT: rsbs r7, r7, #0 +; CHECK-NEXT: ldrd r6, r1, [r1, #8] +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: mov r4, r5 +; CHECK-NEXT: lsll r6, r1, r2 +; CHECK-NEXT: lsrl r4, r3, #1 +; CHECK-NEXT: lsll r4, r3, r7 +; CHECK-NEXT: orrs r4, r6 +; CHECK-NEXT: mov r6, r5 +; CHECK-NEXT: orr.w r8, r1, r3 +; CHECK-NEXT: lsll r6, r9, r2 +; CHECK-NEXT: str r6, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: .LBB31_8: @ %fp-to-i-cleanup23 +; CHECK-NEXT: vmov r1, s1 +; CHECK-NEXT: movw r3, #65535 +; CHECK-NEXT: movt r3, #127 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: ubfx r7, r1, #23, #8 +; CHECK-NEXT: cmp r7, #127 +; CHECK-NEXT: blo .LBB31_14 +; CHECK-NEXT: @ %bb.9: @ %fp-to-i-cleanup23 +; CHECK-NEXT: vcmp.f32 s1, s1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s16, #0 +; CHECK-NEXT: bvs .LBB31_14 +; CHECK-NEXT: @ %bb.10: @ %fp-to-i-cleanup23 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: str r2, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: bmi .LBB31_17 +; CHECK-NEXT: @ %bb.11: @ %fp-to-i-if-check.saturate13 +; CHECK-NEXT: mov.w r2, #-1 +; CHECK-NEXT: mov.w r10, #-1 +; CHECK-NEXT: mov.w r6, #-1 +; CHECK-NEXT: cmp r7, #254 +; CHECK-NEXT: str r2, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: bhi .LBB31_17 +; CHECK-NEXT: @ %bb.12: @ %fp-to-i-if-check.exp.size15 +; CHECK-NEXT: ands r1, r3 +; CHECK-NEXT: cmp r7, #149 +; CHECK-NEXT: add.w r1, r1, #8388608 +; CHECK-NEXT: bhi .LBB31_16 +; CHECK-NEXT: @ %bb.13: @ %fp-to-i-if-exp.small16 +; CHECK-NEXT: rsb.w r2, r7, #150 +; CHECK-NEXT: lsr.w r10, r1, r2 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: b .LBB31_15 +; CHECK-NEXT: .LBB31_14: +; CHECK-NEXT: str.w r10, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: .LBB31_15: @ %fp-to-i-cleanup12 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: b .LBB31_17 +; CHECK-NEXT: .LBB31_16: @ %fp-to-i-if-exp.large17 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: add r3, sp, #96 +; CHECK-NEXT: strd r1, r2, [sp, #112] +; CHECK-NEXT: movs r1, #12 +; CHECK-NEXT: strd r2, r2, [sp, #120] +; CHECK-NEXT: adds r3, #16 +; CHECK-NEXT: strd r2, r2, [sp, #104] +; CHECK-NEXT: strd r2, r2, [sp, #96] +; CHECK-NEXT: sub.w r2, r7, #150 +; CHECK-NEXT: and r11, r2, #31 +; CHECK-NEXT: and.w r1, r1, r2, lsr #3 +; CHECK-NEXT: eor r7, r11, #63 +; CHECK-NEXT: subs r1, r3, r1 +; CHECK-NEXT: rsbs r7, r7, #0 +; CHECK-NEXT: ldrd r10, r3, [r1] +; CHECK-NEXT: ldrd r12, r1, [r1, #8] +; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: mov r6, r10 +; CHECK-NEXT: lsll r12, r1, r11 +; CHECK-NEXT: lsrl r6, r5, #1 +; CHECK-NEXT: lsll r10, r3, r11 +; CHECK-NEXT: lsll r6, r5, r7 +; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: orrs r1, r5 +; CHECK-NEXT: movw r3, #65535 +; CHECK-NEXT: orr.w r2, r12, r6 +; CHECK-NEXT: movt r3, #127 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: .LBB31_17: @ %fp-to-i-cleanup12 +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: str.w r9, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: strd r8, r4, [sp, #12] @ 8-byte Folded Spill +; CHECK-NEXT: strd r6, r2, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: ubfx r5, r1, #23, #8 +; CHECK-NEXT: cmp r5, #127 +; CHECK-NEXT: blo .LBB31_23 +; CHECK-NEXT: @ %bb.18: @ %fp-to-i-cleanup12 +; CHECK-NEXT: vcmp.f32 s2, s2 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: bvs .LBB31_23 +; CHECK-NEXT: @ %bb.19: @ %fp-to-i-cleanup12 +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bmi .LBB31_26 +; CHECK-NEXT: @ %bb.20: @ %fp-to-i-if-check.saturate2 +; CHECK-NEXT: mov.w lr, #-1 +; CHECK-NEXT: mov.w r7, #-1 +; CHECK-NEXT: mov.w r12, #-1 +; CHECK-NEXT: mov.w r8, #-1 +; CHECK-NEXT: cmp r5, #254 +; CHECK-NEXT: bhi .LBB31_26 +; CHECK-NEXT: @ %bb.21: @ %fp-to-i-if-check.exp.size4 +; CHECK-NEXT: ands r1, r3 +; CHECK-NEXT: cmp r5, #149 +; CHECK-NEXT: add.w r1, r1, #8388608 +; CHECK-NEXT: bhi .LBB31_25 +; CHECK-NEXT: @ %bb.22: @ %fp-to-i-if-exp.small5 +; CHECK-NEXT: rsb.w r2, r5, #150 +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: lsr.w lr, r1, r2 +; CHECK-NEXT: b .LBB31_24 +; CHECK-NEXT: .LBB31_23: +; CHECK-NEXT: mov r7, lr +; CHECK-NEXT: .LBB31_24: @ %fp-to-i-cleanup1 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: b .LBB31_26 +; CHECK-NEXT: .LBB31_25: @ %fp-to-i-if-exp.large6 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: add r7, sp, #64 +; CHECK-NEXT: strd r1, r2, [sp, #80] +; CHECK-NEXT: movs r1, #12 +; CHECK-NEXT: strd r2, r2, [sp, #88] +; CHECK-NEXT: adds r7, #16 +; CHECK-NEXT: strd r2, r2, [sp, #72] +; CHECK-NEXT: strd r2, r2, [sp, #64] +; CHECK-NEXT: sub.w r2, r5, #150 +; CHECK-NEXT: and.w r1, r1, r2, lsr #3 +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: subs r1, r7, r1 +; CHECK-NEXT: eor r3, r2, #63 +; CHECK-NEXT: ldrd lr, r7, [r1] +; CHECK-NEXT: rsbs r3, r3, #0 +; CHECK-NEXT: ldrd r6, r1, [r1, #8] +; CHECK-NEXT: mov r5, r7 +; CHECK-NEXT: mov r4, lr +; CHECK-NEXT: lsll r6, r1, r2 +; CHECK-NEXT: lsrl r4, r5, #1 +; CHECK-NEXT: lsll lr, r7, r2 +; CHECK-NEXT: lsll r4, r5, r3 +; CHECK-NEXT: orr.w r8, r1, r5 +; CHECK-NEXT: orr.w r12, r6, r4 +; CHECK-NEXT: .LBB31_26: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r9, s3 +; CHECK-NEXT: mov r11, r10 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: ubfx r1, r9, #23, #8 +; CHECK-NEXT: cmp r1, #127 +; CHECK-NEXT: blo .LBB31_32 +; CHECK-NEXT: @ %bb.27: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vcmp.f32 s3, s3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: stm r4!, {r0, r1, r2, r3} -; CHECK-NEXT: vpop {d8, d9, d10} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI31_0: -; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 +; CHECK-NEXT: bvs .LBB31_32 +; CHECK-NEXT: @ %bb.28: @ %fp-to-i-cleanup1 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: cmp.w r9, #0 +; CHECK-NEXT: bmi .LBB31_35 +; CHECK-NEXT: @ %bb.29: @ %fp-to-i-if-check.saturate +; CHECK-NEXT: mov.w r10, #-1 +; CHECK-NEXT: mov.w r5, #-1 +; CHECK-NEXT: mov.w r2, #-1 +; CHECK-NEXT: mov.w r4, #-1 +; CHECK-NEXT: cmp r1, #254 +; CHECK-NEXT: bhi .LBB31_35 +; CHECK-NEXT: @ %bb.30: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: movw r2, #65535 +; CHECK-NEXT: cmp r1, #149 +; CHECK-NEXT: movt r2, #127 +; CHECK-NEXT: and.w r2, r2, r9 +; CHECK-NEXT: add.w r2, r2, #8388608 +; CHECK-NEXT: bhi .LBB31_34 +; CHECK-NEXT: @ %bb.31: @ %fp-to-i-if-exp.small +; CHECK-NEXT: rsb.w r1, r1, #150 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: lsr.w r10, r2, r1 +; CHECK-NEXT: b .LBB31_33 +; CHECK-NEXT: .LBB31_32: +; CHECK-NEXT: mov r5, r10 +; CHECK-NEXT: .LBB31_33: @ %fp-to-i-cleanup +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: b .LBB31_35 +; CHECK-NEXT: .LBB31_34: @ %fp-to-i-if-exp.large +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: sub.w r4, r1, #150 +; CHECK-NEXT: movs r1, #12 +; CHECK-NEXT: strd r3, r3, [sp, #56] +; CHECK-NEXT: strd r2, r3, [sp, #48] +; CHECK-NEXT: and.w r1, r1, r4, lsr #3 +; CHECK-NEXT: strd r3, r3, [sp, #40] +; CHECK-NEXT: and r9, r4, #31 +; CHECK-NEXT: strd r3, r3, [sp, #32] +; CHECK-NEXT: add r3, sp, #32 +; CHECK-NEXT: adds r3, #16 +; CHECK-NEXT: eor r4, r9, #63 +; CHECK-NEXT: subs r1, r3, r1 +; CHECK-NEXT: rsbs r4, r4, #0 +; CHECK-NEXT: ldrd r10, r5, [r1] +; CHECK-NEXT: ldrd r6, r1, [r1, #8] +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: lsll r6, r1, r9 +; CHECK-NEXT: lsrl r2, r3, #1 +; CHECK-NEXT: lsll r10, r5, r9 +; CHECK-NEXT: lsll r2, r3, r4 +; CHECK-NEXT: orr.w r4, r1, r3 +; CHECK-NEXT: orrs r2, r6 +; CHECK-NEXT: .LBB31_35: @ %fp-to-i-cleanup +; CHECK-NEXT: str.w r10, [r0, #48] +; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: str r1, [r0] +; CHECK-NEXT: str r5, [r0, #52] +; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: str r1, [r0, #4] +; CHECK-NEXT: str r2, [r0, #56] +; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: str r1, [r0, #8] +; CHECK-NEXT: str r4, [r0, #60] +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: strd r1, r11, [r0, #12] +; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: str r1, [r0, #20] +; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: str r1, [r0, #24] +; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: strd r1, lr, [r0, #28] +; CHECK-NEXT: strd r7, r12, [r0, #36] +; CHECK-NEXT: str.w r8, [r0, #44] +; CHECK-NEXT: add sp, #164 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} %x = call <4 x i128> @llvm.fptoui.sat.v4f32.v4i128(<4 x float> %f) ret <4 x i128> %x } @@ -2574,401 +2959,385 @@ define arm_aapcs_vfpcc <2 x i64> @test_unsigned_v2f64_v2i64(<2 x double> %f) { define arm_aapcs_vfpcc <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i100: -; CHECK: @ %bb.0: +; CHECK: @ %bb.0: @ %fp-to-i-entryfp-to-i-entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #48 -; CHECK-NEXT: sub sp, #48 +; CHECK-NEXT: .pad #72 +; CHECK-NEXT: sub sp, #72 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vldr d0, .LCPI40_0 -; CHECK-NEXT: vmov r11, r4, d8 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r9, r2 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r5, r4 -; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: vldr d0, .LCPI40_1 -; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill -; CHECK-NEXT: csel r4, r2, r8, ne -; CHECK-NEXT: vmov r10, r3, d0 -; CHECK-NEXT: strd r1, r0, [sp, #16] @ 8-byte Folded Spill -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: str r5, [sp, #40] @ 4-byte Spill -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r8, r3 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: str r4, [r6, #8] -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: ldr r7, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vmov r6, r5, d8 +; CHECK-NEXT: mov.w r10, #1 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: movw r11, #1023 +; CHECK-NEXT: ubfx r7, r5, #20, #11 +; CHECK-NEXT: cmp r7, r11 +; CHECK-NEXT: blo .LBB40_6 +; CHECK-NEXT: @ %bb.1: @ %fp-to-i-entryfp-to-i-entry +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: bl __aeabi_dcmpun +; CHECK-NEXT: cbnz r0, .LBB40_6 +; CHECK-NEXT: @ %bb.2: @ %fp-to-i-entryfp-to-i-entry ; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: str.w r10, [sp, #44] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: str r4, [r6, #4] -; CHECK-NEXT: mov r5, r6 -; CHECK-NEXT: str r6, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: ldr r6, [sp, #40] @ 4-byte Reload -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: str.w r11, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: bmi .LBB40_8 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-check.saturate2 +; CHECK-NEXT: mov.w r8, #-1 +; CHECK-NEXT: mov.w r3, #-1 +; CHECK-NEXT: mov.w lr, #-1 +; CHECK-NEXT: mov.w r9, #15 +; CHECK-NEXT: movw r0, #1122 +; CHECK-NEXT: cmp r7, r0 +; CHECK-NEXT: bhi .LBB40_8 +; CHECK-NEXT: @ %bb.4: @ %fp-to-i-if-check.exp.size4 +; CHECK-NEXT: bfi r5, r10, #20, #12 +; CHECK-NEXT: movw r0, #1074 +; CHECK-NEXT: cmp r7, r0 +; CHECK-NEXT: bhi .LBB40_7 +; CHECK-NEXT: @ %bb.5: @ %fp-to-i-if-exp.small5 +; CHECK-NEXT: subw r0, r7, #1075 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: lsll r6, r5, r0 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: mov r8, r6 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: b .LBB40_8 +; CHECK-NEXT: .LBB40_6: ; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: b .LBB40_8 +; CHECK-NEXT: .LBB40_7: @ %fp-to-i-if-exp.large6 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: add r2, sp, #40 +; CHECK-NEXT: strd r0, r0, [sp, #64] +; CHECK-NEXT: movs r1, #12 +; CHECK-NEXT: strd r0, r0, [sp, #48] +; CHECK-NEXT: adds r2, #16 +; CHECK-NEXT: strd r0, r0, [sp, #40] +; CHECK-NEXT: subw r0, r7, #1075 +; CHECK-NEXT: strd r6, r5, [sp, #56] +; CHECK-NEXT: and.w r1, r1, r0, lsr #3 +; CHECK-NEXT: and r0, r0, #31 +; CHECK-NEXT: subs r1, r2, r1 +; CHECK-NEXT: eor r7, r0, #63 +; CHECK-NEXT: ldrd r8, r3, [r1] +; CHECK-NEXT: rsbs r7, r7, #0 +; CHECK-NEXT: ldrd r2, r1, [r1, #8] +; CHECK-NEXT: mov r5, r3 ; CHECK-NEXT: mov r6, r8 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vmov r8, r11, d9 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: str r4, [r5] -; CHECK-NEXT: mov r10, r9 -; CHECK-NEXT: str.w r9, [sp, #32] @ 4-byte Spill -; CHECK-NEXT: mov r5, r7 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: ldr r4, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: strd r3, r0, [sp, #16] @ 8-byte Folded Spill -; CHECK-NEXT: csel r7, r1, r9, ne -; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: str r6, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r7, #-1 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: csel r9, r1, r0, ne -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: ldr.w r9, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: lsrl r0, r7, #28 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: str.w r0, [r9, #16] -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: ldr r4, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: csel r10, r1, r0, ne -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #-1 -; CHECK-NEXT: orr.w r0, r7, r10, lsl #4 -; CHECK-NEXT: str.w r0, [r9, #20] -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r5, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: csel r7, r1, r0, ne -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r8, r6 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #15 -; CHECK-NEXT: and r1, r7, #15 -; CHECK-NEXT: lsrl r10, r1, #28 -; CHECK-NEXT: strb.w r10, [r9, #24] -; CHECK-NEXT: ldr r6, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: ldr r4, [sp, #40] @ 4-byte Reload -; CHECK-NEXT: ldrd r2, r3, [sp, #32] @ 8-byte Folded Reload +; CHECK-NEXT: lsll r2, r1, r0 +; CHECK-NEXT: lsrl r6, r5, #1 +; CHECK-NEXT: lsll r8, r3, r0 +; CHECK-NEXT: lsll r6, r5, r7 +; CHECK-NEXT: orr.w r9, r1, r5 +; CHECK-NEXT: orr.w lr, r2, r6 +; CHECK-NEXT: .LBB40_8: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r6, r5, d9 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: ubfx r7, r5, #20, #11 +; CHECK-NEXT: cmp r7, r11 +; CHECK-NEXT: blo .LBB40_14 +; CHECK-NEXT: @ %bb.9: @ %fp-to-i-cleanup1 +; CHECK-NEXT: str.w r8, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r8, r3 ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: mov r11, r4 +; CHECK-NEXT: mov r4, lr +; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: bne .LBB40_16 +; CHECK-NEXT: @ %bb.10: @ %fp-to-i-cleanup1 ; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: csel r7, r1, r0, ne -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #15 -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: and r0, r7, #15 -; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 -; CHECK-NEXT: str.w r0, [r9, #12] -; CHECK-NEXT: add sp, #48 +; CHECK-NEXT: ldr.w r8, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov lr, r4 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: mov r4, r11 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: bmi .LBB40_17 +; CHECK-NEXT: @ %bb.11: @ %fp-to-i-if-check.saturate +; CHECK-NEXT: movw r0, #1122 +; CHECK-NEXT: cmp r7, r0 +; CHECK-NEXT: mov.w r10, #-1 +; CHECK-NEXT: mov.w r1, #-1 +; CHECK-NEXT: mov.w r0, #-1 +; CHECK-NEXT: mov.w r2, #15 +; CHECK-NEXT: bhi .LBB40_17 +; CHECK-NEXT: @ %bb.12: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: movs r0, #1 +; CHECK-NEXT: bfi r5, r0, #20, #12 +; CHECK-NEXT: movw r0, #1074 +; CHECK-NEXT: cmp r7, r0 +; CHECK-NEXT: bhi .LBB40_15 +; CHECK-NEXT: @ %bb.13: @ %fp-to-i-if-exp.small +; CHECK-NEXT: subw r0, r7, #1075 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: lsll r6, r5, r0 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: mov r10, r6 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: b .LBB40_17 +; CHECK-NEXT: .LBB40_14: +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: b .LBB40_17 +; CHECK-NEXT: .LBB40_15: @ %fp-to-i-if-exp.large +; CHECK-NEXT: subw r7, r7, #1075 +; CHECK-NEXT: movs r1, #12 +; CHECK-NEXT: add r2, sp, #8 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: and.w r1, r1, r7, lsr #3 +; CHECK-NEXT: adds r2, #16 +; CHECK-NEXT: strd r0, r0, [sp, #32] +; CHECK-NEXT: subs r2, r2, r1 +; CHECK-NEXT: strd r0, r0, [sp, #16] +; CHECK-NEXT: and r7, r7, #31 +; CHECK-NEXT: strd r0, r0, [sp, #8] +; CHECK-NEXT: strd r6, r5, [sp, #24] +; CHECK-NEXT: ldrd r10, r1, [r2] +; CHECK-NEXT: ldrd r12, r11, [r2, #8] +; CHECK-NEXT: eor r2, r7, #63 +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: lsll r12, r11, r7 +; CHECK-NEXT: lsrl r0, r5, #1 +; CHECK-NEXT: lsll r10, r1, r7 +; CHECK-NEXT: lsll r0, r5, r2 +; CHECK-NEXT: orr.w r2, r11, r5 +; CHECK-NEXT: orr.w r0, r0, r12 +; CHECK-NEXT: b .LBB40_17 +; CHECK-NEXT: .LBB40_16: +; CHECK-NEXT: mov lr, r4 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: ldr.w r8, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r4, r11 +; CHECK-NEXT: .LBB40_17: @ %fp-to-i-cleanup +; CHECK-NEXT: mov r6, r10 +; CHECK-NEXT: str.w r8, [r4] +; CHECK-NEXT: lsrl r6, r1, #28 +; CHECK-NEXT: orr.w r1, r1, r0, lsl #4 +; CHECK-NEXT: str r6, [r4, #16] +; CHECK-NEXT: str r3, [r4, #4] +; CHECK-NEXT: str r1, [r4, #20] +; CHECK-NEXT: and r1, r2, #15 +; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: str.w lr, [r4, #8] +; CHECK-NEXT: strb r0, [r4, #24] +; CHECK-NEXT: and r0, r9, #15 +; CHECK-NEXT: orr.w r0, r0, r10, lsl #4 +; CHECK-NEXT: str r0, [r4, #12] +; CHECK-NEXT: add sp, #72 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; CHECK-NEXT: .p2align 3 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI40_0: -; CHECK-NEXT: .long 0 @ double 0 -; CHECK-NEXT: .long 0 -; CHECK-NEXT: .LCPI40_1: -; CHECK-NEXT: .long 4294967295 @ double 1.2676506002282293E+30 -; CHECK-NEXT: .long 1177550847 %x = call <2 x i100> @llvm.fptoui.sat.v2f64.v2i100(<2 x double> %f) ret <2 x i100> %x } define arm_aapcs_vfpcc <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i128: -; CHECK: @ %bb.0: +; CHECK: @ %bb.0: @ %fp-to-i-entryfp-to-i-entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: .pad #72 +; CHECK-NEXT: sub sp, #72 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vldr d0, .LCPI41_0 -; CHECK-NEXT: vmov r8, r7, d9 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r2, r9, d0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: mov r11, r2 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: vldr d0, .LCPI41_1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill -; CHECK-NEXT: csel r6, r3, r6, ne -; CHECK-NEXT: vmov r10, r5, d0 -; CHECK-NEXT: str r2, [sp] @ 4-byte Spill -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: str r5, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r6, #-1 -; CHECK-NEXT: str r6, [r4, #28] -; CHECK-NEXT: str.w r11, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: csel r5, r1, r0, ne -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r5, #-1 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: str r5, [r4, #24] -; CHECK-NEXT: mov r5, r4 -; CHECK-NEXT: mov r4, r9 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: csel r9, r1, r0, ne -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r9, #-1 -; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: str.w r9, [r5, #20] -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: vmov r6, r11, d8 -; CHECK-NEXT: mov r9, r4 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: str r4, [r5, #16] -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: ldr r5, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill -; CHECK-NEXT: csel r4, r3, r8, ne -; CHECK-NEXT: str r2, [sp] @ 4-byte Spill +; CHECK-NEXT: vmov r6, r5, d8 +; CHECK-NEXT: mov.w r10, #1 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: movw r11, #1023 +; CHECK-NEXT: ubfx r7, r5, #20, #11 +; CHECK-NEXT: cmp r7, r11 +; CHECK-NEXT: blo .LBB41_6 +; CHECK-NEXT: @ %bb.1: @ %fp-to-i-entryfp-to-i-entry ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: mov r8, r7 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: str r4, [r7, #12] -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: str r4, [r7, #8] -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: bl __aeabi_dcmpun +; CHECK-NEXT: cbnz r0, .LBB41_6 +; CHECK-NEXT: @ %bb.2: @ %fp-to-i-entryfp-to-i-entry +; CHECK-NEXT: mov r9, r8 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: bmi .LBB41_8 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-check.saturate2 +; CHECK-NEXT: mov.w r8, #-1 +; CHECK-NEXT: mov.w r9, #-1 +; CHECK-NEXT: mov.w r12, #-1 +; CHECK-NEXT: mov.w r3, #-1 +; CHECK-NEXT: movw r0, #1150 +; CHECK-NEXT: cmp r7, r0 +; CHECK-NEXT: bhi .LBB41_8 +; CHECK-NEXT: @ %bb.4: @ %fp-to-i-if-check.exp.size4 +; CHECK-NEXT: bfi r5, r10, #20, #12 +; CHECK-NEXT: movw r0, #1074 +; CHECK-NEXT: cmp r7, r0 +; CHECK-NEXT: bhi .LBB41_7 +; CHECK-NEXT: @ %bb.5: @ %fp-to-i-if-exp.small5 +; CHECK-NEXT: subw r0, r7, #1075 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: lsll r6, r5, r0 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: mov r8, r6 +; CHECK-NEXT: mov r9, r5 +; CHECK-NEXT: b .LBB41_8 +; CHECK-NEXT: .LBB41_6: +; CHECK-NEXT: mov r9, r8 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: b .LBB41_8 +; CHECK-NEXT: .LBB41_7: @ %fp-to-i-if-exp.large6 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: add r2, sp, #40 +; CHECK-NEXT: strd r0, r0, [sp, #64] +; CHECK-NEXT: movs r1, #12 +; CHECK-NEXT: strd r0, r0, [sp, #48] +; CHECK-NEXT: adds r2, #16 +; CHECK-NEXT: strd r0, r0, [sp, #40] +; CHECK-NEXT: subw r0, r7, #1075 +; CHECK-NEXT: strd r6, r5, [sp, #56] +; CHECK-NEXT: and.w r1, r1, r0, lsr #3 +; CHECK-NEXT: and r0, r0, #31 +; CHECK-NEXT: subs r1, r2, r1 +; CHECK-NEXT: eor r7, r0, #63 +; CHECK-NEXT: ldrd r8, r9, [r1] +; CHECK-NEXT: rsbs r7, r7, #0 +; CHECK-NEXT: ldrd r2, r1, [r1, #8] +; CHECK-NEXT: mov r5, r9 +; CHECK-NEXT: mov r6, r8 +; CHECK-NEXT: lsll r2, r1, r0 +; CHECK-NEXT: lsrl r6, r5, #1 +; CHECK-NEXT: lsll r8, r9, r0 +; CHECK-NEXT: lsll r6, r5, r7 +; CHECK-NEXT: orr.w r3, r1, r5 +; CHECK-NEXT: orr.w r12, r2, r6 +; CHECK-NEXT: .LBB41_8: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r10, r5, d9 +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: ubfx r7, r5, #20, #11 +; CHECK-NEXT: cmp r7, r11 +; CHECK-NEXT: blo .LBB41_14 +; CHECK-NEXT: @ %bb.9: @ %fp-to-i-cleanup1 +; CHECK-NEXT: mov r11, r8 +; CHECK-NEXT: mov r8, r9 +; CHECK-NEXT: mov r9, r3 +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: str r4, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r4, r12 +; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: bne .LBB41_16 +; CHECK-NEXT: @ %bb.10: @ %fp-to-i-cleanup1 +; CHECK-NEXT: mov r12, r4 +; CHECK-NEXT: ldr r4, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: str r4, [r7, #4] -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: csel r4, r1, r0, ne +; CHECK-NEXT: mov r9, r8 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: mov r8, r11 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: bmi .LBB41_17 +; CHECK-NEXT: @ %bb.11: @ %fp-to-i-if-check.saturate +; CHECK-NEXT: movw r0, #1150 +; CHECK-NEXT: cmp r7, r0 +; CHECK-NEXT: mov.w r6, #-1 +; CHECK-NEXT: mov.w r1, #-1 +; CHECK-NEXT: mov.w r0, #-1 +; CHECK-NEXT: mov.w r2, #-1 +; CHECK-NEXT: bhi .LBB41_17 +; CHECK-NEXT: @ %bb.12: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: movs r0, #1 +; CHECK-NEXT: bfi r5, r0, #20, #12 +; CHECK-NEXT: movw r0, #1074 +; CHECK-NEXT: cmp r7, r0 +; CHECK-NEXT: bhi .LBB41_15 +; CHECK-NEXT: @ %bb.13: @ %fp-to-i-if-exp.small +; CHECK-NEXT: subw r0, r7, #1075 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: lsll r10, r5, r0 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: mov r6, r10 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: b .LBB41_17 +; CHECK-NEXT: .LBB41_14: +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: b .LBB41_17 +; CHECK-NEXT: .LBB41_15: @ %fp-to-i-if-exp.large +; CHECK-NEXT: subw r7, r7, #1075 +; CHECK-NEXT: movs r1, #12 +; CHECK-NEXT: add r2, sp, #8 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: and.w r1, r1, r7, lsr #3 +; CHECK-NEXT: adds r2, #16 +; CHECK-NEXT: strd r0, r0, [sp, #32] +; CHECK-NEXT: subs r2, r2, r1 +; CHECK-NEXT: strd r0, r0, [sp, #16] +; CHECK-NEXT: and r7, r7, #31 +; CHECK-NEXT: strd r0, r0, [sp, #8] +; CHECK-NEXT: strd r10, r5, [sp, #24] +; CHECK-NEXT: ldrd r6, r1, [r2] +; CHECK-NEXT: ldrd r10, r11, [r2, #8] +; CHECK-NEXT: eor r2, r7, #63 +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: rsbs r2, r2, #0 ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: str r4, [r7] -; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: lsll r10, r11, r7 +; CHECK-NEXT: lsrl r0, r5, #1 +; CHECK-NEXT: lsll r6, r1, r7 +; CHECK-NEXT: lsll r0, r5, r2 +; CHECK-NEXT: orr.w r2, r11, r5 +; CHECK-NEXT: orr.w r0, r0, r10 +; CHECK-NEXT: b .LBB41_17 +; CHECK-NEXT: .LBB41_16: +; CHECK-NEXT: mov r12, r4 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: mov r9, r8 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: mov r8, r11 +; CHECK-NEXT: ldr r4, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: .LBB41_17: @ %fp-to-i-cleanup +; CHECK-NEXT: stm.w r4, {r8, r9, r12} +; CHECK-NEXT: strd r3, r6, [r4, #12] +; CHECK-NEXT: strd r1, r0, [r4, #20] +; CHECK-NEXT: str r2, [r4, #28] +; CHECK-NEXT: add sp, #72 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; CHECK-NEXT: .p2align 3 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI41_0: -; CHECK-NEXT: .long 0 @ double 0 -; CHECK-NEXT: .long 0 -; CHECK-NEXT: .LCPI41_1: -; CHECK-NEXT: .long 4294967295 @ double 3.4028236692093843E+38 -; CHECK-NEXT: .long 1206910975 %x = call <2 x i128> @llvm.fptoui.sat.v2f64.v2i128(<2 x double> %f) ret <2 x i128> %x } @@ -3947,271 +4316,100 @@ define arm_aapcs_vfpcc <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) { define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-LABEL: test_unsigned_v8f16_v8i100: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12} -; CHECK-NEXT: .pad #56 -; CHECK-NEXT: sub sp, #56 -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vcvtb.f32.f16 s22, s17 -; CHECK-NEXT: vmov r0, s22 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcvtb.f32.f16 s24, s18 -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: vmov r0, s24 -; CHECK-NEXT: vldr s20, .LCPI50_0 -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: mov r9, r1 -; CHECK-NEXT: mov r10, r2 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: str r3, [sp, #52] @ 4-byte Spill -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r8, #-1 -; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcvtb.f32.f16 s22, s19 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vmov r0, s22 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: vcmp.f32 s24, #0 -; CHECK-NEXT: mov r7, r2 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: str r3, [sp, #48] @ 4-byte Spill -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: str.w r2, [r4, #83] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: str.w r1, [r4, #79] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcvtt.f32.f16 s22, s16 -; CHECK-NEXT: str.w r0, [r4, #75] -; CHECK-NEXT: vmov r0, s22 -; CHECK-NEXT: str.w r7, [r4, #58] -; CHECK-NEXT: str.w r6, [r4, #54] -; CHECK-NEXT: str.w r5, [r4, #50] -; CHECK-NEXT: str.w r10, [r4, #33] -; CHECK-NEXT: str.w r9, [r4, #29] -; CHECK-NEXT: str.w r8, [r4, #25] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: str r3, [sp, #40] @ 4-byte Spill -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: str r3, [sp, #44] @ 4-byte Spill -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: str r2, [sp, #36] @ 4-byte Spill -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: str r1, [sp, #32] @ 4-byte Spill -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcvtt.f32.f16 s22, s17 -; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: vmov r0, s22 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: vcvtt.f32.f16 s18, s18 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: mov r9, r1 -; CHECK-NEXT: mov r8, r2 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r8, #-1 -; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcvtt.f32.f16 s18, s19 -; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcvtb.f32.f16 s16, s16 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: mov r11, r1 -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: mov r10, r2 -; CHECK-NEXT: mov r7, r3 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r11, #0 -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt r7, #15 -; CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: movgt.w r11, #-1 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: str r2, [r4, #8] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: str r1, [r4, #4] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str r0, [r4] -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: lsrl r0, r11, #28 -; CHECK-NEXT: and r1, r7, #15 -; CHECK-NEXT: str.w r0, [r4, #91] -; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r0, r2 -; CHECK-NEXT: lsrl r0, r9, #28 -; CHECK-NEXT: str.w r0, [r4, #66] -; CHECK-NEXT: ldr.w lr, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r0, lr -; CHECK-NEXT: lsrl r0, r5, #28 -; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: str.w r0, [r4, #41] -; CHECK-NEXT: ldr.w r12, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: ldr r5, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: mov r0, r12 -; CHECK-NEXT: lsrl r0, r5, #28 -; CHECK-NEXT: str r0, [r4, #16] -; CHECK-NEXT: orr.w r0, r11, r10, lsl #4 -; CHECK-NEXT: lsrl r10, r1, #28 -; CHECK-NEXT: str.w r0, [r4, #95] -; CHECK-NEXT: strb.w r10, [r4, #99] -; CHECK-NEXT: ldr r0, [sp, #40] @ 4-byte Reload -; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, r6, lsl #4 -; CHECK-NEXT: str.w r0, [r4, #87] -; CHECK-NEXT: orr.w r0, r9, r8, lsl #4 -; CHECK-NEXT: str.w r0, [r4, #70] -; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: and r1, r0, #15 -; CHECK-NEXT: lsrl r8, r1, #28 -; CHECK-NEXT: strb.w r8, [r4, #74] -; CHECK-NEXT: ldr r0, [sp, #48] @ 4-byte Reload -; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, r2, lsl #4 -; CHECK-NEXT: str.w r0, [r4, #62] -; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: orr.w r0, r0, r2, lsl #4 -; CHECK-NEXT: str.w r0, [r4, #45] -; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: and r1, r0, #15 -; CHECK-NEXT: lsrl r2, r1, #28 -; CHECK-NEXT: strb.w r2, [r4, #49] -; CHECK-NEXT: ldr r0, [sp, #52] @ 4-byte Reload -; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, lr, lsl #4 -; CHECK-NEXT: str.w r0, [r4, #37] -; CHECK-NEXT: ldr r2, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: orr.w r0, r5, r2, lsl #4 -; CHECK-NEXT: str r0, [r4, #20] -; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: and r1, r0, #15 +; CHECK-NEXT: .save {r5, lr} +; CHECK-NEXT: push {r5, lr} +; CHECK-NEXT: vmovx.f16 s14, s3 +; CHECK-NEXT: vcvt.s32.f16 s12, s3 +; CHECK-NEXT: vcvt.s32.f16 s14, s14 +; CHECK-NEXT: vcvt.s32.f16 s10, s2 +; CHECK-NEXT: vmov r12, s14 +; CHECK-NEXT: vmovx.f16 s2, s2 +; CHECK-NEXT: vcvt.s32.f16 s2, s2 +; CHECK-NEXT: vmovx.f16 s8, s1 +; CHECK-NEXT: vcvt.s32.f16 s8, s8 +; CHECK-NEXT: vcvt.s32.f16 s6, s1 +; CHECK-NEXT: vcvt.s32.f16 s4, s0 +; CHECK-NEXT: vmovx.f16 s0, s0 +; CHECK-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-NEXT: asr.w r3, r12, #31 +; CHECK-NEXT: mov r2, r12 +; CHECK-NEXT: mov r1, r3 ; CHECK-NEXT: lsrl r2, r1, #28 -; CHECK-NEXT: strb r2, [r4, #24] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: and r0, r3, #15 -; CHECK-NEXT: orr.w r0, r0, r12, lsl #4 -; CHECK-NEXT: str r0, [r4, #12] -; CHECK-NEXT: add sp, #56 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI50_0: -; CHECK-NEXT: .long 0x717fffff @ float 1.26765052E+30 +; CHECK-NEXT: orr.w r1, r1, r3, lsl #4 +; CHECK-NEXT: str.w r1, [r0, #95] +; CHECK-NEXT: str.w r2, [r0, #91] +; CHECK-NEXT: movs r1, #15 +; CHECK-NEXT: and.w r5, r1, r12, asr #31 +; CHECK-NEXT: mov r2, r3 +; CHECK-NEXT: lsrl r2, r5, #28 +; CHECK-NEXT: strb.w r2, [r0, #99] +; CHECK-NEXT: vmov r2, s12 +; CHECK-NEXT: and.w r3, r1, r2, asr #31 +; CHECK-NEXT: orr.w r3, r3, r12, lsl #4 +; CHECK-NEXT: vmov r12, s2 +; CHECK-NEXT: str.w r3, [r0, #87] +; CHECK-NEXT: asrs r3, r2, #31 +; CHECK-NEXT: str.w r3, [r0, #83] +; CHECK-NEXT: str.w r3, [r0, #79] +; CHECK-NEXT: str.w r2, [r0, #75] +; CHECK-NEXT: asr.w r3, r12, #31 +; CHECK-NEXT: mov r2, r12 +; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: lsrl r2, r5, #28 +; CHECK-NEXT: orr.w r5, r5, r3, lsl #4 +; CHECK-NEXT: str.w r5, [r0, #70] +; CHECK-NEXT: str.w r2, [r0, #66] +; CHECK-NEXT: and.w r5, r1, r12, asr #31 +; CHECK-NEXT: mov r2, r3 +; CHECK-NEXT: lsrl r2, r5, #28 +; CHECK-NEXT: strb.w r2, [r0, #74] +; CHECK-NEXT: vmov r2, s10 +; CHECK-NEXT: and.w r3, r1, r2, asr #31 +; CHECK-NEXT: orr.w r3, r3, r12, lsl #4 +; CHECK-NEXT: vmov r12, s8 +; CHECK-NEXT: str.w r3, [r0, #62] +; CHECK-NEXT: asrs r3, r2, #31 +; CHECK-NEXT: str.w r3, [r0, #58] +; CHECK-NEXT: str.w r3, [r0, #54] +; CHECK-NEXT: str.w r2, [r0, #50] +; CHECK-NEXT: asr.w r3, r12, #31 +; CHECK-NEXT: mov r2, r12 +; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: lsrl r2, r5, #28 +; CHECK-NEXT: orr.w r5, r5, r3, lsl #4 +; CHECK-NEXT: str.w r5, [r0, #45] +; CHECK-NEXT: str.w r2, [r0, #41] +; CHECK-NEXT: and.w r5, r1, r12, asr #31 +; CHECK-NEXT: mov r2, r3 +; CHECK-NEXT: lsrl r2, r5, #28 +; CHECK-NEXT: strb.w r2, [r0, #49] +; CHECK-NEXT: vmov r2, s6 +; CHECK-NEXT: and.w r3, r1, r2, asr #31 +; CHECK-NEXT: orr.w r3, r3, r12, lsl #4 +; CHECK-NEXT: vmov r12, s0 +; CHECK-NEXT: str.w r3, [r0, #37] +; CHECK-NEXT: asrs r3, r2, #31 +; CHECK-NEXT: str.w r3, [r0, #33] +; CHECK-NEXT: str.w r3, [r0, #29] +; CHECK-NEXT: str.w r2, [r0, #25] +; CHECK-NEXT: asr.w r3, r12, #31 +; CHECK-NEXT: mov r2, r12 +; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: lsrl r2, r5, #28 +; CHECK-NEXT: orr.w r5, r5, r3, lsl #4 +; CHECK-NEXT: strd r2, r5, [r0, #16] +; CHECK-NEXT: and.w r5, r1, r12, asr #31 +; CHECK-NEXT: mov r2, r3 +; CHECK-NEXT: lsrl r2, r5, #28 +; CHECK-NEXT: strb r2, [r0, #24] +; CHECK-NEXT: vmov r2, s4 +; CHECK-NEXT: and.w r1, r1, r2, asr #31 +; CHECK-NEXT: orr.w r1, r1, r12, lsl #4 +; CHECK-NEXT: str r1, [r0, #12] +; CHECK-NEXT: asrs r1, r2, #31 +; CHECK-NEXT: str r1, [r0, #8] +; CHECK-NEXT: strd r2, r1, [r0] +; CHECK-NEXT: pop {r5, pc} %x = call <8 x i100> @llvm.fptoui.sat.v8f16.v8i100(<8 x half> %f) ret <8 x i100> %x } @@ -4219,185 +4417,51 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { define arm_aapcs_vfpcc <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) { ; CHECK-LABEL: test_unsigned_v8f16_v8i128: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vcvtt.f32.f16 s24, s19 -; CHECK-NEXT: vcvtb.f32.f16 s22, s16 -; CHECK-NEXT: vmov r0, s24 -; CHECK-NEXT: vcvtb.f32.f16 s28, s19 -; CHECK-NEXT: vldr s20, .LCPI51_0 -; CHECK-NEXT: vmov r5, s22 -; CHECK-NEXT: vmov r7, s28 -; CHECK-NEXT: vcvtt.f32.f16 s26, s18 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s24, #0 -; CHECK-NEXT: add.w r12, r4, #112 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: vcvtb.f32.f16 s18, s18 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: vmov r6, s26 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s28, #0 -; CHECK-NEXT: add.w r12, r4, #96 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: vcvtt.f32.f16 s24, s17 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: vmov r7, s18 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s26, #0 -; CHECK-NEXT: add.w r12, r4, #80 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: vmov r6, s24 -; CHECK-NEXT: vcvtb.f32.f16 s26, s17 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: add.w r12, r4, #64 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: vcvtt.f32.f16 s16, s16 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: vmov r7, s26 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s24, #0 -; CHECK-NEXT: add.w r12, r4, #48 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: vmov r6, s16 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s26, #0 -; CHECK-NEXT: add.w r12, r4, #32 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: add.w r12, r4, #16 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: stm r4!, {r0, r1, r2, r3} -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI51_0: -; CHECK-NEXT: .long 0x7f7fffff @ float 3.40282347E+38 +; CHECK-NEXT: vmovx.f16 s14, s3 +; CHECK-NEXT: vcvt.s32.f16 s12, s3 +; CHECK-NEXT: vcvt.s32.f16 s14, s14 +; CHECK-NEXT: vmov r3, s12 +; CHECK-NEXT: vmov r12, s14 +; CHECK-NEXT: vcvt.s32.f16 s10, s2 +; CHECK-NEXT: vmovx.f16 s2, s2 +; CHECK-NEXT: vmovx.f16 s8, s1 +; CHECK-NEXT: vcvt.s32.f16 s2, s2 +; CHECK-NEXT: vcvt.s32.f16 s6, s1 +; CHECK-NEXT: vcvt.s32.f16 s8, s8 +; CHECK-NEXT: vcvt.s32.f16 s4, s0 +; CHECK-NEXT: vmovx.f16 s0, s0 +; CHECK-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-NEXT: asrs r1, r3, #31 +; CHECK-NEXT: asr.w r2, r12, #31 +; CHECK-NEXT: strd r3, r1, [r0, #96] +; CHECK-NEXT: strd r1, r1, [r0, #104] +; CHECK-NEXT: vmov r3, s10 +; CHECK-NEXT: strd r12, r2, [r0, #112] +; CHECK-NEXT: vmov r12, s2 +; CHECK-NEXT: strd r2, r2, [r0, #120] +; CHECK-NEXT: asrs r1, r3, #31 +; CHECK-NEXT: asr.w r2, r12, #31 +; CHECK-NEXT: strd r3, r1, [r0, #64] +; CHECK-NEXT: strd r1, r1, [r0, #72] +; CHECK-NEXT: vmov r3, s6 +; CHECK-NEXT: strd r12, r2, [r0, #80] +; CHECK-NEXT: vmov r12, s8 +; CHECK-NEXT: strd r2, r2, [r0, #88] +; CHECK-NEXT: asrs r1, r3, #31 +; CHECK-NEXT: asr.w r2, r12, #31 +; CHECK-NEXT: strd r3, r1, [r0, #32] +; CHECK-NEXT: strd r1, r1, [r0, #40] +; CHECK-NEXT: vmov r3, s4 +; CHECK-NEXT: strd r12, r2, [r0, #48] +; CHECK-NEXT: vmov r12, s0 +; CHECK-NEXT: strd r2, r2, [r0, #56] +; CHECK-NEXT: asrs r1, r3, #31 +; CHECK-NEXT: asr.w r2, r12, #31 +; CHECK-NEXT: strd r3, r1, [r0] +; CHECK-NEXT: strd r1, r1, [r0, #8] +; CHECK-NEXT: strd r12, r2, [r0, #16] +; CHECK-NEXT: strd r2, r2, [r0, #24] +; CHECK-NEXT: bx lr %x = call <8 x i128> @llvm.fptoui.sat.v8f16.v8i128(<8 x half> %f) ret <8 x i128> %x } diff --git a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll index bf0d92b5e0303..fcab2f455f264 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll @@ -486,55 +486,232 @@ for.cond.cleanup: ; preds = %vector.body define <2 x i64> @large_i128(<2 x double> %x) { ; CHECK-LABEL: large_i128: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: mov r8, r3 -; CHECK-NEXT: mov r5, r2 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r7, r2, #1 -; CHECK-NEXT: mov.w r9, #1 -; CHECK-NEXT: sbcs r7, r3, #0 -; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: cset r7, lt -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r7, ne -; CHECK-NEXT: csel r3, r3, r7, ne -; CHECK-NEXT: csel r1, r1, r7, ne -; CHECK-NEXT: csel r2, r2, r9, ne -; CHECK-NEXT: rsbs r7, r0, #0 -; CHECK-NEXT: sbcs.w r7, r4, r1 -; CHECK-NEXT: sbcs.w r2, r4, r2 -; CHECK-NEXT: sbcs.w r2, r4, r3 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r6, r0, r2, ne -; CHECK-NEXT: csel r7, r1, r2, ne -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r5, r2, #1 -; CHECK-NEXT: sbcs r5, r3, #0 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r0, r0, r5, ne -; CHECK-NEXT: csel r3, r3, r5, ne -; CHECK-NEXT: csel r1, r1, r5, ne -; CHECK-NEXT: csel r2, r2, r9, ne -; CHECK-NEXT: rsbs r5, r0, #0 -; CHECK-NEXT: sbcs.w r5, r4, r1 -; CHECK-NEXT: sbcs.w r2, r4, r2 -; CHECK-NEXT: sbcs.w r2, r4, r3 +; CHECK: @ %bb.0: @ %entryfp-to-i-entryfp-to-i-entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #100 +; CHECK-NEXT: sub sp, #100 +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: ubfx r6, r1, #20, #11 +; CHECK-NEXT: movw r12, #1023 +; CHECK-NEXT: movs r2, #1 +; CHECK-NEXT: mov.w r11, #-1 +; CHECK-NEXT: cmp r6, r12 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: bhs .LBB20_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: strd r0, r0, [sp, #24] @ 8-byte Folded Spill +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: b .LBB20_6 +; CHECK-NEXT: .LBB20_2: @ %fp-to-i-if-check.exp.size2 +; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: orr.w r3, r2, r1, asr #31 +; CHECK-NEXT: bfi r7, r2, #20, #12 +; CHECK-NEXT: asrs r1, r1, #31 +; CHECK-NEXT: movw r5, #1074 +; CHECK-NEXT: cmp r6, r5 +; CHECK-NEXT: bhi .LBB20_4 +; CHECK-NEXT: @ %bb.3: @ %fp-to-i-if-exp.small3 +; CHECK-NEXT: subw r6, r6, #1075 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: lsll r0, r7, r6 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: umull r5, r6, r0, r3 +; CHECK-NEXT: umlal r6, r4, r7, r3 +; CHECK-NEXT: str r5, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: umull r3, r5, r0, r1 +; CHECK-NEXT: adds r6, r6, r3 +; CHECK-NEXT: str r6, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: adcs r4, r5 +; CHECK-NEXT: adc r6, lr, #0 +; CHECK-NEXT: umlal r4, r6, r7, r1 +; CHECK-NEXT: mla r7, r1, r7, r5 +; CHECK-NEXT: adds r4, r4, r3 +; CHECK-NEXT: mla r0, r1, r0, r7 +; CHECK-NEXT: b .LBB20_5 +; CHECK-NEXT: .LBB20_4: @ %fp-to-i-if-exp.large4 +; CHECK-NEXT: add r2, sp, #80 +; CHECK-NEXT: subw r4, r6, #1075 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: stm.w r2, {r0, r7, lr} +; CHECK-NEXT: movs r0, #12 +; CHECK-NEXT: add r7, sp, #64 +; CHECK-NEXT: and.w r0, r0, r4, lsr #3 +; CHECK-NEXT: adds r7, #16 +; CHECK-NEXT: str.w lr, [sp, #92] +; CHECK-NEXT: subs r5, r7, r0 +; CHECK-NEXT: strd lr, lr, [sp, #72] +; CHECK-NEXT: strd lr, lr, [sp, #64] +; CHECK-NEXT: ldm.w r5, {r0, r7, r8, r11} +; CHECK-NEXT: and r5, r4, #31 +; CHECK-NEXT: mov r9, r7 +; CHECK-NEXT: lsll r8, r11, r5 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: lsrl r0, r7, #1 +; CHECK-NEXT: lsll r4, r9, r5 +; CHECK-NEXT: eor r5, r5, #63 +; CHECK-NEXT: umull r2, r6, r4, r3 +; CHECK-NEXT: rsbs r5, r5, #0 +; CHECK-NEXT: lsll r0, r7, r5 +; CHECK-NEXT: orr.w r0, r0, r8 +; CHECK-NEXT: umull r10, r12, r4, r1 +; CHECK-NEXT: orr.w r7, r7, r11 +; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: umlal r6, r2, r9, r3 +; CHECK-NEXT: mov.w r11, #-1 +; CHECK-NEXT: adds.w r6, r6, r10 +; CHECK-NEXT: str r6, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: adcs.w r2, r2, r12 +; CHECK-NEXT: adc r6, lr, #0 +; CHECK-NEXT: umull lr, r5, r3, r0 +; CHECK-NEXT: mla r3, r3, r7, r5 +; CHECK-NEXT: umlal r2, r6, r9, r1 +; CHECK-NEXT: mla r0, r1, r0, r3 +; CHECK-NEXT: mla r3, r1, r9, r12 +; CHECK-NEXT: movw r12, #1023 +; CHECK-NEXT: mla r1, r1, r4, r3 +; CHECK-NEXT: adds.w r3, r10, lr +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: adds r4, r2, r3 +; CHECK-NEXT: mov.w r2, #1 +; CHECK-NEXT: .LBB20_5: @ %fp-to-i-cleanup1 +; CHECK-NEXT: adc.w r3, r6, r0 +; CHECK-NEXT: .LBB20_6: @ %fp-to-i-cleanup1 +; CHECK-NEXT: vmov r0, r7, d1 +; CHECK-NEXT: mov.w r6, #-1 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: cmp.w r7, #-1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt r6, #0 +; CHECK-NEXT: movgt.w r11, #1 +; CHECK-NEXT: ubfx r5, r7, #20, #11 +; CHECK-NEXT: cmp r5, r12 +; CHECK-NEXT: bhs .LBB20_8 +; CHECK-NEXT: @ %bb.7: +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: b .LBB20_12 +; CHECK-NEXT: .LBB20_8: @ %fp-to-i-if-check.exp.size +; CHECK-NEXT: bfi r7, r2, #20, #12 +; CHECK-NEXT: movw r1, #1074 +; CHECK-NEXT: cmp r5, r1 +; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: bhi .LBB20_10 +; CHECK-NEXT: @ %bb.9: @ %fp-to-i-if-exp.small +; CHECK-NEXT: subw r1, r5, #1075 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: lsll r0, r7, r1 +; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: umull r12, r1, r0, r11 +; CHECK-NEXT: umlal r1, r5, r7, r11 +; CHECK-NEXT: umull r4, r2, r0, r6 +; CHECK-NEXT: adds.w r10, r4, r1 +; CHECK-NEXT: adcs.w r1, r5, r2 +; CHECK-NEXT: mla r2, r6, r7, r2 +; CHECK-NEXT: adc r5, lr, #0 +; CHECK-NEXT: umlal r1, r5, r7, r6 +; CHECK-NEXT: mla r0, r6, r0, r2 +; CHECK-NEXT: adds r2, r1, r4 +; CHECK-NEXT: adc.w r6, r5, r0 +; CHECK-NEXT: b .LBB20_11 +; CHECK-NEXT: .LBB20_10: @ %fp-to-i-if-exp.large +; CHECK-NEXT: add r1, sp, #48 +; CHECK-NEXT: subw r2, r5, #1075 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: stm.w r1, {r0, r7, r10} +; CHECK-NEXT: movs r0, #12 +; CHECK-NEXT: add r7, sp, #32 +; CHECK-NEXT: and.w r0, r0, r2, lsr #3 +; CHECK-NEXT: adds r7, #16 +; CHECK-NEXT: str.w r10, [sp, #60] +; CHECK-NEXT: subs r5, r7, r0 +; CHECK-NEXT: strd r10, r10, [sp, #40] +; CHECK-NEXT: strd r10, r10, [sp, #32] +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: ldm.w r5, {r0, r7, r12} +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: ldr r1, [r5, #12] +; CHECK-NEXT: mov r5, r7 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: str r1, [sp] @ 4-byte Spill +; CHECK-NEXT: lsll r4, r5, r2 +; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: umull r1, r3, r4, r11 +; CHECK-NEXT: lsrl r0, r7, #1 +; CHECK-NEXT: umlal r3, r8, r5, r11 +; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: umull r1, lr, r4, r6 +; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: adds r1, r1, r3 +; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload +; CHECK-NEXT: adcs.w r3, r8, lr +; CHECK-NEXT: adc r8, r10, #0 +; CHECK-NEXT: ldr.w r10, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: lsll r12, r1, r2 +; CHECK-NEXT: eor r2, r2, #63 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: umlal r3, r8, r5, r6 +; CHECK-NEXT: lsll r0, r7, r2 +; CHECK-NEXT: orr.w r0, r0, r12 +; CHECK-NEXT: orr.w r2, r1, r7 +; CHECK-NEXT: umull r12, r7, r11, r0 +; CHECK-NEXT: mla r2, r11, r2, r7 +; CHECK-NEXT: mla r0, r6, r0, r2 +; CHECK-NEXT: mla r2, r6, r5, lr +; CHECK-NEXT: mla r2, r6, r4, r2 +; CHECK-NEXT: ldr r4, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: adds.w r7, r4, r12 +; CHECK-NEXT: ldr.w r12, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: adcs r0, r2 +; CHECK-NEXT: adds r2, r3, r7 +; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: adc.w r6, r8, r0 +; CHECK-NEXT: .LBB20_11: @ %fp-to-i-cleanup +; CHECK-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: .LBB20_12: @ %fp-to-i-cleanup +; CHECK-NEXT: subs r0, r4, #1 +; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: sbcs r0, r3, #0 +; CHECK-NEXT: cset r0, lt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csel r1, r3, r0, ne +; CHECK-NEXT: ldr r3, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: csel r3, r3, r0, ne +; CHECK-NEXT: csel r0, r7, r0, ne +; CHECK-NEXT: mov.w r7, #1 +; CHECK-NEXT: csel r5, r4, r7, ne +; CHECK-NEXT: rsbs r4, r0, #0 +; CHECK-NEXT: sbcs.w r4, r9, r3 +; CHECK-NEXT: sbcs.w r5, r9, r5 +; CHECK-NEXT: sbcs.w r1, r9, r1 +; CHECK-NEXT: cset r1, lt +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csel r0, r0, r1, ne +; CHECK-NEXT: csel r1, r3, r1, ne +; CHECK-NEXT: subs r3, r2, #1 +; CHECK-NEXT: sbcs r3, r6, #0 ; CHECK-NEXT: cset r3, lt ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r2, r0, r3, ne -; CHECK-NEXT: csel r3, r1, r3, ne -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-NEXT: csel r6, r6, r3, ne +; CHECK-NEXT: csel r5, r10, r3, ne +; CHECK-NEXT: csel r3, r12, r3, ne +; CHECK-NEXT: csel r2, r2, r7, ne +; CHECK-NEXT: rsbs r7, r3, #0 +; CHECK-NEXT: sbcs.w r7, r9, r5 +; CHECK-NEXT: sbcs.w r2, r9, r2 +; CHECK-NEXT: sbcs.w r2, r9, r6 +; CHECK-NEXT: cset r7, lt +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: csel r2, r3, r7, ne +; CHECK-NEXT: csel r3, r5, r7, ne +; CHECK-NEXT: add sp, #100 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll index cc58bc1e44f37..2351667eba9d0 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll @@ -627,18 +627,197 @@ define half @s128_to_half(i128 %x) { ; X64-NEXT: retq ; ; X86-LABEL: s128_to_half: -; X86: # %bb.0: +; X86: # %bb.0: # %itofp-entry ; X86-NEXT: pushl %ebp ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi ; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $32, %esp -; X86-NEXT: vmovups 8(%ebp), %xmm0 -; X86-NEXT: vmovups %xmm0, (%esp) -; X86-NEXT: calll __floattihf -; X86-NEXT: movl %ebp, %esp +; X86-NEXT: subl $144, %esp +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: movl 16(%ebp), %ebx +; X86-NEXT: movl 8(%ebp), %edx +; X86-NEXT: movl 20(%ebp), %esi +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: orl %esi, %ecx +; X86-NEXT: movl %edx, %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: orl %ecx, %edi +; X86-NEXT: je .LBB43_1 +; X86-NEXT: # %bb.2: # %itofp-if-end +; X86-NEXT: movl %esi, %ecx +; X86-NEXT: sarl $31, %ecx +; X86-NEXT: xorl %ecx, %esi +; X86-NEXT: xorl %ecx, %ebx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: xorl %ecx, %edx +; X86-NEXT: subl %ecx, %edx +; X86-NEXT: sbbl %ecx, %eax +; X86-NEXT: sbbl %ecx, %ebx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %ecx, %esi +; X86-NEXT: bsrl %esi, %ecx +; X86-NEXT: xorl $31, %ecx +; X86-NEXT: bsrl %ebx, %edi +; X86-NEXT: xorl $31, %edi +; X86-NEXT: orl $32, %edi +; X86-NEXT: testl %esi, %esi +; X86-NEXT: cmovnel %ecx, %edi +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: bsrl %eax, %edx +; X86-NEXT: xorl $31, %edx +; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-NEXT: bsrl %ecx, %ecx +; X86-NEXT: xorl $31, %ecx +; X86-NEXT: orl $32, %ecx +; X86-NEXT: testl %eax, %eax +; X86-NEXT: cmovnel %edx, %ecx +; X86-NEXT: orl $64, %ecx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %esi, %ebx +; X86-NEXT: cmovnel %edi, %ecx +; X86-NEXT: movl $128, %edx +; X86-NEXT: subl %ecx, %edx +; X86-NEXT: movl %ecx, %edi +; X86-NEXT: xorl $127, %edi +; X86-NEXT: cmpl $25, %edx +; X86-NEXT: jl .LBB43_12 +; X86-NEXT: # %bb.3: # %itofp-if-then4 +; X86-NEXT: movl (%esp), %ebx # 4-byte Reload +; X86-NEXT: cmpl $26, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: je .LBB43_4 +; X86-NEXT: # %bb.5: # %itofp-if-then4 +; X86-NEXT: cmpl $25, %edx +; X86-NEXT: jne .LBB43_7 +; X86-NEXT: # %bb.6: # %itofp-sw-bb +; X86-NEXT: movl %ebx, %edx +; X86-NEXT: shldl $1, %ebx, %eax +; X86-NEXT: addl %ebx, %edx +; X86-NEXT: jmp .LBB43_8 +; X86-NEXT: .LBB43_1: +; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X86-NEXT: jmp .LBB43_14 +; X86-NEXT: .LBB43_12: # %itofp-if-else +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) +; X86-NEXT: addb $-104, %cl +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $12, %al +; X86-NEXT: negb %al +; X86-NEXT: movsbl %al, %eax +; X86-NEXT: movl 112(%esp,%eax), %edx +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: jmp .LBB43_13 +; X86-NEXT: .LBB43_4: +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ebx, %edx +; X86-NEXT: jmp .LBB43_9 +; X86-NEXT: .LBB43_7: # %itofp-sw-default +; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movb $102, %dl +; X86-NEXT: subb %cl, %dl +; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-NEXT: shrb $3, %dl +; X86-NEXT: andb $12, %dl +; X86-NEXT: movzbl %dl, %edx +; X86-NEXT: movl 72(%esp,%edx), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 64(%esp,%edx), %ebx +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 68(%esp,%edx), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: shrdl %cl, %edi, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: shrdl %cl, %edx, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addb $26, %al +; X86-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shrb $3, %cl +; X86-NEXT: andb $12, %cl +; X86-NEXT: negb %cl +; X86-NEXT: movsbl %cl, %ecx +; X86-NEXT: movl 56(%esp,%ecx), %esi +; X86-NEXT: movl 48(%esp,%ecx), %edx +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: orl %esi, %ebx +; X86-NEXT: movl 52(%esp,%ecx), %edi +; X86-NEXT: movl %edi, (%esp) # 4-byte Spill +; X86-NEXT: movl 60(%esp,%ecx), %edi +; X86-NEXT: orl (%esp), %edi # 4-byte Folded Reload +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shldl %cl, %ebx, %edi +; X86-NEXT: movl (%esp), %ebx # 4-byte Reload +; X86-NEXT: shldl %cl, %ebx, %esi +; X86-NEXT: shll %cl, %edx +; X86-NEXT: orl %esi, %edx +; X86-NEXT: orl %edi, %edx +; X86-NEXT: movl $0, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: setne %dl +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: .LBB43_8: # %itofp-sw-epilog +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: .LBB43_9: # %itofp-sw-epilog +; X86-NEXT: movl %edx, %esi +; X86-NEXT: shrl $2, %edx +; X86-NEXT: andl $1, %edx +; X86-NEXT: orl %esi, %edx +; X86-NEXT: addl $1, %edx +; X86-NEXT: adcl $0, %eax +; X86-NEXT: testl $67108864, %edx # imm = 0x4000000 +; X86-NEXT: jne .LBB43_11 +; X86-NEXT: # %bb.10: +; X86-NEXT: shrdl $2, %eax, %edx +; X86-NEXT: jmp .LBB43_13 +; X86-NEXT: .LBB43_11: # %itofp-if-then20 +; X86-NEXT: shrdl $3, %eax, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: .LBB43_13: # %itofp-if-end26 +; X86-NEXT: andl $-2147483648, %ecx # imm = 0x80000000 +; X86-NEXT: shll $23, %edi +; X86-NEXT: addl $1065353216, %edi # imm = 0x3F800000 +; X86-NEXT: andl $8388607, %edx # imm = 0x7FFFFF +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: orl %edi, %edx +; X86-NEXT: vmovd %edx, %xmm0 +; X86-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 +; X86-NEXT: .LBB43_14: # %itofp-return +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx ; X86-NEXT: popl %ebp ; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl @@ -718,18 +897,180 @@ define half @u128_to_half(i128 %x) { ; X64-NEXT: retq ; ; X86-LABEL: u128_to_half: -; X86: # %bb.0: +; X86: # %bb.0: # %itofp-entry ; X86-NEXT: pushl %ebp ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi ; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $32, %esp -; X86-NEXT: vmovups 8(%ebp), %xmm0 -; X86-NEXT: vmovups %xmm0, (%esp) -; X86-NEXT: calll __floatuntihf -; X86-NEXT: movl %ebp, %esp +; X86-NEXT: subl $144, %esp +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: movl 8(%ebp), %ebx +; X86-NEXT: movl 20(%ebp), %edi +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: orl %edi, %ecx +; X86-NEXT: movl %ebx, %esi +; X86-NEXT: orl 16(%ebp), %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: je .LBB48_1 +; X86-NEXT: # %bb.2: # %itofp-if-end +; X86-NEXT: bsrl %edi, %ecx +; X86-NEXT: xorl $31, %ecx +; X86-NEXT: movl 16(%ebp), %edx +; X86-NEXT: bsrl %edx, %esi +; X86-NEXT: xorl $31, %esi +; X86-NEXT: orl $32, %esi +; X86-NEXT: testl %edi, %edi +; X86-NEXT: cmovnel %ecx, %esi +; X86-NEXT: movl %edi, %edx +; X86-NEXT: bsrl %eax, %edi +; X86-NEXT: xorl $31, %edi +; X86-NEXT: bsrl %ebx, %ecx +; X86-NEXT: xorl $31, %ecx +; X86-NEXT: orl $32, %ecx +; X86-NEXT: testl %eax, %eax +; X86-NEXT: cmovnel %edi, %ecx +; X86-NEXT: orl $64, %ecx +; X86-NEXT: movl 16(%ebp), %edi +; X86-NEXT: orl %edx, %edi +; X86-NEXT: cmovnel %esi, %ecx +; X86-NEXT: movl $128, %edi +; X86-NEXT: subl %ecx, %edi +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: xorl $127, %esi +; X86-NEXT: cmpl $25, %edi +; X86-NEXT: jl .LBB48_10 +; X86-NEXT: # %bb.3: # %itofp-if-then4 +; X86-NEXT: cmpl $26, %edi +; X86-NEXT: je .LBB48_7 +; X86-NEXT: # %bb.4: # %itofp-if-then4 +; X86-NEXT: cmpl $25, %edi +; X86-NEXT: jne .LBB48_6 +; X86-NEXT: # %bb.5: # %itofp-sw-bb +; X86-NEXT: shldl $1, %ebx, %eax +; X86-NEXT: addl %ebx, %ebx +; X86-NEXT: jmp .LBB48_7 +; X86-NEXT: .LBB48_1: +; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X86-NEXT: jmp .LBB48_12 +; X86-NEXT: .LBB48_10: # %itofp-if-else +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) +; X86-NEXT: addb $-104, %cl +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $12, %al +; X86-NEXT: negb %al +; X86-NEXT: movsbl %al, %eax +; X86-NEXT: movl 112(%esp,%eax), %edx +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: jmp .LBB48_11 +; X86-NEXT: .LBB48_6: # %itofp-sw-default +; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %edx +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl 20(%ebp), %edx +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movb $102, %dl +; X86-NEXT: subb %cl, %dl +; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-NEXT: shrb $3, %dl +; X86-NEXT: andb $12, %dl +; X86-NEXT: movzbl %dl, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl 72(%esp,%edx), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl 64(%esp,%edx), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl 68(%esp,%edx), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl %ecx, %ebx +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: shrdl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: addb $26, %bl +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 20(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $12, %al +; X86-NEXT: negb %al +; X86-NEXT: movsbl %al, %ecx +; X86-NEXT: movl 56(%esp,%ecx), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 48(%esp,%ecx), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: orl %ebx, %edx +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl 52(%esp,%ecx), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 60(%esp,%ecx), %edx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: shldl %cl, %ebx, %edx +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: shldl %cl, %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %eax +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: orl %edx, %eax +; X86-NEXT: setne %bl +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: .LBB48_7: # %itofp-sw-epilog +; X86-NEXT: movl %ebx, %edx +; X86-NEXT: shrl $2, %edx +; X86-NEXT: andl $1, %edx +; X86-NEXT: orl %ebx, %edx +; X86-NEXT: addl $1, %edx +; X86-NEXT: adcl $0, %eax +; X86-NEXT: testl $67108864, %edx # imm = 0x4000000 +; X86-NEXT: jne .LBB48_9 +; X86-NEXT: # %bb.8: +; X86-NEXT: shrdl $2, %eax, %edx +; X86-NEXT: jmp .LBB48_11 +; X86-NEXT: .LBB48_9: # %itofp-if-then20 +; X86-NEXT: shrdl $3, %eax, %edx +; X86-NEXT: movl %edi, %esi +; X86-NEXT: .LBB48_11: # %itofp-if-end26 +; X86-NEXT: shll $23, %esi +; X86-NEXT: andl $8388607, %edx # imm = 0x7FFFFF +; X86-NEXT: leal 1065353216(%esi,%edx), %eax +; X86-NEXT: vmovd %eax, %xmm0 +; X86-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0 +; X86-NEXT: .LBB48_12: # %itofp-return +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx ; X86-NEXT: popl %ebp ; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl @@ -812,29 +1153,14 @@ define i128 @half_to_s128(half %x) { ; ; X86-LABEL: half_to_s128: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp -; X86-NEXT: pushl %esi -; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $48, %esp -; X86-NEXT: .cfi_offset %esi, -12 -; X86-NEXT: movl 8(%ebp), %esi -; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero -; X86-NEXT: vmovsh %xmm0, {{[0-9]+}}(%esp) -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: calll __fixhfti -; X86-NEXT: subl $4, %esp -; X86-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0 -; X86-NEXT: vmovaps %xmm0, (%esi) -; X86-NEXT: movl %esi, %eax -; X86-NEXT: leal -4(%ebp), %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: sarl $31, %edx +; X86-NEXT: movl %edx, 12(%eax) +; X86-NEXT: movl %edx, 8(%eax) +; X86-NEXT: movl %edx, 4(%eax) +; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: retl $4 %a = fptosi half %x to i128 ret i128 %a @@ -915,29 +1241,12 @@ define i128 @half_to_u128(half %x) { ; ; X86-LABEL: half_to_u128: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp -; X86-NEXT: pushl %esi -; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $48, %esp -; X86-NEXT: .cfi_offset %esi, -12 -; X86-NEXT: movl 8(%ebp), %esi -; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero -; X86-NEXT: vmovsh %xmm0, {{[0-9]+}}(%esp) -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: calll __fixunshfti -; X86-NEXT: subl $4, %esp -; X86-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0 -; X86-NEXT: vmovaps %xmm0, (%esi) -; X86-NEXT: movl %esi, %eax -; X86-NEXT: leal -4(%ebp), %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: vcvttsh2usi {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: movl $0, 12(%eax) +; X86-NEXT: movl $0, 8(%eax) +; X86-NEXT: movl $0, 4(%eax) ; X86-NEXT: retl $4 %a = fptoui half %x to i128 ret i128 %a diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll index 6d4ec063ccd46..4d4166e7b3d50 100644 --- a/llvm/test/CodeGen/X86/fp128-cast.ll +++ b/llvm/test/CodeGen/X86/fp128-cast.ll @@ -413,31 +413,207 @@ define dso_local void @TestFPToSIF128_I128() nounwind { ; X64-SSE-NEXT: retq ; ; X86-LABEL: TestFPToSIF128_I128: -; X86: # %bb.0: # %entry +; X86: # %bb.0: # %entryfp-to-i-entry +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $56, %esp -; X86-NEXT: movl vf128, %eax -; X86-NEXT: movl vf128+4, %ecx -; X86-NEXT: movl vf128+8, %edx -; X86-NEXT: movl vf128+12, %esi +; X86-NEXT: subl $124, %esp +; X86-NEXT: movl vf128+12, %edx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: shrl $16, %ecx +; X86-NEXT: andl $32767, %ecx # imm = 0x7FFF +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: cmpl $16383, %ecx # imm = 0x3FFF +; X86-NEXT: jae .LBB9_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: xorl %edi, %edi +; X86-NEXT: jmp .LBB9_5 +; X86-NEXT: .LBB9_2: # %fp-to-i-if-check.exp.size +; X86-NEXT: movl vf128+8, %eax +; X86-NEXT: movl %edx, %esi +; X86-NEXT: sarl $31, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl $1, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzwl %dx, %edx +; X86-NEXT: orl $65536, %edx # imm = 0x10000 +; X86-NEXT: movl $16494, %esi # imm = 0x406E +; X86-NEXT: cmpl %ecx, %esi +; X86-NEXT: movl $0, %esi +; X86-NEXT: sbbl %esi, %esi +; X86-NEXT: movl $0, %esi +; X86-NEXT: sbbl %esi, %esi +; X86-NEXT: movl vf128+4, %esi +; X86-NEXT: sbbl %ebx, %ebx +; X86-NEXT: movl vf128, %edi +; X86-NEXT: jb .LBB9_4 +; X86-NEXT: # %bb.3: # %fp-to-i-if-exp.small +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movb $111, %dl +; X86-NEXT: subb %cl, %dl +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: shrb $3, %cl +; X86-NEXT: andb $12, %cl +; X86-NEXT: movzbl %cl, %eax +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl 60(%esp,%eax), %ebx +; X86-NEXT: movl 56(%esp,%eax), %esi +; X86-NEXT: movl %esi, %edi +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: shrdl %cl, %ebx, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 48(%esp,%eax), %edi +; X86-NEXT: movl 52(%esp,%eax), %ebp +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: shrdl %cl, %esi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shrl %cl, %ebx +; X86-NEXT: shrdl %cl, %ebp, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edi, %eax +; X86-NEXT: mull %ecx +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, %esi +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: adcl $0, %ebp +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: addl %esi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: mull %ecx +; X86-NEXT: addl %ebp, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-NEXT: adcl %eax, %edx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: imull %edx, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: imull %eax, %ebx +; X86-NEXT: mull %edx +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl %edx, %ebx +; X86-NEXT: addl %esi, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: imull %ecx, %esi +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edi +; X86-NEXT: addl %esi, %edx +; X86-NEXT: imull %ecx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: addl %edx, %edi +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: adcl %ebx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: adcl %ebp, %edi +; X86-NEXT: jmp .LBB9_5 +; X86-NEXT: .LBB9_4: # %fp-to-i-if-exp.large +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: calll __fixtfti -; X86-NEXT: subl $4, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, vi128+12 -; X86-NEXT: movl %edx, vi128+8 +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: addb $-111, %cl +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $12, %al +; X86-NEXT: negb %al +; X86-NEXT: movsbl %al, %eax +; X86-NEXT: movl 100(%esp,%eax), %esi +; X86-NEXT: movl 104(%esp,%eax), %edx +; X86-NEXT: movl %edx, %edi +; X86-NEXT: shldl %cl, %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 96(%esp,%eax), %edi +; X86-NEXT: movl 108(%esp,%eax), %eax +; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl %cl, %edi, %esi +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: mull %ebx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl %esi, %ecx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: mull %ebx +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %eax, %esi +; X86-NEXT: addl %ebp, %esi +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: movl %edi, %eax +; X86-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: addl %esi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %ebx, %ebp +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: mull %esi +; X86-NEXT: addl %ebp, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-NEXT: adcl %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: imull %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-NEXT: mull %ebp +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl %ecx, %edx +; X86-NEXT: imull %esi, %ebp +; X86-NEXT: addl %edx, %ebp +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: imull %esi, %ecx +; X86-NEXT: movl %esi, %eax +; X86-NEXT: mull %edi +; X86-NEXT: addl %ecx, %edx +; X86-NEXT: imull %esi, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: addl %edx, %edi +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: adcl %ebp, %edi +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: .LBB9_5: # %fp-to-i-cleanup +; X86-NEXT: movl %ebx, vi128 ; X86-NEXT: movl %ecx, vi128+4 -; X86-NEXT: movl %eax, vi128 -; X86-NEXT: addl $56, %esp +; X86-NEXT: movl %eax, vi128+8 +; X86-NEXT: movl %edi, vi128+12 +; X86-NEXT: addl $124, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-AVX-LABEL: TestFPToSIF128_I128: @@ -468,31 +644,100 @@ define dso_local void @TestFPToUIF128_U128() nounwind { ; X64-SSE-NEXT: retq ; ; X86-LABEL: TestFPToUIF128_U128: -; X86: # %bb.0: # %entry +; X86: # %bb.0: # %entryfp-to-i-entry +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $56, %esp -; X86-NEXT: movl vf128, %eax -; X86-NEXT: movl vf128+4, %ecx -; X86-NEXT: movl vf128+8, %edx +; X86-NEXT: subl $92, %esp ; X86-NEXT: movl vf128+12, %esi +; X86-NEXT: movl %esi, %ecx +; X86-NEXT: shrl $16, %ecx +; X86-NEXT: andl $32767, %ecx # imm = 0x7FFF +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl $16383, %ecx # imm = 0x3FFF +; X86-NEXT: jae .LBB10_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: xorl %edi, %edi +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: jmp .LBB10_5 +; X86-NEXT: .LBB10_2: # %fp-to-i-if-check.exp.size +; X86-NEXT: movl vf128+8, %eax +; X86-NEXT: movl vf128+4, %edi +; X86-NEXT: movl vf128, %ebx +; X86-NEXT: movzwl %si, %esi +; X86-NEXT: orl $65536, %esi # imm = 0x10000 +; X86-NEXT: movl $16494, %ebp # imm = 0x406E +; X86-NEXT: cmpl %ecx, %ebp +; X86-NEXT: movl $0, %ebp +; X86-NEXT: sbbl %ebp, %ebp +; X86-NEXT: movl $0, %ebp +; X86-NEXT: sbbl %ebp, %ebp +; X86-NEXT: sbbl %edx, %edx +; X86-NEXT: jb .LBB10_4 +; X86-NEXT: # %bb.3: # %fp-to-i-if-exp.small +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movb $111, %al +; X86-NEXT: subb %cl, %al +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shrb $3, %cl +; X86-NEXT: andb $12, %cl +; X86-NEXT: movzbl %cl, %ebx +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl 28(%esp,%ebx), %esi +; X86-NEXT: movl 24(%esp,%ebx), %ebp +; X86-NEXT: movl %ebp, %edi +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shrdl %cl, %esi, %edi +; X86-NEXT: movl 16(%esp,%ebx), %edx +; X86-NEXT: movl 20(%esp,%ebx), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shrdl %cl, %ebp, %ebx +; X86-NEXT: shrl %cl, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shrdl %cl, %eax, %edx +; X86-NEXT: jmp .LBB10_5 +; X86-NEXT: .LBB10_4: # %fp-to-i-if-exp.large +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: calll __fixunstfti -; X86-NEXT: subl $4, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: addb $-111, %cl +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $12, %al +; X86-NEXT: negb %al +; X86-NEXT: movsbl %al, %eax +; X86-NEXT: movl 72(%esp,%eax), %edi +; X86-NEXT: movl 76(%esp,%eax), %esi +; X86-NEXT: shldl %cl, %edi, %esi +; X86-NEXT: movl 64(%esp,%eax), %edx +; X86-NEXT: movl 68(%esp,%eax), %ebx +; X86-NEXT: shldl %cl, %ebx, %edi +; X86-NEXT: shldl %cl, %edx, %ebx +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: .LBB10_5: # %fp-to-i-cleanup +; X86-NEXT: movl %edx, vu128 +; X86-NEXT: movl %ebx, vu128+4 +; X86-NEXT: movl %edi, vu128+8 ; X86-NEXT: movl %esi, vu128+12 -; X86-NEXT: movl %edx, vu128+8 -; X86-NEXT: movl %ecx, vu128+4 -; X86-NEXT: movl %eax, vu128 -; X86-NEXT: addl $56, %esp +; X86-NEXT: addl $92, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-AVX-LABEL: TestFPToUIF128_U128: @@ -919,31 +1164,260 @@ define dso_local void @TestSIToFPI128_F128() nounwind { ; X64-SSE-NEXT: retq ; ; X86-LABEL: TestSIToFPI128_F128: -; X86: # %bb.0: # %entry +; X86: # %bb.0: # %entryitofp-entry +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $56, %esp -; X86-NEXT: movl vi128, %eax -; X86-NEXT: movl vi128+4, %ecx -; X86-NEXT: movl vi128+8, %edx -; X86-NEXT: movl vi128+12, %esi +; X86-NEXT: subl $156, %esp +; X86-NEXT: movl vi128+4, %esi +; X86-NEXT: movl vi128+12, %edi +; X86-NEXT: movl vi128, %ebp +; X86-NEXT: movl vi128+8, %eax +; X86-NEXT: movl %esi, %ecx +; X86-NEXT: orl %edi, %ecx +; X86-NEXT: movl %ebp, %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: je .LBB20_1 +; X86-NEXT: # %bb.2: # %itofp-if-end +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: sarl $31, %ebx +; X86-NEXT: xorl %ebx, %edi +; X86-NEXT: xorl %ebx, %eax +; X86-NEXT: xorl %ebx, %esi +; X86-NEXT: xorl %ebx, %ebp +; X86-NEXT: subl %ebx, %ebp +; X86-NEXT: sbbl %ebx, %esi +; X86-NEXT: sbbl %ebx, %eax +; X86-NEXT: sbbl %ebx, %edi +; X86-NEXT: jne .LBB20_3 +; X86-NEXT: # %bb.4: # %itofp-if-end +; X86-NEXT: bsrl %eax, %ecx +; X86-NEXT: xorl $31, %ecx +; X86-NEXT: orl $32, %ecx +; X86-NEXT: jmp .LBB20_5 +; X86-NEXT: .LBB20_1: +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: jmp .LBB20_22 +; X86-NEXT: .LBB20_3: +; X86-NEXT: bsrl %edi, %ecx +; X86-NEXT: xorl $31, %ecx +; X86-NEXT: .LBB20_5: # %itofp-if-end +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: testl %esi, %esi +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: jne .LBB20_6 +; X86-NEXT: # %bb.7: # %itofp-if-end +; X86-NEXT: bsrl %ebp, %edx +; X86-NEXT: xorl $31, %edx +; X86-NEXT: orl $32, %edx +; X86-NEXT: jmp .LBB20_8 +; X86-NEXT: .LBB20_6: +; X86-NEXT: bsrl %esi, %edx +; X86-NEXT: xorl $31, %edx +; X86-NEXT: .LBB20_8: # %itofp-if-end +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: jne .LBB20_10 +; X86-NEXT: # %bb.9: # %itofp-if-end +; X86-NEXT: orl $64, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: .LBB20_10: # %itofp-if-end +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movl $128, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: subl %ebx, %eax +; X86-NEXT: movl $0, %ecx +; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: movl $0, %ebp +; X86-NEXT: sbbl %ebp, %ebp +; X86-NEXT: sbbl %edx, %edx +; X86-NEXT: movl $127, %edi +; X86-NEXT: subl %ebx, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: cmpl $114, %eax +; X86-NEXT: movl %ecx, %ebx +; X86-NEXT: sbbl $0, %ebx +; X86-NEXT: movl %ebp, %ebx +; X86-NEXT: sbbl $0, %ebx +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: sbbl $0, %ebx +; X86-NEXT: jl .LBB20_20 +; X86-NEXT: # %bb.11: # %itofp-if-then4 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl $115, %ebx +; X86-NEXT: orl %ebp, %ebx +; X86-NEXT: orl %edx, %ecx +; X86-NEXT: orl %ecx, %ebx +; X86-NEXT: movl %eax, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: je .LBB20_12 +; X86-NEXT: # %bb.13: # %itofp-if-then4 +; X86-NEXT: xorl $114, %edx +; X86-NEXT: orl %ebp, %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: jne .LBB20_15 +; X86-NEXT: # %bb.14: # %itofp-sw-bb +; X86-NEXT: shldl $1, %eax, %edi +; X86-NEXT: shldl $1, %esi, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: shldl $1, %edx, %esi +; X86-NEXT: addl %edx, %edx +; X86-NEXT: jmp .LBB20_16 +; X86-NEXT: .LBB20_20: # %itofp-if-else +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: addb $-15, %cl +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $12, %al +; X86-NEXT: negb %al +; X86-NEXT: movsbl %al, %edx +; X86-NEXT: movl 136(%esp,%edx), %eax +; X86-NEXT: movl 140(%esp,%edx), %edi +; X86-NEXT: shldl %cl, %eax, %edi +; X86-NEXT: movl 128(%esp,%edx), %ebx +; X86-NEXT: movl 132(%esp,%edx), %esi +; X86-NEXT: shldl %cl, %esi, %eax +; X86-NEXT: shldl %cl, %ebx, %esi +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: jmp .LBB20_21 +; X86-NEXT: .LBB20_12: +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: jmp .LBB20_17 +; X86-NEXT: .LBB20_15: # %itofp-sw-default +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: calll __floattitf -; X86-NEXT: subl $4, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, vf128+12 -; X86-NEXT: movl %edx, vf128+8 -; X86-NEXT: movl %ecx, vf128+4 -; X86-NEXT: movl %eax, vf128 -; X86-NEXT: addl $56, %esp +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movb $13, %cl +; X86-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: shrb $3, %dl +; X86-NEXT: andb $12, %dl +; X86-NEXT: movzbl %dl, %ebx +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl 92(%esp,%ebx), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 88(%esp,%ebx), %edx +; X86-NEXT: movl 80(%esp,%ebx), %ebp +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 84(%esp,%ebx), %ebp +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shrdl %cl, %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebp, %esi +; X86-NEXT: shrdl %cl, %edx, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shrl %cl, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shrdl %cl, %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: addb $115, %bl +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, %ecx +; X86-NEXT: shrb $3, %cl +; X86-NEXT: andb $12, %cl +; X86-NEXT: negb %cl +; X86-NEXT: movsbl %cl, %ecx +; X86-NEXT: movl 72(%esp,%ecx), %edi +; X86-NEXT: movl 64(%esp,%ecx), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: orl %edi, %eax +; X86-NEXT: movl 68(%esp,%ecx), %esi +; X86-NEXT: movl 76(%esp,%ecx), %ebp +; X86-NEXT: orl %esi, %ebp +; X86-NEXT: movl %ebx, %ecx +; X86-NEXT: shldl %cl, %eax, %ebp +; X86-NEXT: shldl %cl, %esi, %edi +; X86-NEXT: shll %cl, %edx +; X86-NEXT: orl %edi, %edx +; X86-NEXT: orl %ebp, %edx +; X86-NEXT: movl $0, %edx +; X86-NEXT: setne %dl +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: .LBB20_16: # %itofp-sw-epilog +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: .LBB20_17: # %itofp-sw-epilog +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: shrl $2, %ebx +; X86-NEXT: andl $1, %ebx +; X86-NEXT: orl %edx, %ebx +; X86-NEXT: addl $1, %ebx +; X86-NEXT: adcl $0, %esi +; X86-NEXT: adcl $0, %eax +; X86-NEXT: adcl $0, %edi +; X86-NEXT: testl $524288, %edi # imm = 0x80000 +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: jne .LBB20_19 +; X86-NEXT: # %bb.18: +; X86-NEXT: shrdl $2, %esi, %ebx +; X86-NEXT: shrdl $2, %eax, %esi +; X86-NEXT: shrdl $2, %edi, %eax +; X86-NEXT: sarl $2, %edi +; X86-NEXT: jmp .LBB20_21 +; X86-NEXT: .LBB20_19: # %itofp-if-then20 +; X86-NEXT: shrdl $3, %esi, %ebx +; X86-NEXT: shrdl $3, %eax, %esi +; X86-NEXT: shrdl $3, %edi, %eax +; X86-NEXT: sarl $3, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: .LBB20_21: # %itofp-if-end26 +; X86-NEXT: andl $-2147483648, %ecx # imm = 0x80000000 +; X86-NEXT: shll $16, %edx +; X86-NEXT: addl $1073676288, %edx # imm = 0x3FFF0000 +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: movzwl %di, %ecx +; X86-NEXT: orl %edx, %ecx +; X86-NEXT: .LBB20_22: # %itofp-return +; X86-NEXT: movl %ebx, vf128 +; X86-NEXT: movl %esi, vf128+4 +; X86-NEXT: movl %eax, vf128+8 +; X86-NEXT: movl %ecx, vf128+12 +; X86-NEXT: addl $156, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-AVX-LABEL: TestSIToFPI128_F128: @@ -974,31 +1448,251 @@ define dso_local void @TestUIToFPU128_F128() #2 { ; X64-SSE-NEXT: retq ; ; X86-LABEL: TestUIToFPU128_F128: -; X86: # %bb.0: # %entry +; X86: # %bb.0: # %entryitofp-entry +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $56, %esp -; X86-NEXT: movl vu128, %eax -; X86-NEXT: movl vu128+4, %ecx -; X86-NEXT: movl vu128+8, %edx +; X86-NEXT: subl $156, %esp +; X86-NEXT: movl vu128+4, %edx ; X86-NEXT: movl vu128+12, %esi +; X86-NEXT: movl vu128, %eax +; X86-NEXT: movl vu128+8, %ebx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: orl %esi, %ecx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: orl %ecx, %edi +; X86-NEXT: je .LBB21_1 +; X86-NEXT: # %bb.2: # %itofp-if-end +; X86-NEXT: testl %esi, %esi +; X86-NEXT: jne .LBB21_3 +; X86-NEXT: # %bb.4: # %itofp-if-end +; X86-NEXT: bsrl %ebx, %ecx +; X86-NEXT: xorl $31, %ecx +; X86-NEXT: orl $32, %ecx +; X86-NEXT: testl %edx, %edx +; X86-NEXT: je .LBB21_7 +; X86-NEXT: .LBB21_6: +; X86-NEXT: bsrl %edx, %edi +; X86-NEXT: xorl $31, %edi +; X86-NEXT: jmp .LBB21_8 +; X86-NEXT: .LBB21_1: +; X86-NEXT: xorl %edi, %edi +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: jmp .LBB21_21 +; X86-NEXT: .LBB21_3: +; X86-NEXT: bsrl %esi, %ecx +; X86-NEXT: xorl $31, %ecx +; X86-NEXT: testl %edx, %edx +; X86-NEXT: jne .LBB21_6 +; X86-NEXT: .LBB21_7: # %itofp-if-end +; X86-NEXT: bsrl %eax, %edi +; X86-NEXT: xorl $31, %edi +; X86-NEXT: orl $32, %edi +; X86-NEXT: .LBB21_8: # %itofp-if-end +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %esi, %ebx +; X86-NEXT: jne .LBB21_10 +; X86-NEXT: # %bb.9: # %itofp-if-end +; X86-NEXT: orl $64, %edi +; X86-NEXT: movl %edi, %ecx +; X86-NEXT: .LBB21_10: # %itofp-if-end +; X86-NEXT: movl %esi, %eax +; X86-NEXT: sarl $31, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: movl $128, %edx +; X86-NEXT: subl %ecx, %edx +; X86-NEXT: movl $0, %eax +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: movl $0, %ebp +; X86-NEXT: sbbl %ebp, %ebp +; X86-NEXT: sbbl %ebx, %ebx +; X86-NEXT: movl $127, %edi +; X86-NEXT: subl %ecx, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: cmpl $114, %edx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: sbbl $0, %edi +; X86-NEXT: movl %ebp, %edi +; X86-NEXT: sbbl $0, %edi +; X86-NEXT: movl %ebx, %edi +; X86-NEXT: sbbl $0, %edi +; X86-NEXT: jl .LBB21_19 +; X86-NEXT: # %bb.11: # %itofp-if-then4 +; X86-NEXT: movl %edx, %edi +; X86-NEXT: xorl $115, %edi +; X86-NEXT: orl %ebp, %edi +; X86-NEXT: orl %ebx, %eax +; X86-NEXT: orl %eax, %edi +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: je .LBB21_12 +; X86-NEXT: # %bb.13: # %itofp-if-then4 +; X86-NEXT: xorl $114, %edx +; X86-NEXT: orl %ebp, %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: jne .LBB21_15 +; X86-NEXT: # %bb.14: # %itofp-sw-bb +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: shldl $1, %ebx, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: shldl $1, %edx, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shldl $1, %eax, %edx +; X86-NEXT: addl %eax, %eax +; X86-NEXT: jmp .LBB21_16 +; X86-NEXT: .LBB21_19: # %itofp-if-else +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: addb $-15, %cl +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $12, %al +; X86-NEXT: negb %al +; X86-NEXT: movsbl %al, %eax +; X86-NEXT: movl 136(%esp,%eax), %ebx +; X86-NEXT: movl 140(%esp,%eax), %esi +; X86-NEXT: shldl %cl, %ebx, %esi +; X86-NEXT: movl 128(%esp,%eax), %edi +; X86-NEXT: movl 132(%esp,%eax), %edx +; X86-NEXT: shldl %cl, %edx, %ebx +; X86-NEXT: shldl %cl, %edi, %edx +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-NEXT: jmp .LBB21_20 +; X86-NEXT: .LBB21_12: +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: jmp .LBB21_16 +; X86-NEXT: .LBB21_15: # %itofp-sw-default +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movb $13, %al +; X86-NEXT: subb %cl, %al +; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $12, %al +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl 92(%esp,%eax), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 88(%esp,%eax), %edi +; X86-NEXT: movl 80(%esp,%eax), %ebp +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 84(%esp,%eax), %ebp +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shrdl %cl, %eax, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebp, %ebx +; X86-NEXT: shrdl %cl, %edi, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shrl %cl, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shrdl %cl, %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: addb $115, %dl +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: calll __floatuntitf -; X86-NEXT: subl $4, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, vf128+12 -; X86-NEXT: movl %edx, vf128+8 -; X86-NEXT: movl %ecx, vf128+4 -; X86-NEXT: movl %eax, vf128 -; X86-NEXT: addl $56, %esp +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: shrb $3, %cl +; X86-NEXT: andb $12, %cl +; X86-NEXT: negb %cl +; X86-NEXT: movsbl %cl, %ecx +; X86-NEXT: movl 72(%esp,%ecx), %ebx +; X86-NEXT: movl 64(%esp,%ecx), %esi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: orl %ebx, %eax +; X86-NEXT: movl 68(%esp,%ecx), %edi +; X86-NEXT: movl 76(%esp,%ecx), %ebp +; X86-NEXT: orl %edi, %ebp +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: shldl %cl, %eax, %ebp +; X86-NEXT: shldl %cl, %edi, %ebx +; X86-NEXT: shll %cl, %esi +; X86-NEXT: orl %ebx, %esi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: orl %ebp, %esi +; X86-NEXT: setne %al +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: .LBB21_16: # %itofp-sw-epilog +; X86-NEXT: movl %eax, %edi +; X86-NEXT: shrl $2, %edi +; X86-NEXT: andl $1, %edi +; X86-NEXT: orl %eax, %edi +; X86-NEXT: addl $1, %edi +; X86-NEXT: adcl $0, %edx +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: adcl $0, %esi +; X86-NEXT: testl $524288, %esi # imm = 0x80000 +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: jne .LBB21_18 +; X86-NEXT: # %bb.17: +; X86-NEXT: shrdl $2, %edx, %edi +; X86-NEXT: shrdl $2, %ebx, %edx +; X86-NEXT: shrdl $2, %esi, %ebx +; X86-NEXT: shrl $2, %esi +; X86-NEXT: jmp .LBB21_20 +; X86-NEXT: .LBB21_18: # %itofp-if-then20 +; X86-NEXT: shrdl $3, %edx, %edi +; X86-NEXT: shrdl $3, %ebx, %edx +; X86-NEXT: shrdl $3, %esi, %ebx +; X86-NEXT: shrl $3, %esi +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: .LBB21_20: # %itofp-if-end26 +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl $-2147483648, %eax # imm = 0x80000000 +; X86-NEXT: shll $16, %ebp +; X86-NEXT: addl $1073676288, %ebp # imm = 0x3FFF0000 +; X86-NEXT: orl %eax, %ebp +; X86-NEXT: movzwl %si, %ecx +; X86-NEXT: orl %ebp, %ecx +; X86-NEXT: .LBB21_21: # %itofp-return +; X86-NEXT: movl %edi, vf128 +; X86-NEXT: movl %edx, vf128+4 +; X86-NEXT: movl %ebx, vf128+8 +; X86-NEXT: movl %ecx, vf128+12 +; X86-NEXT: addl $156, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-AVX-LABEL: TestUIToFPU128_F128: diff --git a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll index 9b7a43a29a942..0f5b49e4378b2 100644 --- a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll @@ -685,88 +685,165 @@ define i64 @test_signed_i64_f32(float %f) nounwind { define i100 @test_signed_i100_f32(float %f) nounwind { ; X86-X87-LABEL: test_signed_i100_f32: -; X86-X87: # %bb.0: +; X86-X87: # %bb.0: # %fp-to-i-entry ; X86-X87-NEXT: pushl %ebp ; X86-X87-NEXT: pushl %ebx ; X86-X87-NEXT: pushl %edi ; X86-X87-NEXT: pushl %esi -; X86-X87-NEXT: subl $60, %esp +; X86-X87-NEXT: subl $76, %esp ; X86-X87-NEXT: flds {{[0-9]+}}(%esp) ; X86-X87-NEXT: fsts {{[0-9]+}}(%esp) -; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: movl %eax, (%esp) -; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fsts {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: movl %eax, %ebx -; X86-X87-NEXT: calll __fixsfti -; X86-X87-NEXT: subl $4, %esp -; X86-X87-NEXT: xorl %edx, %edx -; X86-X87-NEXT: movb %bh, %ah -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $-8, %ebx -; X86-X87-NEXT: jb .LBB8_2 -; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-X87-NEXT: .LBB8_2: -; X86-X87-NEXT: movl $0, %ecx -; X86-X87-NEXT: movl $0, %ebp -; X86-X87-NEXT: jb .LBB8_4 -; X86-X87-NEXT: # %bb.3: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-X87-NEXT: .LBB8_4: -; X86-X87-NEXT: movl $0, %edi -; X86-X87-NEXT: jb .LBB8_6 -; X86-X87-NEXT: # %bb.5: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-X87-NEXT: .LBB8_6: -; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-X87-NEXT: fucom %st(1) -; X86-X87-NEXT: fstp %st(1) -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $-1, %eax -; X86-X87-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-X87-NEXT: movl $-1, %esi -; X86-X87-NEXT: ja .LBB8_8 -; X86-X87-NEXT: # %bb.7: -; X86-X87-NEXT: movl %edi, %eax -; X86-X87-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl %ecx, %esi -; X86-X87-NEXT: .LBB8_8: -; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: movl $7, %edi -; X86-X87-NEXT: ja .LBB8_10 -; X86-X87-NEXT: # %bb.9: -; X86-X87-NEXT: movl %ebx, %edi -; X86-X87-NEXT: .LBB8_10: +; X86-X87-NEXT: movl %ebp, %eax +; X86-X87-NEXT: shrl $23, %eax +; X86-X87-NEXT: movzbl %al, %ecx +; X86-X87-NEXT: cmpl $127, %ecx +; X86-X87-NEXT: setb %dl ; X86-X87-NEXT: fucomp %st(0) ; X86-X87-NEXT: fnstsw %ax ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: movl $0, %ebp -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jp .LBB8_12 -; X86-X87-NEXT: # %bb.11: -; X86-X87-NEXT: movl %edi, %edx +; X86-X87-NEXT: setp %al +; X86-X87-NEXT: orb %dl, %al +; X86-X87-NEXT: je .LBB8_2 +; X86-X87-NEXT: # %bb.1: +; X86-X87-NEXT: xorl %esi, %esi +; X86-X87-NEXT: xorl %ecx, %ecx +; X86-X87-NEXT: xorl %edi, %edi +; X86-X87-NEXT: xorl %ebx, %ebx +; X86-X87-NEXT: jmp .LBB8_8 +; X86-X87-NEXT: .LBB8_2: # %fp-to-i-if-check.saturate +; X86-X87-NEXT: movl %ebp, %esi +; X86-X87-NEXT: sarl $31, %esi +; X86-X87-NEXT: cmpl $226, %ecx +; X86-X87-NEXT: jb .LBB8_4 +; X86-X87-NEXT: # %bb.3: # %fp-to-i-if-saturate +; X86-X87-NEXT: movl %esi, %ebx +; X86-X87-NEXT: xorl $7, %ebx +; X86-X87-NEXT: notl %esi +; X86-X87-NEXT: movl %esi, %ecx +; X86-X87-NEXT: movl %esi, %edi +; X86-X87-NEXT: jmp .LBB8_8 +; X86-X87-NEXT: .LBB8_4: # %fp-to-i-if-check.exp.size +; X86-X87-NEXT: movl %esi, %edi +; X86-X87-NEXT: orl $1, %edi +; X86-X87-NEXT: andl $8388607, %ebp # imm = 0x7FFFFF +; X86-X87-NEXT: orl $8388608, %ebp # imm = 0x800000 +; X86-X87-NEXT: cmpl $149, %ecx +; X86-X87-NEXT: ja .LBB8_6 +; X86-X87-NEXT: # %bb.5: # %fp-to-i-if-exp.small +; X86-X87-NEXT: movb $-106, %al +; X86-X87-NEXT: subb %cl, %al +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: shrl %cl, %ebp ; X86-X87-NEXT: movl %esi, %eax -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-X87-NEXT: .LBB8_12: -; X86-X87-NEXT: movl %ebx, 8(%ecx) -; X86-X87-NEXT: movl %ebp, 4(%ecx) -; X86-X87-NEXT: movl %eax, (%ecx) -; X86-X87-NEXT: andl $15, %edx -; X86-X87-NEXT: movb %dl, 12(%ecx) +; X86-X87-NEXT: imull %ebp, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: mull %ebp +; X86-X87-NEXT: movl %edx, %ebx +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %ebp, %eax +; X86-X87-NEXT: mull %edi +; X86-X87-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %ebp, %eax +; X86-X87-NEXT: mull %esi +; X86-X87-NEXT: movl %edx, %edi +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: addl (%esp), %ecx # 4-byte Folded Reload +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-X87-NEXT: jmp .LBB8_7 +; X86-X87-NEXT: .LBB8_6: # %fp-to-i-if-exp.large +; X86-X87-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: addb $106, %cl ; X86-X87-NEXT: movl %ecx, %eax -; X86-X87-NEXT: addl $60, %esp +; X86-X87-NEXT: shrb $3, %al +; X86-X87-NEXT: andb $12, %al +; X86-X87-NEXT: negb %al +; X86-X87-NEXT: movsbl %al, %eax +; X86-X87-NEXT: movl 52(%esp,%eax), %ebp +; X86-X87-NEXT: movl 56(%esp,%eax), %edx +; X86-X87-NEXT: movl %edx, %ebx +; X86-X87-NEXT: shldl %cl, %ebp, %ebx +; X86-X87-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl 48(%esp,%eax), %ebx +; X86-X87-NEXT: movl 60(%esp,%eax), %eax +; X86-X87-NEXT: shldl %cl, %edx, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: shldl %cl, %ebx, %ebp +; X86-X87-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-X87-NEXT: shll %cl, %ebx +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: mull %edi +; X86-X87-NEXT: movl %edx, %ecx +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %ebp, %eax +; X86-X87-NEXT: mull %edi +; X86-X87-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %eax, %edi +; X86-X87-NEXT: addl %ecx, %edi +; X86-X87-NEXT: adcl $0, %edx +; X86-X87-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: mull %esi +; X86-X87-NEXT: movl %edx, %ebp +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: addl %edi, %ecx +; X86-X87-NEXT: adcl (%esp), %ebp # 4-byte Folded Reload +; X86-X87-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-X87-NEXT: movl %edi, %eax +; X86-X87-NEXT: mull %esi +; X86-X87-NEXT: addl %ebp, %eax +; X86-X87-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-X87-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-X87-NEXT: adcl %eax, %edx +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-X87-NEXT: imull %edx, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %edx, %eax +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-X87-NEXT: mull %ebp +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-X87-NEXT: movl %ebp, %eax +; X86-X87-NEXT: imull %esi, %eax +; X86-X87-NEXT: addl %edx, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: imull %esi, %edi +; X86-X87-NEXT: movl %edi, %ebp +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: mull %ebx +; X86-X87-NEXT: movl %eax, %edi +; X86-X87-NEXT: addl %ebp, %edx +; X86-X87-NEXT: imull %esi, %ebx +; X86-X87-NEXT: addl %edx, %ebx +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-X87-NEXT: addl (%esp), %edi # 4-byte Folded Reload +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-X87-NEXT: .LBB8_7: # %fp-to-i-cleanup +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-X87-NEXT: .LBB8_8: # %fp-to-i-cleanup +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-X87-NEXT: movl %esi, (%eax) +; X86-X87-NEXT: movl %ecx, 4(%eax) +; X86-X87-NEXT: movl %edi, 8(%eax) +; X86-X87-NEXT: andl $15, %ebx +; X86-X87-NEXT: movb %bl, 12(%eax) +; X86-X87-NEXT: addl $76, %esp ; X86-X87-NEXT: popl %esi ; X86-X87-NEXT: popl %edi ; X86-X87-NEXT: popl %ebx @@ -774,52 +851,157 @@ define i100 @test_signed_i100_f32(float %f) nounwind { ; X86-X87-NEXT: retl $4 ; ; X86-SSE-LABEL: test_signed_i100_f32: -; X86-SSE: # %bb.0: +; X86-SSE: # %bb.0: # %fp-to-i-entry ; X86-SSE-NEXT: pushl %ebp ; X86-SSE-NEXT: pushl %ebx ; X86-SSE-NEXT: pushl %edi ; X86-SSE-NEXT: pushl %esi -; X86-SSE-NEXT: subl $44, %esp -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss %xmm0, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl %eax, (%esp) -; X86-SSE-NEXT: calll __fixsfti -; X86-SSE-NEXT: subl $4, %esp -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: xorl %ebp, %ebp -; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: movl $-8, %ebx -; X86-SSE-NEXT: movl $0, %ecx -; X86-SSE-NEXT: movl $0, %edx +; X86-SSE-NEXT: subl $76, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movd %xmm0, %ebp +; X86-SSE-NEXT: movl %ebp, %ecx +; X86-SSE-NEXT: shrl $23, %ecx +; X86-SSE-NEXT: movzbl %cl, %ecx +; X86-SSE-NEXT: xorl %ebx, %ebx +; X86-SSE-NEXT: cmpl $127, %ecx ; X86-SSE-NEXT: movl $0, %edi -; X86-SSE-NEXT: jb .LBB8_2 -; X86-SSE-NEXT: # %bb.1: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE-NEXT: .LBB8_2: -; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: movl $-1, %eax -; X86-SSE-NEXT: cmoval %eax, %edi -; X86-SSE-NEXT: cmoval %eax, %edx -; X86-SSE-NEXT: cmoval %eax, %ecx -; X86-SSE-NEXT: movl $7, %eax -; X86-SSE-NEXT: cmovbel %ebx, %eax +; X86-SSE-NEXT: jb .LBB8_1 +; X86-SSE-NEXT: # %bb.2: # %fp-to-i-entry ; X86-SSE-NEXT: ucomiss %xmm0, %xmm0 -; X86-SSE-NEXT: cmovpl %ebp, %eax -; X86-SSE-NEXT: cmovpl %ebp, %ecx -; X86-SSE-NEXT: cmovpl %ebp, %edx -; X86-SSE-NEXT: cmovpl %ebp, %edi -; X86-SSE-NEXT: movl %edi, 8(%esi) -; X86-SSE-NEXT: movl %edx, 4(%esi) -; X86-SSE-NEXT: movl %ecx, (%esi) -; X86-SSE-NEXT: andl $15, %eax -; X86-SSE-NEXT: movb %al, 12(%esi) -; X86-SSE-NEXT: movl %esi, %eax -; X86-SSE-NEXT: addl $44, %esp +; X86-SSE-NEXT: movl $0, %edx +; X86-SSE-NEXT: movl $0, %esi +; X86-SSE-NEXT: jp .LBB8_9 +; X86-SSE-NEXT: # %bb.3: # %fp-to-i-if-check.saturate +; X86-SSE-NEXT: movl %ebp, %edi +; X86-SSE-NEXT: sarl $31, %edi +; X86-SSE-NEXT: cmpl $226, %ecx +; X86-SSE-NEXT: jb .LBB8_5 +; X86-SSE-NEXT: # %bb.4: # %fp-to-i-if-saturate +; X86-SSE-NEXT: movl %edi, %ebx +; X86-SSE-NEXT: xorl $7, %ebx +; X86-SSE-NEXT: notl %edi +; X86-SSE-NEXT: movl %edi, %edx +; X86-SSE-NEXT: movl %edi, %esi +; X86-SSE-NEXT: jmp .LBB8_9 +; X86-SSE-NEXT: .LBB8_1: +; X86-SSE-NEXT: movl $0, %edx +; X86-SSE-NEXT: movl $0, %esi +; X86-SSE-NEXT: jmp .LBB8_9 +; X86-SSE-NEXT: .LBB8_5: # %fp-to-i-if-check.exp.size +; X86-SSE-NEXT: movl %edi, %esi +; X86-SSE-NEXT: orl $1, %esi +; X86-SSE-NEXT: andl $8388607, %ebp # imm = 0x7FFFFF +; X86-SSE-NEXT: orl $8388608, %ebp # imm = 0x800000 +; X86-SSE-NEXT: cmpl $149, %ecx +; X86-SSE-NEXT: ja .LBB8_7 +; X86-SSE-NEXT: # %bb.6: # %fp-to-i-if-exp.small +; X86-SSE-NEXT: movb $-106, %al +; X86-SSE-NEXT: subb %cl, %al +; X86-SSE-NEXT: movl %eax, %ecx +; X86-SSE-NEXT: shrl %cl, %ebp +; X86-SSE-NEXT: movl %edi, %ecx +; X86-SSE-NEXT: imull %ebp, %ecx +; X86-SSE-NEXT: movl %edi, %eax +; X86-SSE-NEXT: mull %ebp +; X86-SSE-NEXT: movl %edx, %ebx +; X86-SSE-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-SSE-NEXT: movl %ebp, %eax +; X86-SSE-NEXT: mull %esi +; X86-SSE-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %ebp, %eax +; X86-SSE-NEXT: mull %edi +; X86-SSE-NEXT: movl %edx, %esi +; X86-SSE-NEXT: movl %eax, %edx +; X86-SSE-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-SSE-NEXT: adcl (%esp), %esi # 4-byte Folded Reload +; X86-SSE-NEXT: adcl %ecx, %ebx +; X86-SSE-NEXT: jmp .LBB8_8 +; X86-SSE-NEXT: .LBB8_7: # %fp-to-i-if-exp.large +; X86-SSE-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: xorps %xmm0, %xmm0 +; X86-SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: addb $106, %cl +; X86-SSE-NEXT: movl %ecx, %eax +; X86-SSE-NEXT: shrb $3, %al +; X86-SSE-NEXT: andb $12, %al +; X86-SSE-NEXT: negb %al +; X86-SSE-NEXT: movsbl %al, %eax +; X86-SSE-NEXT: movl 52(%esp,%eax), %ebp +; X86-SSE-NEXT: movl 56(%esp,%eax), %edx +; X86-SSE-NEXT: movl %edx, %ebx +; X86-SSE-NEXT: shldl %cl, %ebp, %ebx +; X86-SSE-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl 48(%esp,%eax), %ebx +; X86-SSE-NEXT: movl 60(%esp,%eax), %eax +; X86-SSE-NEXT: shldl %cl, %edx, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: shldl %cl, %ebx, %ebp +; X86-SSE-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-SSE-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-SSE-NEXT: shll %cl, %ebx +; X86-SSE-NEXT: movl %ebx, %eax +; X86-SSE-NEXT: mull %esi +; X86-SSE-NEXT: movl %esi, %ecx +; X86-SSE-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %edx, %esi +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %ebp, %eax +; X86-SSE-NEXT: mull %ecx +; X86-SSE-NEXT: movl %edx, %ecx +; X86-SSE-NEXT: movl %eax, %ebp +; X86-SSE-NEXT: addl %esi, %ebp +; X86-SSE-NEXT: adcl $0, %ecx +; X86-SSE-NEXT: movl %ebx, %eax +; X86-SSE-NEXT: mull %edi +; X86-SSE-NEXT: movl %edx, %esi +; X86-SSE-NEXT: addl %ebp, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: adcl %ecx, %esi +; X86-SSE-NEXT: setb %cl +; X86-SSE-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-SSE-NEXT: mull %edi +; X86-SSE-NEXT: addl %esi, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movzbl %cl, %eax +; X86-SSE-NEXT: adcl %eax, %edx +; X86-SSE-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-SSE-NEXT: imull %eax, %ecx +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-SSE-NEXT: mull %ebp +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: addl %ecx, %edx +; X86-SSE-NEXT: imull %edi, %ebp +; X86-SSE-NEXT: addl %edx, %ebp +; X86-SSE-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-SSE-NEXT: imull %edi, %ecx +; X86-SSE-NEXT: movl %edi, %eax +; X86-SSE-NEXT: mull %ebx +; X86-SSE-NEXT: movl %eax, %esi +; X86-SSE-NEXT: addl %ecx, %edx +; X86-SSE-NEXT: imull %edi, %ebx +; X86-SSE-NEXT: addl %edx, %ebx +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-SSE-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-SSE-NEXT: adcl %ebp, %ebx +; X86-SSE-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-SSE-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-SSE-NEXT: .LBB8_8: # %fp-to-i-cleanup +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: .LBB8_9: # %fp-to-i-cleanup +; X86-SSE-NEXT: movl %edi, (%eax) +; X86-SSE-NEXT: movl %edx, 4(%eax) +; X86-SSE-NEXT: movl %esi, 8(%eax) +; X86-SSE-NEXT: andl $15, %ebx +; X86-SSE-NEXT: movb %bl, 12(%eax) +; X86-SSE-NEXT: addl $76, %esp ; X86-SSE-NEXT: popl %esi ; X86-SSE-NEXT: popl %edi ; X86-SSE-NEXT: popl %ebx @@ -854,143 +1036,336 @@ define i100 @test_signed_i100_f32(float %f) nounwind { define i128 @test_signed_i128_f32(float %f) nounwind { ; X86-X87-LABEL: test_signed_i128_f32: -; X86-X87: # %bb.0: +; X86-X87: # %bb.0: # %fp-to-i-entry ; X86-X87-NEXT: pushl %ebp ; X86-X87-NEXT: pushl %ebx ; X86-X87-NEXT: pushl %edi ; X86-X87-NEXT: pushl %esi -; X86-X87-NEXT: subl $60, %esp +; X86-X87-NEXT: subl $76, %esp +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-X87-NEXT: flds {{[0-9]+}}(%esp) ; X86-X87-NEXT: fsts {{[0-9]+}}(%esp) -; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: movl %eax, (%esp) -; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fsts {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: movl %eax, %ebx -; X86-X87-NEXT: calll __fixsfti -; X86-X87-NEXT: subl $4, %esp -; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-X87-NEXT: movb %bh, %ah -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jae .LBB9_1 -; X86-X87-NEXT: # %bb.2: -; X86-X87-NEXT: movl $0, %edx -; X86-X87-NEXT: jae .LBB9_3 -; X86-X87-NEXT: .LBB9_4: -; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X86-X87-NEXT: jb .LBB9_6 -; X86-X87-NEXT: .LBB9_5: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: .LBB9_6: -; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-X87-NEXT: fucom %st(1) -; X86-X87-NEXT: fstp %st(1) +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: shrl $23, %eax +; X86-X87-NEXT: movzbl %al, %ecx +; X86-X87-NEXT: cmpl $127, %ecx +; X86-X87-NEXT: setb %dl +; X86-X87-NEXT: fucomp %st(0) ; X86-X87-NEXT: fnstsw %ax ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-X87-NEXT: ja .LBB9_8 -; X86-X87-NEXT: # %bb.7: +; X86-X87-NEXT: setp %al +; X86-X87-NEXT: orb %dl, %al +; X86-X87-NEXT: je .LBB9_2 +; X86-X87-NEXT: # %bb.1: +; X86-X87-NEXT: xorl %ecx, %ecx +; X86-X87-NEXT: xorl %ebp, %ebp +; X86-X87-NEXT: xorl %esi, %esi +; X86-X87-NEXT: xorl %edi, %edi +; X86-X87-NEXT: jmp .LBB9_8 +; X86-X87-NEXT: .LBB9_2: # %fp-to-i-if-check.saturate +; X86-X87-NEXT: cmpl $254, %ecx +; X86-X87-NEXT: jb .LBB9_4 +; X86-X87-NEXT: # %bb.3: # %fp-to-i-if-saturate +; X86-X87-NEXT: xorl %ecx, %ecx +; X86-X87-NEXT: testl %esi, %esi +; X86-X87-NEXT: setns %cl +; X86-X87-NEXT: xorl %esi, %esi +; X86-X87-NEXT: negl %ecx +; X86-X87-NEXT: movl $0, %ebp +; X86-X87-NEXT: sbbl %ebp, %ebp +; X86-X87-NEXT: sbbl %esi, %esi +; X86-X87-NEXT: movl $-2147483648, %edi # imm = 0x80000000 +; X86-X87-NEXT: sbbl $0, %edi +; X86-X87-NEXT: jmp .LBB9_8 +; X86-X87-NEXT: .LBB9_4: # %fp-to-i-if-check.exp.size +; X86-X87-NEXT: movl %esi, %ebx +; X86-X87-NEXT: sarl $31, %ebx +; X86-X87-NEXT: movl %ebx, %ebp +; X86-X87-NEXT: orl $1, %ebp +; X86-X87-NEXT: andl $8388607, %esi # imm = 0x7FFFFF +; X86-X87-NEXT: orl $8388608, %esi # imm = 0x800000 +; X86-X87-NEXT: cmpl $149, %ecx +; X86-X87-NEXT: ja .LBB9_6 +; X86-X87-NEXT: # %bb.5: # %fp-to-i-if-exp.small +; X86-X87-NEXT: movb $-106, %al +; X86-X87-NEXT: subb %cl, %al +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: shrl %cl, %esi +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: imull %esi, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: mull %esi +; X86-X87-NEXT: movl %edx, %edi +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: mull %ebp +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: mull %ebx +; X86-X87-NEXT: movl %edx, %esi +; X86-X87-NEXT: movl %eax, %ebp +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-X87-NEXT: jmp .LBB9_7 +; X86-X87-NEXT: .LBB9_6: # %fp-to-i-if-exp.large +; X86-X87-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: addb $106, %cl ; X86-X87-NEXT: movl %ecx, %eax -; X86-X87-NEXT: .LBB9_8: +; X86-X87-NEXT: shrb $3, %al +; X86-X87-NEXT: andb $12, %al +; X86-X87-NEXT: negb %al +; X86-X87-NEXT: movsbl %al, %eax +; X86-X87-NEXT: movl 52(%esp,%eax), %esi +; X86-X87-NEXT: movl 56(%esp,%eax), %edx +; X86-X87-NEXT: movl %edx, %edi +; X86-X87-NEXT: shldl %cl, %esi, %edi +; X86-X87-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl 48(%esp,%eax), %edi +; X86-X87-NEXT: movl 60(%esp,%eax), %eax +; X86-X87-NEXT: shldl %cl, %edx, %eax ; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: movl $-1, %ebp -; X86-X87-NEXT: movl $-1, %edi -; X86-X87-NEXT: movl $-1, %esi -; X86-X87-NEXT: ja .LBB9_10 -; X86-X87-NEXT: # %bb.9: -; X86-X87-NEXT: movl %edx, %ebp -; X86-X87-NEXT: movl %ebx, %edi -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-X87-NEXT: .LBB9_10: -; X86-X87-NEXT: fucomp %st(0) -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: movl $0, %edx -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jp .LBB9_12 -; X86-X87-NEXT: # %bb.11: +; X86-X87-NEXT: shldl %cl, %edi, %esi ; X86-X87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-X87-NEXT: shll %cl, %edi ; X86-X87-NEXT: movl %edi, %eax -; X86-X87-NEXT: movl %ebp, %edx -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-X87-NEXT: .LBB9_12: -; X86-X87-NEXT: movl %ebx, 12(%ecx) -; X86-X87-NEXT: movl %edx, 8(%ecx) -; X86-X87-NEXT: movl %eax, 4(%ecx) +; X86-X87-NEXT: mull %ebp +; X86-X87-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: mull %ebp +; X86-X87-NEXT: movl %eax, %esi +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-X87-NEXT: adcl $0, %edx +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %edi, %eax +; X86-X87-NEXT: mull %ebx +; X86-X87-NEXT: movl %eax, %ebp +; X86-X87-NEXT: addl %esi, %ebp +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-X87-NEXT: movl %edx, %esi +; X86-X87-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-X87-NEXT: movl %eax, (%ecx) -; X86-X87-NEXT: movl %ecx, %eax -; X86-X87-NEXT: addl $60, %esp +; X86-X87-NEXT: mull %ebx +; X86-X87-NEXT: addl %esi, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-X87-NEXT: adcl %eax, %edx +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-X87-NEXT: imull %edx, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %edx, %eax +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-X87-NEXT: mull %esi +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: imull %ebx, %eax +; X86-X87-NEXT: addl %edx, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-X87-NEXT: imull %ebx, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: mull %edi +; X86-X87-NEXT: movl %eax, %esi +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-X87-NEXT: imull %ebx, %edi +; X86-X87-NEXT: addl %edx, %edi +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-X87-NEXT: .LBB9_7: # %fp-to-i-cleanup +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-X87-NEXT: .LBB9_8: # %fp-to-i-cleanup +; X86-X87-NEXT: movl %ecx, (%ebx) +; X86-X87-NEXT: movl %ebp, 4(%ebx) +; X86-X87-NEXT: movl %esi, 8(%ebx) +; X86-X87-NEXT: movl %edi, 12(%ebx) +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: addl $76, %esp ; X86-X87-NEXT: popl %esi ; X86-X87-NEXT: popl %edi ; X86-X87-NEXT: popl %ebx ; X86-X87-NEXT: popl %ebp ; X86-X87-NEXT: retl $4 -; X86-X87-NEXT: .LBB9_1: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-X87-NEXT: movl $0, %edx -; X86-X87-NEXT: jb .LBB9_4 -; X86-X87-NEXT: .LBB9_3: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X86-X87-NEXT: jae .LBB9_5 -; X86-X87-NEXT: jmp .LBB9_6 ; ; X86-SSE-LABEL: test_signed_i128_f32: -; X86-SSE: # %bb.0: +; X86-SSE: # %bb.0: # %fp-to-i-entry ; X86-SSE-NEXT: pushl %ebp ; X86-SSE-NEXT: pushl %ebx ; X86-SSE-NEXT: pushl %edi ; X86-SSE-NEXT: pushl %esi -; X86-SSE-NEXT: subl $44, %esp -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss %xmm0, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl %eax, (%esp) -; X86-SSE-NEXT: calll __fixsfti -; X86-SSE-NEXT: subl $4, %esp -; X86-SSE-NEXT: xorl %ecx, %ecx -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: subl $76, %esp ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: cmovbl %ecx, %eax -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SSE-NEXT: cmovbl %ecx, %edx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE-NEXT: cmovbl %ecx, %edi -; X86-SSE-NEXT: movl $-2147483648, %ebp # imm = 0x80000000 -; X86-SSE-NEXT: cmovael {{[0-9]+}}(%esp), %ebp -; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: movl $2147483647, %ebx # imm = 0x7FFFFFFF -; X86-SSE-NEXT: cmovbel %ebp, %ebx -; X86-SSE-NEXT: movl $-1, %ebp -; X86-SSE-NEXT: cmoval %ebp, %edi -; X86-SSE-NEXT: cmoval %ebp, %edx -; X86-SSE-NEXT: cmoval %ebp, %eax +; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movd %xmm0, %ebp +; X86-SSE-NEXT: movl %ebp, %ecx +; X86-SSE-NEXT: shrl $23, %ecx +; X86-SSE-NEXT: movzbl %cl, %ecx +; X86-SSE-NEXT: xorl %esi, %esi +; X86-SSE-NEXT: cmpl $127, %ecx +; X86-SSE-NEXT: movl $0, %edx +; X86-SSE-NEXT: jb .LBB9_1 +; X86-SSE-NEXT: # %bb.2: # %fp-to-i-entry ; X86-SSE-NEXT: ucomiss %xmm0, %xmm0 -; X86-SSE-NEXT: cmovpl %ecx, %eax -; X86-SSE-NEXT: cmovpl %ecx, %edx -; X86-SSE-NEXT: cmovpl %ecx, %edi -; X86-SSE-NEXT: cmovpl %ecx, %ebx -; X86-SSE-NEXT: movl %ebx, 12(%esi) -; X86-SSE-NEXT: movl %edi, 8(%esi) -; X86-SSE-NEXT: movl %edx, 4(%esi) -; X86-SSE-NEXT: movl %eax, (%esi) +; X86-SSE-NEXT: movl $0, %edi +; X86-SSE-NEXT: movl $0, %ebx +; X86-SSE-NEXT: jp .LBB9_9 +; X86-SSE-NEXT: # %bb.3: # %fp-to-i-if-check.saturate +; X86-SSE-NEXT: cmpl $254, %ecx +; X86-SSE-NEXT: jb .LBB9_5 +; X86-SSE-NEXT: # %bb.4: # %fp-to-i-if-saturate +; X86-SSE-NEXT: xorl %edx, %edx +; X86-SSE-NEXT: testl %ebp, %ebp +; X86-SSE-NEXT: setns %dl +; X86-SSE-NEXT: xorl %ebx, %ebx +; X86-SSE-NEXT: negl %edx +; X86-SSE-NEXT: sbbl %edi, %edi +; X86-SSE-NEXT: sbbl %ebx, %ebx +; X86-SSE-NEXT: movl $-2147483648, %esi # imm = 0x80000000 +; X86-SSE-NEXT: sbbl $0, %esi +; X86-SSE-NEXT: jmp .LBB9_9 +; X86-SSE-NEXT: .LBB9_1: +; X86-SSE-NEXT: movl $0, %edi +; X86-SSE-NEXT: movl $0, %ebx +; X86-SSE-NEXT: jmp .LBB9_9 +; X86-SSE-NEXT: .LBB9_5: # %fp-to-i-if-check.exp.size +; X86-SSE-NEXT: movl %ebp, %edi +; X86-SSE-NEXT: sarl $31, %edi +; X86-SSE-NEXT: movl %edi, %ebx +; X86-SSE-NEXT: orl $1, %ebx +; X86-SSE-NEXT: andl $8388607, %ebp # imm = 0x7FFFFF +; X86-SSE-NEXT: orl $8388608, %ebp # imm = 0x800000 +; X86-SSE-NEXT: cmpl $149, %ecx +; X86-SSE-NEXT: ja .LBB9_7 +; X86-SSE-NEXT: # %bb.6: # %fp-to-i-if-exp.small +; X86-SSE-NEXT: movb $-106, %al +; X86-SSE-NEXT: subb %cl, %al +; X86-SSE-NEXT: movl %eax, %ecx +; X86-SSE-NEXT: shrl %cl, %ebp +; X86-SSE-NEXT: movl %edi, %ecx +; X86-SSE-NEXT: imull %ebp, %ecx +; X86-SSE-NEXT: movl %edi, %eax +; X86-SSE-NEXT: mull %ebp +; X86-SSE-NEXT: movl %edx, %esi +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %ebp, %eax +; X86-SSE-NEXT: mull %ebx +; X86-SSE-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %ebp, %eax +; X86-SSE-NEXT: mull %edi +; X86-SSE-NEXT: movl %edx, %ebx +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-SSE-NEXT: movl %eax, %edi +; X86-SSE-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-SSE-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-SSE-NEXT: adcl %ecx, %esi +; X86-SSE-NEXT: jmp .LBB9_8 +; X86-SSE-NEXT: .LBB9_7: # %fp-to-i-if-exp.large +; X86-SSE-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: xorps %xmm0, %xmm0 +; X86-SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: addb $106, %cl +; X86-SSE-NEXT: movl %ecx, %eax +; X86-SSE-NEXT: shrb $3, %al +; X86-SSE-NEXT: andb $12, %al +; X86-SSE-NEXT: negb %al +; X86-SSE-NEXT: movsbl %al, %eax +; X86-SSE-NEXT: movl 52(%esp,%eax), %ebp +; X86-SSE-NEXT: movl 56(%esp,%eax), %edx +; X86-SSE-NEXT: movl %edx, %esi +; X86-SSE-NEXT: shldl %cl, %ebp, %esi +; X86-SSE-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl 48(%esp,%eax), %esi +; X86-SSE-NEXT: movl 60(%esp,%eax), %eax +; X86-SSE-NEXT: shldl %cl, %edx, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: shldl %cl, %esi, %ebp +; X86-SSE-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-SSE-NEXT: shll %cl, %esi +; X86-SSE-NEXT: movl %esi, %eax +; X86-SSE-NEXT: mull %ebx +; X86-SSE-NEXT: movl %ebx, %ecx +; X86-SSE-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %edx, %ebx +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %ebp, %eax +; X86-SSE-NEXT: mull %ecx +; X86-SSE-NEXT: movl %edx, %ecx +; X86-SSE-NEXT: movl %eax, %ebp +; X86-SSE-NEXT: addl %ebx, %ebp +; X86-SSE-NEXT: adcl $0, %ecx ; X86-SSE-NEXT: movl %esi, %eax -; X86-SSE-NEXT: addl $44, %esp +; X86-SSE-NEXT: mull %edi +; X86-SSE-NEXT: movl %edx, %ebx +; X86-SSE-NEXT: addl %ebp, %eax +; X86-SSE-NEXT: movl %edi, %ebp +; X86-SSE-NEXT: movl %eax, %edi +; X86-SSE-NEXT: adcl %ecx, %ebx +; X86-SSE-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE-NEXT: movl %ecx, %eax +; X86-SSE-NEXT: mull %ebp +; X86-SSE-NEXT: addl %ebx, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-SSE-NEXT: adcl %eax, %edx +; X86-SSE-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-SSE-NEXT: imull %edx, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %edx, %eax +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-SSE-NEXT: mull %ebx +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-SSE-NEXT: movl %ebx, %eax +; X86-SSE-NEXT: imull %ebp, %eax +; X86-SSE-NEXT: addl %edx, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: imull %ebp, %ecx +; X86-SSE-NEXT: movl %ebp, %eax +; X86-SSE-NEXT: mull %esi +; X86-SSE-NEXT: movl %eax, %ebx +; X86-SSE-NEXT: addl %ecx, %edx +; X86-SSE-NEXT: imull %ebp, %esi +; X86-SSE-NEXT: addl %edx, %esi +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-SSE-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-SSE-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-SSE-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-SSE-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-SSE-NEXT: .LBB9_8: # %fp-to-i-cleanup +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: .LBB9_9: # %fp-to-i-cleanup +; X86-SSE-NEXT: movl %edx, (%eax) +; X86-SSE-NEXT: movl %edi, 4(%eax) +; X86-SSE-NEXT: movl %ebx, 8(%eax) +; X86-SSE-NEXT: movl %esi, 12(%eax) +; X86-SSE-NEXT: addl $76, %esp ; X86-SSE-NEXT: popl %esi ; X86-SSE-NEXT: popl %edi ; X86-SSE-NEXT: popl %ebx @@ -1694,88 +2069,204 @@ define i64 @test_signed_i64_f64(double %f) nounwind { define i100 @test_signed_i100_f64(double %f) nounwind { ; X86-X87-LABEL: test_signed_i100_f64: -; X86-X87: # %bb.0: +; X86-X87: # %bb.0: # %fp-to-i-entry ; X86-X87-NEXT: pushl %ebp ; X86-X87-NEXT: pushl %ebx ; X86-X87-NEXT: pushl %edi ; X86-X87-NEXT: pushl %esi -; X86-X87-NEXT: subl $60, %esp +; X86-X87-NEXT: subl $76, %esp +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-X87-NEXT: fldl {{[0-9]+}}(%esp) ; X86-X87-NEXT: fstl {{[0-9]+}}(%esp) -; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: movl %eax, (%esp) -; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fstl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: movl %eax, %ebx -; X86-X87-NEXT: calll __fixdfti -; X86-X87-NEXT: subl $4, %esp -; X86-X87-NEXT: xorl %edx, %edx -; X86-X87-NEXT: movb %bh, %ah -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $-8, %ebx -; X86-X87-NEXT: jb .LBB18_2 -; X86-X87-NEXT: # %bb.1: +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-X87-NEXT: .LBB18_2: -; X86-X87-NEXT: movl $0, %ecx -; X86-X87-NEXT: movl $0, %ebp -; X86-X87-NEXT: jb .LBB18_4 -; X86-X87-NEXT: # %bb.3: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-X87-NEXT: .LBB18_4: -; X86-X87-NEXT: movl $0, %edi -; X86-X87-NEXT: jb .LBB18_6 -; X86-X87-NEXT: # %bb.5: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-X87-NEXT: .LBB18_6: -; X86-X87-NEXT: fldl {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload -; X86-X87-NEXT: fucom %st(1) -; X86-X87-NEXT: fstp %st(1) -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $-1, %eax -; X86-X87-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-X87-NEXT: movl $-1, %esi -; X86-X87-NEXT: ja .LBB18_8 -; X86-X87-NEXT: # %bb.7: -; X86-X87-NEXT: movl %edi, %eax -; X86-X87-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl %ecx, %esi -; X86-X87-NEXT: .LBB18_8: -; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: movl $7, %edi -; X86-X87-NEXT: ja .LBB18_10 -; X86-X87-NEXT: # %bb.9: +; X86-X87-NEXT: testl %ebx, %ebx +; X86-X87-NEXT: setns %dl +; X86-X87-NEXT: movl %ebx, %ecx +; X86-X87-NEXT: shrl $20, %ecx +; X86-X87-NEXT: andl $2047, %ecx # imm = 0x7FF ; X86-X87-NEXT: movl %ebx, %edi -; X86-X87-NEXT: .LBB18_10: +; X86-X87-NEXT: andl $1048575, %ebx # imm = 0xFFFFF +; X86-X87-NEXT: addl $-1, %esi +; X86-X87-NEXT: adcl $1048575, %ebx # imm = 0xFFFFF +; X86-X87-NEXT: addl $1, %esi +; X86-X87-NEXT: adcl $0, %ebx +; X86-X87-NEXT: cmpl $1023, %ecx # imm = 0x3FF +; X86-X87-NEXT: setb %dh ; X86-X87-NEXT: fucomp %st(0) ; X86-X87-NEXT: fnstsw %ax ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: movl $0, %ebp -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jp .LBB18_12 -; X86-X87-NEXT: # %bb.11: -; X86-X87-NEXT: movl %edi, %edx +; X86-X87-NEXT: setp %al +; X86-X87-NEXT: orb %dh, %al +; X86-X87-NEXT: je .LBB18_2 +; X86-X87-NEXT: # %bb.1: +; X86-X87-NEXT: xorl %ecx, %ecx +; X86-X87-NEXT: xorl %edi, %edi +; X86-X87-NEXT: xorl %eax, %eax +; X86-X87-NEXT: xorl %ebx, %ebx +; X86-X87-NEXT: jmp .LBB18_13 +; X86-X87-NEXT: .LBB18_2: # %fp-to-i-if-check.saturate +; X86-X87-NEXT: cmpl $1122, %ecx # imm = 0x462 +; X86-X87-NEXT: jb .LBB18_8 +; X86-X87-NEXT: # %bb.3: # %fp-to-i-if-saturate +; X86-X87-NEXT: testb %dl, %dl +; X86-X87-NEXT: movl $7, %ebx +; X86-X87-NEXT: jne .LBB18_5 +; X86-X87-NEXT: # %bb.4: # %fp-to-i-if-saturate +; X86-X87-NEXT: movl $-8, %ebx +; X86-X87-NEXT: .LBB18_5: # %fp-to-i-if-saturate +; X86-X87-NEXT: movl $-1, %ecx +; X86-X87-NEXT: jne .LBB18_7 +; X86-X87-NEXT: # %bb.6: # %fp-to-i-if-saturate +; X86-X87-NEXT: xorl %ecx, %ecx +; X86-X87-NEXT: .LBB18_7: # %fp-to-i-if-saturate +; X86-X87-NEXT: movl %ecx, %edi +; X86-X87-NEXT: movl %ecx, %eax +; X86-X87-NEXT: jmp .LBB18_13 +; X86-X87-NEXT: .LBB18_8: # %fp-to-i-if-check.exp.size +; X86-X87-NEXT: movl %edi, %ebp +; X86-X87-NEXT: sarl $31, %ebp +; X86-X87-NEXT: movl %ebp, %edi +; X86-X87-NEXT: orl $1, %edi +; X86-X87-NEXT: cmpl $1074, %ecx # imm = 0x432 +; X86-X87-NEXT: ja .LBB18_12 +; X86-X87-NEXT: # %bb.9: # %fp-to-i-if-exp.small +; X86-X87-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movb $51, %al +; X86-X87-NEXT: subb %cl, %al +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: shrdl %cl, %ebx, %esi +; X86-X87-NEXT: shrl %cl, %ebx +; X86-X87-NEXT: testb $32, %al +; X86-X87-NEXT: je .LBB18_11 +; X86-X87-NEXT: # %bb.10: +; X86-X87-NEXT: movl %ebx, %esi +; X86-X87-NEXT: xorl %ebx, %ebx +; X86-X87-NEXT: .LBB18_11: # %fp-to-i-if-exp.small +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: mull %edi +; X86-X87-NEXT: movl %edx, %ebp +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: mull %edi +; X86-X87-NEXT: addl %ebp, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: adcl $0, %edx +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-X87-NEXT: movl %esi, %eax ; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-X87-NEXT: .LBB18_12: -; X86-X87-NEXT: movl %ebx, 8(%ecx) -; X86-X87-NEXT: movl %ebp, 4(%ecx) -; X86-X87-NEXT: movl %eax, (%ecx) -; X86-X87-NEXT: andl $15, %edx -; X86-X87-NEXT: movb %dl, 12(%ecx) +; X86-X87-NEXT: mull %ebp +; X86-X87-NEXT: movl %eax, %edi +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: mull %ebp +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-X87-NEXT: adcl %eax, %edx +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %ebp, %edx +; X86-X87-NEXT: movl %ebp, %eax +; X86-X87-NEXT: imull %ebx, %ebp +; X86-X87-NEXT: movl %ebp, %ebx +; X86-X87-NEXT: movl %edx, %ebp +; X86-X87-NEXT: imull %esi, %ebp +; X86-X87-NEXT: mull %esi +; X86-X87-NEXT: addl %edx, %ebx +; X86-X87-NEXT: addl %ebp, %ebx +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-X87-NEXT: jmp .LBB18_13 +; X86-X87-NEXT: .LBB18_12: # %fp-to-i-if-exp.large +; X86-X87-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: addb $-51, %cl +; X86-X87-NEXT: movl %ecx, %eax +; X86-X87-NEXT: shrb $3, %al +; X86-X87-NEXT: andb $12, %al +; X86-X87-NEXT: negb %al +; X86-X87-NEXT: movsbl %al, %eax +; X86-X87-NEXT: movl 52(%esp,%eax), %ebx +; X86-X87-NEXT: movl 56(%esp,%eax), %edx +; X86-X87-NEXT: movl %edx, %esi +; X86-X87-NEXT: shldl %cl, %ebx, %esi +; X86-X87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl 48(%esp,%eax), %esi +; X86-X87-NEXT: movl 60(%esp,%eax), %eax +; X86-X87-NEXT: shldl %cl, %edx, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: shldl %cl, %esi, %ebx +; X86-X87-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-X87-NEXT: shll %cl, %esi +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: mull %edi +; X86-X87-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: movl %ebx, %ecx +; X86-X87-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: mull %edi +; X86-X87-NEXT: movl %eax, %ebx +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-X87-NEXT: adcl $0, %edx +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: mull %ebp +; X86-X87-NEXT: movl %eax, %edi +; X86-X87-NEXT: addl %ebx, %edi +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-X87-NEXT: movl %edx, %ebx +; X86-X87-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X86-X87-NEXT: movl %ecx, %eax -; X86-X87-NEXT: addl $60, %esp +; X86-X87-NEXT: mull %ebp +; X86-X87-NEXT: addl %ebx, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-X87-NEXT: adcl %eax, %edx +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-X87-NEXT: imull %eax, %ecx +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-X87-NEXT: mull %ebx +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: addl %ecx, %edx +; X86-X87-NEXT: imull %ebp, %ebx +; X86-X87-NEXT: addl %edx, %ebx +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-X87-NEXT: imull %ebp, %ecx +; X86-X87-NEXT: movl %ebp, %eax +; X86-X87-NEXT: mull %esi +; X86-X87-NEXT: addl %ecx, %edx +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-X87-NEXT: imull %ebp, %esi +; X86-X87-NEXT: addl %edx, %esi +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-X87-NEXT: adcl %ebx, %esi +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-X87-NEXT: movl %esi, %ebx +; X86-X87-NEXT: .LBB18_13: # %fp-to-i-cleanup +; X86-X87-NEXT: movl %ecx, (%ebp) +; X86-X87-NEXT: movl %edi, 4(%ebp) +; X86-X87-NEXT: movl %eax, 8(%ebp) +; X86-X87-NEXT: andl $15, %ebx +; X86-X87-NEXT: movb %bl, 12(%ebp) +; X86-X87-NEXT: movl %ebp, %eax +; X86-X87-NEXT: addl $76, %esp ; X86-X87-NEXT: popl %esi ; X86-X87-NEXT: popl %edi ; X86-X87-NEXT: popl %ebx @@ -1783,52 +2274,203 @@ define i100 @test_signed_i100_f64(double %f) nounwind { ; X86-X87-NEXT: retl $4 ; ; X86-SSE-LABEL: test_signed_i100_f64: -; X86-SSE: # %bb.0: +; X86-SSE: # %bb.0: # %fp-to-i-entry ; X86-SSE-NEXT: pushl %ebp ; X86-SSE-NEXT: pushl %ebx ; X86-SSE-NEXT: pushl %edi ; X86-SSE-NEXT: pushl %esi -; X86-SSE-NEXT: subl $44, %esp -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: subl $92, %esp ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl %eax, (%esp) -; X86-SSE-NEXT: calll __fixdfti -; X86-SSE-NEXT: subl $4, %esp -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: xorl %ebp, %ebp -; X86-SSE-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: movl $-8, %ebx -; X86-SSE-NEXT: movl $0, %ecx -; X86-SSE-NEXT: movl $0, %edx -; X86-SSE-NEXT: movl $0, %edi -; X86-SSE-NEXT: jb .LBB18_2 -; X86-SSE-NEXT: # %bb.1: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE-NEXT: .LBB18_2: -; X86-SSE-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: movl $-1, %eax -; X86-SSE-NEXT: cmoval %eax, %edi -; X86-SSE-NEXT: cmoval %eax, %edx -; X86-SSE-NEXT: cmoval %eax, %ecx -; X86-SSE-NEXT: movl $7, %eax -; X86-SSE-NEXT: cmovbel %ebx, %eax +; X86-SSE-NEXT: testl %edi, %edi +; X86-SSE-NEXT: setns %al +; X86-SSE-NEXT: movl %edi, %ecx +; X86-SSE-NEXT: shrl $20, %ecx +; X86-SSE-NEXT: andl $2047, %ecx # imm = 0x7FF +; X86-SSE-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-SSE-NEXT: movl %edi, %esi +; X86-SSE-NEXT: andl $1048575, %esi # imm = 0xFFFFF +; X86-SSE-NEXT: addl $-1, %ebp +; X86-SSE-NEXT: adcl $1048575, %esi # imm = 0xFFFFF +; X86-SSE-NEXT: addl $1, %ebp +; X86-SSE-NEXT: adcl $0, %esi +; X86-SSE-NEXT: cmpl $1023, %ecx # imm = 0x3FF +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: jb .LBB18_1 +; X86-SSE-NEXT: # %bb.2: # %fp-to-i-entry +; X86-SSE-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-SSE-NEXT: ucomisd %xmm0, %xmm0 -; X86-SSE-NEXT: cmovpl %ebp, %eax -; X86-SSE-NEXT: cmovpl %ebp, %ecx -; X86-SSE-NEXT: cmovpl %ebp, %edx -; X86-SSE-NEXT: cmovpl %ebp, %edi -; X86-SSE-NEXT: movl %edi, 8(%esi) -; X86-SSE-NEXT: movl %edx, 4(%esi) -; X86-SSE-NEXT: movl %ecx, (%esi) -; X86-SSE-NEXT: andl $15, %eax -; X86-SSE-NEXT: movb %al, 12(%esi) +; X86-SSE-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-SSE-NEXT: movl $0, %eax +; X86-SSE-NEXT: movl $0, %ebx +; X86-SSE-NEXT: jp .LBB18_9 +; X86-SSE-NEXT: # %bb.3: # %fp-to-i-if-check.saturate +; X86-SSE-NEXT: cmpl $1122, %ecx # imm = 0x462 +; X86-SSE-NEXT: jb .LBB18_5 +; X86-SSE-NEXT: # %bb.4: # %fp-to-i-if-saturate +; X86-SSE-NEXT: xorl %eax, %eax +; X86-SSE-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SSE-NEXT: movb %cl, %al +; X86-SSE-NEXT: negl %eax +; X86-SSE-NEXT: movd %eax, %xmm0 +; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; X86-SSE-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: por %xmm1, %xmm0 +; X86-SSE-NEXT: movd %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X86-SSE-NEXT: movd %xmm1, %ebx +; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X86-SSE-NEXT: movd %xmm1, %eax +; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE-NEXT: movd %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-SSE-NEXT: jmp .LBB18_9 +; X86-SSE-NEXT: .LBB18_1: +; X86-SSE-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-SSE-NEXT: xorl %eax, %eax +; X86-SSE-NEXT: xorl %ebx, %ebx +; X86-SSE-NEXT: jmp .LBB18_9 +; X86-SSE-NEXT: .LBB18_5: # %fp-to-i-if-check.exp.size +; X86-SSE-NEXT: sarl $31, %edi +; X86-SSE-NEXT: movl %edi, %edx +; X86-SSE-NEXT: orl $1, %edx +; X86-SSE-NEXT: cmpl $1074, %ecx # imm = 0x432 +; X86-SSE-NEXT: ja .LBB18_7 +; X86-SSE-NEXT: # %bb.6: # %fp-to-i-if-exp.small +; X86-SSE-NEXT: movb $51, %al +; X86-SSE-NEXT: subb %cl, %al +; X86-SSE-NEXT: movl %eax, %ecx +; X86-SSE-NEXT: shrdl %cl, %esi, %ebp +; X86-SSE-NEXT: shrl %cl, %esi +; X86-SSE-NEXT: xorl %ebx, %ebx +; X86-SSE-NEXT: testb $32, %al +; X86-SSE-NEXT: cmovnel %esi, %ebp +; X86-SSE-NEXT: cmovel %esi, %ebx +; X86-SSE-NEXT: movl %ebp, %eax +; X86-SSE-NEXT: movl %edx, %ecx +; X86-SSE-NEXT: mull %edx +; X86-SSE-NEXT: movl %edx, %esi +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %ebx, %eax +; X86-SSE-NEXT: mull %ecx +; X86-SSE-NEXT: movl %eax, %ecx +; X86-SSE-NEXT: addl %esi, %ecx +; X86-SSE-NEXT: adcl $0, %edx +; X86-SSE-NEXT: movl %edx, %esi +; X86-SSE-NEXT: movl %ebp, %eax +; X86-SSE-NEXT: mull %edi +; X86-SSE-NEXT: addl %ecx, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: adcl %esi, %edx +; X86-SSE-NEXT: movl %edx, %ecx +; X86-SSE-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-SSE-NEXT: movl %ebx, %eax +; X86-SSE-NEXT: mull %edi +; X86-SSE-NEXT: movl %edx, %esi +; X86-SSE-NEXT: addl %ecx, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-SSE-NEXT: adcl %eax, %esi +; X86-SSE-NEXT: movl %edi, %ecx +; X86-SSE-NEXT: imull %ebp, %ecx +; X86-SSE-NEXT: imull %edi, %ebx +; X86-SSE-NEXT: movl %edi, %eax +; X86-SSE-NEXT: mull %ebp +; X86-SSE-NEXT: addl %edx, %ebx +; X86-SSE-NEXT: addl %ecx, %ebx +; X86-SSE-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-SSE-NEXT: adcl %esi, %ebx +; X86-SSE-NEXT: jmp .LBB18_8 +; X86-SSE-NEXT: .LBB18_7: # %fp-to-i-if-exp.large +; X86-SSE-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: xorpd %xmm0, %xmm0 +; X86-SSE-NEXT: movapd %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: addb $-51, %cl +; X86-SSE-NEXT: movl %ecx, %eax +; X86-SSE-NEXT: shrb $3, %al +; X86-SSE-NEXT: andb $12, %al +; X86-SSE-NEXT: negb %al +; X86-SSE-NEXT: movsbl %al, %eax +; X86-SSE-NEXT: movl 68(%esp,%eax), %ebp +; X86-SSE-NEXT: movl %edx, %ebx +; X86-SSE-NEXT: movl 72(%esp,%eax), %edx +; X86-SSE-NEXT: movl %edx, %esi +; X86-SSE-NEXT: shldl %cl, %ebp, %esi +; X86-SSE-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl 64(%esp,%eax), %esi +; X86-SSE-NEXT: movl 76(%esp,%eax), %eax +; X86-SSE-NEXT: shldl %cl, %edx, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: shldl %cl, %esi, %ebp +; X86-SSE-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-SSE-NEXT: shll %cl, %esi +; X86-SSE-NEXT: movl %esi, %eax +; X86-SSE-NEXT: movl %ebx, %ecx +; X86-SSE-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: mull %ebx +; X86-SSE-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %ebp, %eax +; X86-SSE-NEXT: movl %ebp, %ebx +; X86-SSE-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: mull %ecx +; X86-SSE-NEXT: movl %eax, %ecx +; X86-SSE-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-SSE-NEXT: adcl $0, %edx +; X86-SSE-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-SSE-NEXT: movl %esi, %eax -; X86-SSE-NEXT: addl $44, %esp +; X86-SSE-NEXT: mull %edi +; X86-SSE-NEXT: movl %edx, %ebp +; X86-SSE-NEXT: addl %ecx, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-SSE-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-SSE-NEXT: movl %ebx, %eax +; X86-SSE-NEXT: mull %edi +; X86-SSE-NEXT: movl %eax, %ecx +; X86-SSE-NEXT: addl %ebp, %ecx +; X86-SSE-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-SSE-NEXT: adcl %eax, %edx +; X86-SSE-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-SSE-NEXT: imull %eax, %ebp +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-SSE-NEXT: mull %ebx +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: addl %ebp, %edx +; X86-SSE-NEXT: imull %edi, %ebx +; X86-SSE-NEXT: addl %edx, %ebx +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-SSE-NEXT: imull %edi, %ebp +; X86-SSE-NEXT: movl %edi, %eax +; X86-SSE-NEXT: mull %esi +; X86-SSE-NEXT: addl %ebp, %edx +; X86-SSE-NEXT: imull %edi, %esi +; X86-SSE-NEXT: addl %edx, %esi +; X86-SSE-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-SSE-NEXT: adcl %ebx, %esi +; X86-SSE-NEXT: addl %ecx, %eax +; X86-SSE-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-SSE-NEXT: movl %esi, %ebx +; X86-SSE-NEXT: .LBB18_8: # %fp-to-i-cleanup +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: .LBB18_9: # %fp-to-i-cleanup +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE-NEXT: movl %ecx, (%edx) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE-NEXT: movl %ecx, 4(%edx) +; X86-SSE-NEXT: movl %eax, 8(%edx) +; X86-SSE-NEXT: andl $15, %ebx +; X86-SSE-NEXT: movb %bl, 12(%edx) +; X86-SSE-NEXT: movl %edx, %eax +; X86-SSE-NEXT: addl $92, %esp ; X86-SSE-NEXT: popl %esi ; X86-SSE-NEXT: popl %edi ; X86-SSE-NEXT: popl %ebx @@ -1863,143 +2505,398 @@ define i100 @test_signed_i100_f64(double %f) nounwind { define i128 @test_signed_i128_f64(double %f) nounwind { ; X86-X87-LABEL: test_signed_i128_f64: -; X86-X87: # %bb.0: +; X86-X87: # %bb.0: # %fp-to-i-entry ; X86-X87-NEXT: pushl %ebp -; X86-X87-NEXT: pushl %ebx -; X86-X87-NEXT: pushl %edi -; X86-X87-NEXT: pushl %esi -; X86-X87-NEXT: subl $60, %esp -; X86-X87-NEXT: fldl {{[0-9]+}}(%esp) -; X86-X87-NEXT: fstl {{[0-9]+}}(%esp) -; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: movl %eax, (%esp) -; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fstl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: movl %eax, %ebx -; X86-X87-NEXT: calll __fixdfti -; X86-X87-NEXT: subl $4, %esp -; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-X87-NEXT: movb %bh, %ah -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jae .LBB19_1 -; X86-X87-NEXT: # %bb.2: -; X86-X87-NEXT: movl $0, %edx -; X86-X87-NEXT: jae .LBB19_3 -; X86-X87-NEXT: .LBB19_4: -; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X86-X87-NEXT: jb .LBB19_6 -; X86-X87-NEXT: .LBB19_5: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: .LBB19_6: -; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: fldl {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload -; X86-X87-NEXT: fucom %st(1) -; X86-X87-NEXT: fstp %st(1) +; X86-X87-NEXT: pushl %ebx +; X86-X87-NEXT: pushl %edi +; X86-X87-NEXT: pushl %esi +; X86-X87-NEXT: subl $76, %esp +; X86-X87-NEXT: fldl {{[0-9]+}}(%esp) +; X86-X87-NEXT: fstl {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-X87-NEXT: testl %ebp, %ebp +; X86-X87-NEXT: setns %dl +; X86-X87-NEXT: movl %ebp, %ecx +; X86-X87-NEXT: shrl $20, %ecx +; X86-X87-NEXT: andl $2047, %ecx # imm = 0x7FF +; X86-X87-NEXT: movl %ebp, %esi +; X86-X87-NEXT: andl $1048575, %esi # imm = 0xFFFFF +; X86-X87-NEXT: addl $-1, %ebx +; X86-X87-NEXT: adcl $1048575, %esi # imm = 0xFFFFF +; X86-X87-NEXT: addl $1, %ebx +; X86-X87-NEXT: adcl $0, %esi +; X86-X87-NEXT: cmpl $1023, %ecx # imm = 0x3FF +; X86-X87-NEXT: setb %dh +; X86-X87-NEXT: fucomp %st(0) ; X86-X87-NEXT: fnstsw %ax ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-X87-NEXT: setp %al +; X86-X87-NEXT: orb %dh, %al +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-X87-NEXT: je .LBB19_2 +; X86-X87-NEXT: # %bb.1: +; X86-X87-NEXT: xorl %edx, %edx +; X86-X87-NEXT: xorl %edi, %edi +; X86-X87-NEXT: xorl %ecx, %ecx +; X86-X87-NEXT: xorl %ebp, %ebp +; X86-X87-NEXT: jmp .LBB19_9 +; X86-X87-NEXT: .LBB19_2: # %fp-to-i-if-check.saturate +; X86-X87-NEXT: cmpl $1150, %ecx # imm = 0x47E +; X86-X87-NEXT: jb .LBB19_4 +; X86-X87-NEXT: # %bb.3: # %fp-to-i-if-saturate +; X86-X87-NEXT: xorl %ecx, %ecx +; X86-X87-NEXT: movb %dl, %cl +; X86-X87-NEXT: movl %ecx, %edx +; X86-X87-NEXT: xorl %ecx, %ecx +; X86-X87-NEXT: negl %edx +; X86-X87-NEXT: movl $0, %edi +; X86-X87-NEXT: sbbl %edi, %edi +; X86-X87-NEXT: sbbl %ecx, %ecx +; X86-X87-NEXT: movl $-2147483648, %ebp # imm = 0x80000000 +; X86-X87-NEXT: sbbl $0, %ebp +; X86-X87-NEXT: jmp .LBB19_9 +; X86-X87-NEXT: .LBB19_4: # %fp-to-i-if-check.exp.size +; X86-X87-NEXT: sarl $31, %ebp +; X86-X87-NEXT: movl %ebp, %edi +; X86-X87-NEXT: orl $1, %edi +; X86-X87-NEXT: cmpl $1074, %ecx # imm = 0x432 ; X86-X87-NEXT: ja .LBB19_8 -; X86-X87-NEXT: # %bb.7: -; X86-X87-NEXT: movl %ecx, %eax -; X86-X87-NEXT: .LBB19_8: +; X86-X87-NEXT: # %bb.5: # %fp-to-i-if-exp.small +; X86-X87-NEXT: movb $51, %dl +; X86-X87-NEXT: subb %cl, %dl +; X86-X87-NEXT: movl %edx, %ecx +; X86-X87-NEXT: shrdl %cl, %esi, %ebx +; X86-X87-NEXT: shrl %cl, %esi +; X86-X87-NEXT: testb $32, %dl +; X86-X87-NEXT: je .LBB19_7 +; X86-X87-NEXT: # %bb.6: +; X86-X87-NEXT: movl %esi, %ebx +; X86-X87-NEXT: xorl %esi, %esi +; X86-X87-NEXT: .LBB19_7: # %fp-to-i-if-exp.small +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: mull %edi +; X86-X87-NEXT: movl %edi, %ecx +; X86-X87-NEXT: movl %edx, %edi ; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: movl $-1, %ebp -; X86-X87-NEXT: movl $-1, %edi -; X86-X87-NEXT: movl $-1, %esi -; X86-X87-NEXT: ja .LBB19_10 -; X86-X87-NEXT: # %bb.9: -; X86-X87-NEXT: movl %edx, %ebp -; X86-X87-NEXT: movl %ebx, %edi -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-X87-NEXT: .LBB19_10: -; X86-X87-NEXT: fucomp %st(0) -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: movl $0, %edx -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jp .LBB19_12 -; X86-X87-NEXT: # %bb.11: +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: mull %ecx +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: addl %edi, %ecx +; X86-X87-NEXT: adcl $0, %edx +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: mull %ebp +; X86-X87-NEXT: movl %eax, %edi +; X86-X87-NEXT: addl %ecx, %edi +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-X87-NEXT: movl %edx, %ecx +; X86-X87-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: mull %ebp +; X86-X87-NEXT: addl %ecx, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-X87-NEXT: adcl %eax, %edx +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %ebp, %ecx +; X86-X87-NEXT: movl %ebp, %eax +; X86-X87-NEXT: imull %esi, %ebp +; X86-X87-NEXT: movl %ecx, %esi +; X86-X87-NEXT: imull %ebx, %esi +; X86-X87-NEXT: mull %ebx +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: addl %edx, %ebp +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-X87-NEXT: addl %esi, %ebp +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-X87-NEXT: jmp .LBB19_9 +; X86-X87-NEXT: .LBB19_8: # %fp-to-i-if-exp.large +; X86-X87-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: addb $-51, %cl +; X86-X87-NEXT: movl %ecx, %eax +; X86-X87-NEXT: shrb $3, %al +; X86-X87-NEXT: andb $12, %al +; X86-X87-NEXT: negb %al +; X86-X87-NEXT: movsbl %al, %eax +; X86-X87-NEXT: movl 52(%esp,%eax), %ebx +; X86-X87-NEXT: movl 56(%esp,%eax), %edx +; X86-X87-NEXT: movl %edx, %esi +; X86-X87-NEXT: shldl %cl, %ebx, %esi ; X86-X87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl %edi, %eax -; X86-X87-NEXT: movl %ebp, %edx -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-X87-NEXT: .LBB19_12: -; X86-X87-NEXT: movl %ebx, 12(%ecx) -; X86-X87-NEXT: movl %edx, 8(%ecx) -; X86-X87-NEXT: movl %eax, 4(%ecx) +; X86-X87-NEXT: movl 48(%esp,%eax), %esi +; X86-X87-NEXT: movl 60(%esp,%eax), %eax +; X86-X87-NEXT: shldl %cl, %edx, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: shldl %cl, %esi, %ebx +; X86-X87-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-X87-NEXT: shll %cl, %esi +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: mull %edi +; X86-X87-NEXT: movl %edi, %ecx +; X86-X87-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %edx, %edi +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: mull %ecx +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: addl %edi, %ecx +; X86-X87-NEXT: adcl $0, %edx +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: mull %ebp +; X86-X87-NEXT: movl %edx, %ebx +; X86-X87-NEXT: movl %eax, %edi +; X86-X87-NEXT: addl %ecx, %edi +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-X87-NEXT: setb %cl ; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-X87-NEXT: movl %eax, (%ecx) -; X86-X87-NEXT: movl %ecx, %eax -; X86-X87-NEXT: addl $60, %esp +; X86-X87-NEXT: mull %ebp +; X86-X87-NEXT: addl %ebx, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movzbl %cl, %eax +; X86-X87-NEXT: adcl %eax, %edx +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-X87-NEXT: imull %eax, %ecx +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-X87-NEXT: mull %ebx +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: addl %ecx, %edx +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: imull %ebp, %eax +; X86-X87-NEXT: addl %edx, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-X87-NEXT: imull %ebp, %ebx +; X86-X87-NEXT: movl %ebp, %eax +; X86-X87-NEXT: mull %esi +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-X87-NEXT: addl %ebx, %edx +; X86-X87-NEXT: imull %ebp, %esi +; X86-X87-NEXT: addl %edx, %esi +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-X87-NEXT: movl %esi, %ebp +; X86-X87-NEXT: .LBB19_9: # %fp-to-i-cleanup +; X86-X87-NEXT: movl %edx, (%eax) +; X86-X87-NEXT: movl %edi, 4(%eax) +; X86-X87-NEXT: movl %ecx, 8(%eax) +; X86-X87-NEXT: movl %ebp, 12(%eax) +; X86-X87-NEXT: addl $76, %esp ; X86-X87-NEXT: popl %esi ; X86-X87-NEXT: popl %edi ; X86-X87-NEXT: popl %ebx ; X86-X87-NEXT: popl %ebp ; X86-X87-NEXT: retl $4 -; X86-X87-NEXT: .LBB19_1: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-X87-NEXT: movl $0, %edx -; X86-X87-NEXT: jb .LBB19_4 -; X86-X87-NEXT: .LBB19_3: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X86-X87-NEXT: jae .LBB19_5 -; X86-X87-NEXT: jmp .LBB19_6 ; ; X86-SSE-LABEL: test_signed_i128_f64: -; X86-SSE: # %bb.0: +; X86-SSE: # %bb.0: # %fp-to-i-entry ; X86-SSE-NEXT: pushl %ebp ; X86-SSE-NEXT: pushl %ebx ; X86-SSE-NEXT: pushl %edi ; X86-SSE-NEXT: pushl %esi -; X86-SSE-NEXT: subl $44, %esp -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: subl $92, %esp ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl %eax, (%esp) -; X86-SSE-NEXT: calll __fixdfti -; X86-SSE-NEXT: subl $4, %esp -; X86-SSE-NEXT: xorl %ecx, %ecx -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: cmovbl %ecx, %eax +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: xorl %ebx, %ebx +; X86-SSE-NEXT: testl %esi, %esi +; X86-SSE-NEXT: setns %al +; X86-SSE-NEXT: movl %esi, %ecx +; X86-SSE-NEXT: shrl $20, %ecx +; X86-SSE-NEXT: andl $2047, %ecx # imm = 0x7FF +; X86-SSE-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-SSE-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: andl $1048575, %esi # imm = 0xFFFFF +; X86-SSE-NEXT: addl $-1, %ebp +; X86-SSE-NEXT: adcl $1048575, %esi # imm = 0xFFFFF +; X86-SSE-NEXT: addl $1, %ebp +; X86-SSE-NEXT: adcl $0, %esi +; X86-SSE-NEXT: cmpl $1023, %ecx # imm = 0x3FF ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SSE-NEXT: cmovbl %ecx, %edx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE-NEXT: cmovbl %ecx, %edi -; X86-SSE-NEXT: movl $-2147483648, %ebp # imm = 0x80000000 -; X86-SSE-NEXT: cmovael {{[0-9]+}}(%esp), %ebp -; X86-SSE-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: movl $2147483647, %ebx # imm = 0x7FFFFFFF -; X86-SSE-NEXT: cmovbel %ebp, %ebx -; X86-SSE-NEXT: movl $-1, %ebp -; X86-SSE-NEXT: cmoval %ebp, %edi -; X86-SSE-NEXT: cmoval %ebp, %edx -; X86-SSE-NEXT: cmoval %ebp, %eax +; X86-SSE-NEXT: jb .LBB19_1 +; X86-SSE-NEXT: # %bb.2: # %fp-to-i-entry +; X86-SSE-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-SSE-NEXT: ucomisd %xmm0, %xmm0 -; X86-SSE-NEXT: cmovpl %ecx, %eax -; X86-SSE-NEXT: cmovpl %ecx, %edx -; X86-SSE-NEXT: cmovpl %ecx, %edi -; X86-SSE-NEXT: cmovpl %ecx, %ebx -; X86-SSE-NEXT: movl %ebx, 12(%esi) -; X86-SSE-NEXT: movl %edi, 8(%esi) -; X86-SSE-NEXT: movl %edx, 4(%esi) -; X86-SSE-NEXT: movl %eax, (%esi) +; X86-SSE-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-SSE-NEXT: movl $0, %eax +; X86-SSE-NEXT: movl $0, %edi +; X86-SSE-NEXT: jp .LBB19_9 +; X86-SSE-NEXT: # %bb.3: # %fp-to-i-if-check.saturate +; X86-SSE-NEXT: cmpl $1150, %ecx # imm = 0x47E +; X86-SSE-NEXT: jb .LBB19_5 +; X86-SSE-NEXT: # %bb.4: # %fp-to-i-if-saturate +; X86-SSE-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-SSE-NEXT: movb %al, %bl +; X86-SSE-NEXT: xorl %eax, %eax +; X86-SSE-NEXT: negl %ebx +; X86-SSE-NEXT: movl $0, %ecx +; X86-SSE-NEXT: sbbl %ecx, %ecx +; X86-SSE-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: sbbl %eax, %eax +; X86-SSE-NEXT: movl $-2147483648, %edi # imm = 0x80000000 +; X86-SSE-NEXT: sbbl $0, %edi +; X86-SSE-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: jmp .LBB19_9 +; X86-SSE-NEXT: .LBB19_1: +; X86-SSE-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-SSE-NEXT: xorl %eax, %eax +; X86-SSE-NEXT: xorl %edi, %edi +; X86-SSE-NEXT: jmp .LBB19_9 +; X86-SSE-NEXT: .LBB19_5: # %fp-to-i-if-check.exp.size +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-SSE-NEXT: sarl $31, %ebx +; X86-SSE-NEXT: movl %ebx, %edx +; X86-SSE-NEXT: orl $1, %edx +; X86-SSE-NEXT: cmpl $1074, %ecx # imm = 0x432 +; X86-SSE-NEXT: ja .LBB19_7 +; X86-SSE-NEXT: # %bb.6: # %fp-to-i-if-exp.small +; X86-SSE-NEXT: movb $51, %al +; X86-SSE-NEXT: subb %cl, %al +; X86-SSE-NEXT: movl %eax, %ecx +; X86-SSE-NEXT: shrdl %cl, %esi, %ebp +; X86-SSE-NEXT: shrl %cl, %esi +; X86-SSE-NEXT: xorl %edi, %edi +; X86-SSE-NEXT: testb $32, %al +; X86-SSE-NEXT: cmovnel %esi, %ebp +; X86-SSE-NEXT: cmovel %esi, %edi +; X86-SSE-NEXT: movl %ebp, %eax +; X86-SSE-NEXT: movl %edx, %ecx +; X86-SSE-NEXT: mull %edx +; X86-SSE-NEXT: movl %edx, %esi +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %edi, %eax +; X86-SSE-NEXT: mull %ecx +; X86-SSE-NEXT: movl %eax, %ecx +; X86-SSE-NEXT: addl %esi, %ecx +; X86-SSE-NEXT: adcl $0, %edx +; X86-SSE-NEXT: movl %edx, %esi +; X86-SSE-NEXT: movl %ebp, %eax +; X86-SSE-NEXT: mull %ebx +; X86-SSE-NEXT: addl %ecx, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: adcl %esi, %edx +; X86-SSE-NEXT: movl %edx, %ecx +; X86-SSE-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-SSE-NEXT: movl %edi, %eax +; X86-SSE-NEXT: mull %ebx +; X86-SSE-NEXT: movl %edx, %esi +; X86-SSE-NEXT: addl %ecx, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-SSE-NEXT: adcl %eax, %esi +; X86-SSE-NEXT: movl %ebx, %ecx +; X86-SSE-NEXT: imull %ebp, %ecx +; X86-SSE-NEXT: imull %ebx, %edi +; X86-SSE-NEXT: movl %ebx, %eax +; X86-SSE-NEXT: mull %ebp +; X86-SSE-NEXT: addl %edx, %edi +; X86-SSE-NEXT: addl %ecx, %edi +; X86-SSE-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-SSE-NEXT: adcl %esi, %edi +; X86-SSE-NEXT: jmp .LBB19_8 +; X86-SSE-NEXT: .LBB19_7: # %fp-to-i-if-exp.large +; X86-SSE-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: xorpd %xmm0, %xmm0 +; X86-SSE-NEXT: movapd %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: addb $-51, %cl +; X86-SSE-NEXT: movl %ecx, %eax +; X86-SSE-NEXT: shrb $3, %al +; X86-SSE-NEXT: andb $12, %al +; X86-SSE-NEXT: negb %al +; X86-SSE-NEXT: movsbl %al, %eax +; X86-SSE-NEXT: movl 68(%esp,%eax), %ebp +; X86-SSE-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl 72(%esp,%eax), %edx +; X86-SSE-NEXT: movl %edx, %esi +; X86-SSE-NEXT: shldl %cl, %ebp, %esi +; X86-SSE-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl 64(%esp,%eax), %edi +; X86-SSE-NEXT: movl 76(%esp,%eax), %eax +; X86-SSE-NEXT: shldl %cl, %edx, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: shldl %cl, %edi, %ebp +; X86-SSE-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-SSE-NEXT: shll %cl, %edi +; X86-SSE-NEXT: movl %edi, %eax +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE-NEXT: mull %ecx +; X86-SSE-NEXT: movl %edx, %esi +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %ebp, %eax +; X86-SSE-NEXT: mull %ecx +; X86-SSE-NEXT: movl %eax, %ecx +; X86-SSE-NEXT: addl %esi, %ecx +; X86-SSE-NEXT: adcl $0, %edx +; X86-SSE-NEXT: movl %edx, %esi +; X86-SSE-NEXT: movl %edi, %eax +; X86-SSE-NEXT: mull %ebx +; X86-SSE-NEXT: movl %edx, %ebp +; X86-SSE-NEXT: addl %ecx, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: adcl %esi, %ebp +; X86-SSE-NEXT: movl %ebx, %esi +; X86-SSE-NEXT: setb %bl +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-SSE-NEXT: mull %esi +; X86-SSE-NEXT: movl %eax, %ecx +; X86-SSE-NEXT: addl %ebp, %ecx +; X86-SSE-NEXT: movzbl %bl, %eax +; X86-SSE-NEXT: adcl %eax, %edx +; X86-SSE-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-SSE-NEXT: imull %eax, %ebp +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-SSE-NEXT: mull %ebx +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: addl %ebp, %edx +; X86-SSE-NEXT: imull %esi, %ebx +; X86-SSE-NEXT: addl %edx, %ebx +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-SSE-NEXT: imull %esi, %ebp ; X86-SSE-NEXT: movl %esi, %eax -; X86-SSE-NEXT: addl $44, %esp +; X86-SSE-NEXT: mull %edi +; X86-SSE-NEXT: addl %ebp, %edx +; X86-SSE-NEXT: imull %esi, %edi +; X86-SSE-NEXT: addl %edx, %edi +; X86-SSE-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-SSE-NEXT: adcl %ebx, %edi +; X86-SSE-NEXT: addl %ecx, %eax +; X86-SSE-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-SSE-NEXT: .LBB19_8: # %fp-to-i-cleanup +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: .LBB19_9: # %fp-to-i-cleanup +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE-NEXT: movl %ecx, (%edx) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE-NEXT: movl %ecx, 4(%edx) +; X86-SSE-NEXT: movl %eax, 8(%edx) +; X86-SSE-NEXT: movl %edi, 12(%edx) +; X86-SSE-NEXT: movl %edx, %eax +; X86-SSE-NEXT: addl $92, %esp ; X86-SSE-NEXT: popl %esi ; X86-SSE-NEXT: popl %edi ; X86-SSE-NEXT: popl %ebx @@ -2835,153 +3732,51 @@ define i64 @test_signed_i64_f16(half %f) nounwind { define i100 @test_signed_i100_f16(half %f) nounwind { ; X86-X87-LABEL: test_signed_i100_f16: ; X86-X87: # %bb.0: -; X86-X87-NEXT: pushl %ebp -; X86-X87-NEXT: pushl %ebx -; X86-X87-NEXT: pushl %edi ; X86-X87-NEXT: pushl %esi -; X86-X87-NEXT: subl $60, %esp +; X86-X87-NEXT: subl $24, %esp +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-X87-NEXT: movl %eax, (%esp) ; X86-X87-NEXT: calll __extendhfsf2 -; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: movl %eax, (%esp) -; X86-X87-NEXT: fsts {{[0-9]+}}(%esp) -; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fsts {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: movl %eax, %ebx -; X86-X87-NEXT: calll __fixsfti -; X86-X87-NEXT: subl $4, %esp -; X86-X87-NEXT: xorl %edx, %edx -; X86-X87-NEXT: movb %bh, %ah -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $-8, %ebx -; X86-X87-NEXT: jb .LBB28_2 -; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-X87-NEXT: .LBB28_2: -; X86-X87-NEXT: movl $0, %ecx -; X86-X87-NEXT: movl $0, %ebp -; X86-X87-NEXT: jb .LBB28_4 -; X86-X87-NEXT: # %bb.3: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-X87-NEXT: .LBB28_4: -; X86-X87-NEXT: movl $0, %edi -; X86-X87-NEXT: jb .LBB28_6 -; X86-X87-NEXT: # %bb.5: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-X87-NEXT: .LBB28_6: -; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-X87-NEXT: fucom %st(1) -; X86-X87-NEXT: fstp %st(1) -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $-1, %eax -; X86-X87-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-X87-NEXT: movl $-1, %esi -; X86-X87-NEXT: ja .LBB28_8 -; X86-X87-NEXT: # %bb.7: -; X86-X87-NEXT: movl %edi, %eax -; X86-X87-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl %ecx, %esi -; X86-X87-NEXT: .LBB28_8: -; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: movl $7, %edi -; X86-X87-NEXT: ja .LBB28_10 -; X86-X87-NEXT: # %bb.9: -; X86-X87-NEXT: movl %ebx, %edi -; X86-X87-NEXT: .LBB28_10: -; X86-X87-NEXT: fucomp %st(0) -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: movl $0, %ebp -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jp .LBB28_12 -; X86-X87-NEXT: # %bb.11: -; X86-X87-NEXT: movl %edi, %edx +; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp) +; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00 +; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) +; X86-X87-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-X87-NEXT: movl %eax, (%esi) +; X86-X87-NEXT: sarl $31, %eax +; X86-X87-NEXT: movl %eax, 8(%esi) +; X86-X87-NEXT: movl %eax, 4(%esi) +; X86-X87-NEXT: andl $15, %eax +; X86-X87-NEXT: movb %al, 12(%esi) ; X86-X87-NEXT: movl %esi, %eax -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-X87-NEXT: .LBB28_12: -; X86-X87-NEXT: movl %ebx, 8(%ecx) -; X86-X87-NEXT: movl %ebp, 4(%ecx) -; X86-X87-NEXT: movl %eax, (%ecx) -; X86-X87-NEXT: andl $15, %edx -; X86-X87-NEXT: movb %dl, 12(%ecx) -; X86-X87-NEXT: movl %ecx, %eax -; X86-X87-NEXT: addl $60, %esp +; X86-X87-NEXT: addl $24, %esp ; X86-X87-NEXT: popl %esi -; X86-X87-NEXT: popl %edi -; X86-X87-NEXT: popl %ebx -; X86-X87-NEXT: popl %ebp ; X86-X87-NEXT: retl $4 ; ; X86-SSE-LABEL: test_signed_i100_f16: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %ebp -; X86-SSE-NEXT: pushl %ebx -; X86-SSE-NEXT: pushl %edi ; X86-SSE-NEXT: pushl %esi -; X86-SSE-NEXT: subl $44, %esp +; X86-SSE-NEXT: subl $8, %esp ; X86-SSE-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-SSE-NEXT: pextrw $0, %xmm0, %eax ; X86-SSE-NEXT: movw %ax, (%esp) ; X86-SSE-NEXT: calll __extendhfsf2 -; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl %eax, (%esp) ; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SSE-NEXT: movss %xmm0, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: calll __fixsfti -; X86-SSE-NEXT: subl $4, %esp -; X86-SSE-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload -; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: xorl %ebp, %ebp -; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: movl $-8, %ebx -; X86-SSE-NEXT: movl $0, %ecx -; X86-SSE-NEXT: movl $0, %edx -; X86-SSE-NEXT: movl $0, %edi -; X86-SSE-NEXT: jb .LBB28_2 -; X86-SSE-NEXT: # %bb.1: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE-NEXT: .LBB28_2: -; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: movl $-1, %eax -; X86-SSE-NEXT: cmoval %eax, %edi -; X86-SSE-NEXT: cmoval %eax, %edx -; X86-SSE-NEXT: cmoval %eax, %ecx -; X86-SSE-NEXT: movl $7, %eax -; X86-SSE-NEXT: cmovbel %ebx, %eax -; X86-SSE-NEXT: ucomiss %xmm0, %xmm0 -; X86-SSE-NEXT: cmovpl %ebp, %eax -; X86-SSE-NEXT: cmovpl %ebp, %ecx -; X86-SSE-NEXT: cmovpl %ebp, %edx -; X86-SSE-NEXT: cmovpl %ebp, %edi -; X86-SSE-NEXT: movl %edi, 8(%esi) -; X86-SSE-NEXT: movl %edx, 4(%esi) -; X86-SSE-NEXT: movl %ecx, (%esi) +; X86-SSE-NEXT: cvttss2si {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl %eax, (%esi) +; X86-SSE-NEXT: sarl $31, %eax +; X86-SSE-NEXT: movl %eax, 8(%esi) +; X86-SSE-NEXT: movl %eax, 4(%esi) ; X86-SSE-NEXT: andl $15, %eax ; X86-SSE-NEXT: movb %al, 12(%esi) ; X86-SSE-NEXT: movl %esi, %eax -; X86-SSE-NEXT: addl $44, %esp +; X86-SSE-NEXT: addl $8, %esp ; X86-SSE-NEXT: popl %esi -; X86-SSE-NEXT: popl %edi -; X86-SSE-NEXT: popl %ebx -; X86-SSE-NEXT: popl %ebp ; X86-SSE-NEXT: retl $4 ; ; X64-LABEL: test_signed_i100_f16: @@ -3014,155 +3809,49 @@ define i100 @test_signed_i100_f16(half %f) nounwind { define i128 @test_signed_i128_f16(half %f) nounwind { ; X86-X87-LABEL: test_signed_i128_f16: ; X86-X87: # %bb.0: -; X86-X87-NEXT: pushl %ebp -; X86-X87-NEXT: pushl %ebx -; X86-X87-NEXT: pushl %edi ; X86-X87-NEXT: pushl %esi -; X86-X87-NEXT: subl $60, %esp +; X86-X87-NEXT: subl $24, %esp +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-X87-NEXT: movl %eax, (%esp) ; X86-X87-NEXT: calll __extendhfsf2 -; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: movl %eax, (%esp) -; X86-X87-NEXT: fsts {{[0-9]+}}(%esp) -; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fsts {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: movl %eax, %ebx -; X86-X87-NEXT: calll __fixsfti -; X86-X87-NEXT: subl $4, %esp -; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-X87-NEXT: movb %bh, %ah -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jae .LBB29_1 -; X86-X87-NEXT: # %bb.2: -; X86-X87-NEXT: movl $0, %edx -; X86-X87-NEXT: jae .LBB29_3 -; X86-X87-NEXT: .LBB29_4: -; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X86-X87-NEXT: jb .LBB29_6 -; X86-X87-NEXT: .LBB29_5: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: .LBB29_6: -; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-X87-NEXT: fucom %st(1) -; X86-X87-NEXT: fstp %st(1) -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-X87-NEXT: ja .LBB29_8 -; X86-X87-NEXT: # %bb.7: -; X86-X87-NEXT: movl %ecx, %eax -; X86-X87-NEXT: .LBB29_8: -; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: movl $-1, %ebp -; X86-X87-NEXT: movl $-1, %edi -; X86-X87-NEXT: movl $-1, %esi -; X86-X87-NEXT: ja .LBB29_10 -; X86-X87-NEXT: # %bb.9: -; X86-X87-NEXT: movl %edx, %ebp -; X86-X87-NEXT: movl %ebx, %edi -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-X87-NEXT: .LBB29_10: -; X86-X87-NEXT: fucomp %st(0) -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: movl $0, %edx -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jp .LBB29_12 -; X86-X87-NEXT: # %bb.11: -; X86-X87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl %edi, %eax -; X86-X87-NEXT: movl %ebp, %edx -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-X87-NEXT: .LBB29_12: -; X86-X87-NEXT: movl %ebx, 12(%ecx) -; X86-X87-NEXT: movl %edx, 8(%ecx) -; X86-X87-NEXT: movl %eax, 4(%ecx) -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-X87-NEXT: movl %eax, (%ecx) -; X86-X87-NEXT: movl %ecx, %eax -; X86-X87-NEXT: addl $60, %esp +; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp) +; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00 +; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) +; X86-X87-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-X87-NEXT: movl %eax, (%esi) +; X86-X87-NEXT: sarl $31, %eax +; X86-X87-NEXT: movl %eax, 12(%esi) +; X86-X87-NEXT: movl %eax, 8(%esi) +; X86-X87-NEXT: movl %eax, 4(%esi) +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: addl $24, %esp ; X86-X87-NEXT: popl %esi -; X86-X87-NEXT: popl %edi -; X86-X87-NEXT: popl %ebx -; X86-X87-NEXT: popl %ebp ; X86-X87-NEXT: retl $4 -; X86-X87-NEXT: .LBB29_1: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-X87-NEXT: movl $0, %edx -; X86-X87-NEXT: jb .LBB29_4 -; X86-X87-NEXT: .LBB29_3: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X86-X87-NEXT: jae .LBB29_5 -; X86-X87-NEXT: jmp .LBB29_6 ; ; X86-SSE-LABEL: test_signed_i128_f16: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %ebp -; X86-SSE-NEXT: pushl %ebx -; X86-SSE-NEXT: pushl %edi ; X86-SSE-NEXT: pushl %esi -; X86-SSE-NEXT: subl $44, %esp +; X86-SSE-NEXT: subl $8, %esp ; X86-SSE-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-SSE-NEXT: pextrw $0, %xmm0, %eax ; X86-SSE-NEXT: movw %ax, (%esp) ; X86-SSE-NEXT: calll __extendhfsf2 -; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl %eax, (%esp) ; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SSE-NEXT: movss %xmm0, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: calll __fixsfti -; X86-SSE-NEXT: subl $4, %esp -; X86-SSE-NEXT: xorl %ecx, %ecx -; X86-SSE-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload -; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: cmovbl %ecx, %eax -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SSE-NEXT: cmovbl %ecx, %edx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE-NEXT: cmovbl %ecx, %edi -; X86-SSE-NEXT: movl $-2147483648, %ebp # imm = 0x80000000 -; X86-SSE-NEXT: cmovael {{[0-9]+}}(%esp), %ebp -; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: movl $2147483647, %ebx # imm = 0x7FFFFFFF -; X86-SSE-NEXT: cmovbel %ebp, %ebx -; X86-SSE-NEXT: movl $-1, %ebp -; X86-SSE-NEXT: cmoval %ebp, %edi -; X86-SSE-NEXT: cmoval %ebp, %edx -; X86-SSE-NEXT: cmoval %ebp, %eax -; X86-SSE-NEXT: ucomiss %xmm0, %xmm0 -; X86-SSE-NEXT: cmovpl %ecx, %eax -; X86-SSE-NEXT: cmovpl %ecx, %edx -; X86-SSE-NEXT: cmovpl %ecx, %edi -; X86-SSE-NEXT: cmovpl %ecx, %ebx -; X86-SSE-NEXT: movl %ebx, 12(%esi) -; X86-SSE-NEXT: movl %edi, 8(%esi) -; X86-SSE-NEXT: movl %edx, 4(%esi) +; X86-SSE-NEXT: cvttss2si {{[0-9]+}}(%esp), %eax ; X86-SSE-NEXT: movl %eax, (%esi) +; X86-SSE-NEXT: sarl $31, %eax +; X86-SSE-NEXT: movl %eax, 12(%esi) +; X86-SSE-NEXT: movl %eax, 8(%esi) +; X86-SSE-NEXT: movl %eax, 4(%esi) ; X86-SSE-NEXT: movl %esi, %eax -; X86-SSE-NEXT: addl $44, %esp -; X86-SSE-NEXT: popl %esi -; X86-SSE-NEXT: popl %edi -; X86-SSE-NEXT: popl %ebx -; X86-SSE-NEXT: popl %ebp +; X86-SSE-NEXT: addl $8, %esp +; X86-SSE-NEXT: popl %esi ; X86-SSE-NEXT: retl $4 ; ; X64-LABEL: test_signed_i128_f16: @@ -4165,90 +4854,257 @@ define i64 @test_signed_i64_f80(x86_fp80 %f) nounwind { define i100 @test_signed_i100_f80(x86_fp80 %f) nounwind { ; X86-X87-LABEL: test_signed_i100_f80: -; X86-X87: # %bb.0: +; X86-X87: # %bb.0: # %fp-to-i-entry ; X86-X87-NEXT: pushl %ebp ; X86-X87-NEXT: pushl %ebx ; X86-X87-NEXT: pushl %edi ; X86-X87-NEXT: pushl %esi -; X86-X87-NEXT: subl $60, %esp +; X86-X87-NEXT: subl $124, %esp ; X86-X87-NEXT: fldt {{[0-9]+}}(%esp) -; X86-X87-NEXT: fld %st(0) +; X86-X87-NEXT: subl $16, %esp ; X86-X87-NEXT: fstpt {{[0-9]+}}(%esp) ; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-X87-NEXT: movl %eax, (%esp) -; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fld %st(1) -; X86-X87-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: movl %eax, %ebx -; X86-X87-NEXT: calll __fixxfti -; X86-X87-NEXT: subl $4, %esp -; X86-X87-NEXT: xorl %edx, %edx -; X86-X87-NEXT: movb %bh, %ah -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $-8, %ebx -; X86-X87-NEXT: jb .LBB38_2 -; X86-X87-NEXT: # %bb.1: +; X86-X87-NEXT: calll __extendxftf2 +; X86-X87-NEXT: addl $12, %esp ; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-X87-NEXT: .LBB38_2: -; X86-X87-NEXT: movl $0, %ecx -; X86-X87-NEXT: movl $0, %ebp -; X86-X87-NEXT: jb .LBB38_4 -; X86-X87-NEXT: # %bb.3: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: testl %ebx, %ebx +; X86-X87-NEXT: setns %cl +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: shrl $16, %eax +; X86-X87-NEXT: andl $32767, %eax # imm = 0x7FFF +; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: cmpl $16383, %eax # imm = 0x3FFF +; X86-X87-NEXT: jb .LBB38_1 +; X86-X87-NEXT: # %bb.2: # %fp-to-i-entry +; X86-X87-NEXT: movb %cl, (%esp) # 1-byte Spill +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-X87-NEXT: .LBB38_4: -; X86-X87-NEXT: movl $0, %edi -; X86-X87-NEXT: jb .LBB38_6 -; X86-X87-NEXT: # %bb.5: ; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-X87-NEXT: .LBB38_6: -; X86-X87-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload -; X86-X87-NEXT: fucom %st(1) -; X86-X87-NEXT: fstp %st(1) -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax -; X86-X87-NEXT: sahf +; X86-X87-NEXT: pushl %ebx +; X86-X87-NEXT: pushl %esi +; X86-X87-NEXT: pushl %edi +; X86-X87-NEXT: pushl %ebp +; X86-X87-NEXT: pushl %ebx +; X86-X87-NEXT: pushl %esi +; X86-X87-NEXT: pushl %edi +; X86-X87-NEXT: pushl %ebp +; X86-X87-NEXT: calll __unordtf2 +; X86-X87-NEXT: addl $32, %esp +; X86-X87-NEXT: testl %eax, %eax +; X86-X87-NEXT: movl $0, %ecx +; X86-X87-NEXT: movl $0, %eax +; X86-X87-NEXT: movl $0, %edx +; X86-X87-NEXT: jne .LBB38_12 +; X86-X87-NEXT: # %bb.3: # %fp-to-i-if-check.saturate +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-X87-NEXT: cmpl $16482, %eax # imm = 0x4062 +; X86-X87-NEXT: sbbl %ecx, %ecx +; X86-X87-NEXT: movl $0, %ecx +; X86-X87-NEXT: sbbl %ecx, %ecx +; X86-X87-NEXT: movl $0, %ecx +; X86-X87-NEXT: sbbl %ecx, %ecx +; X86-X87-NEXT: jb .LBB38_9 +; X86-X87-NEXT: # %bb.4: # %fp-to-i-if-saturate +; X86-X87-NEXT: cmpb $0, (%esp) # 1-byte Folded Reload +; X86-X87-NEXT: movl $7, %edx +; X86-X87-NEXT: jne .LBB38_6 +; X86-X87-NEXT: # %bb.5: # %fp-to-i-if-saturate +; X86-X87-NEXT: movl $-8, %edx +; X86-X87-NEXT: .LBB38_6: # %fp-to-i-if-saturate ; X86-X87-NEXT: movl $-1, %eax -; X86-X87-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-X87-NEXT: movl $-1, %esi -; X86-X87-NEXT: ja .LBB38_8 -; X86-X87-NEXT: # %bb.7: +; X86-X87-NEXT: jne .LBB38_8 +; X86-X87-NEXT: # %bb.7: # %fp-to-i-if-saturate +; X86-X87-NEXT: xorl %eax, %eax +; X86-X87-NEXT: .LBB38_8: # %fp-to-i-if-saturate +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: jmp .LBB38_12 +; X86-X87-NEXT: .LBB38_1: +; X86-X87-NEXT: xorl %ecx, %ecx +; X86-X87-NEXT: xorl %eax, %eax +; X86-X87-NEXT: xorl %edx, %edx +; X86-X87-NEXT: jmp .LBB38_12 +; X86-X87-NEXT: .LBB38_9: # %fp-to-i-if-check.exp.size +; X86-X87-NEXT: movl %ebx, %ecx +; X86-X87-NEXT: sarl $31, %ecx +; X86-X87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: orl $1, %ecx +; X86-X87-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-X87-NEXT: movzwl %bx, %ecx +; X86-X87-NEXT: orl $65536, %ecx # imm = 0x10000 +; X86-X87-NEXT: xorl %ebx, %ebx +; X86-X87-NEXT: movl $16494, %edx # imm = 0x406E +; X86-X87-NEXT: cmpl %eax, %edx +; X86-X87-NEXT: movl $0, %edx +; X86-X87-NEXT: sbbl %edx, %edx +; X86-X87-NEXT: movl $0, %edx +; X86-X87-NEXT: sbbl %edx, %edx +; X86-X87-NEXT: sbbl %ebx, %ebx +; X86-X87-NEXT: movl %eax, %ebx +; X86-X87-NEXT: jb .LBB38_11 +; X86-X87-NEXT: # %bb.10: # %fp-to-i-if-exp.small +; X86-X87-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movb $111, %cl +; X86-X87-NEXT: subb %bl, %cl +; X86-X87-NEXT: movl %ecx, %eax +; X86-X87-NEXT: shrb $3, %al +; X86-X87-NEXT: andb $12, %al +; X86-X87-NEXT: movzbl %al, %eax +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl 44(%esp,%eax), %ebp +; X86-X87-NEXT: movl 40(%esp,%eax), %edx +; X86-X87-NEXT: movl %edx, %esi +; X86-X87-NEXT: shrdl %cl, %ebp, %esi +; X86-X87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl 32(%esp,%eax), %edi +; X86-X87-NEXT: movl 36(%esp,%eax), %esi +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: shrdl %cl, %edx, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: shrl %cl, %ebp +; X86-X87-NEXT: shrdl %cl, %esi, %edi +; X86-X87-NEXT: movl (%esp), %esi # 4-byte Reload +; X86-X87-NEXT: mull %esi +; X86-X87-NEXT: movl %edx, %ebx +; X86-X87-NEXT: movl %eax, %ecx ; X86-X87-NEXT: movl %edi, %eax -; X86-X87-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl %ecx, %esi -; X86-X87-NEXT: .LBB38_8: +; X86-X87-NEXT: mull %esi ; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: movl $7, %edi -; X86-X87-NEXT: ja .LBB38_10 -; X86-X87-NEXT: # %bb.9: -; X86-X87-NEXT: movl %ebx, %edi -; X86-X87-NEXT: .LBB38_10: -; X86-X87-NEXT: fucomp %st(0) -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: movl $0, %ebp -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jp .LBB38_12 -; X86-X87-NEXT: # %bb.11: +; X86-X87-NEXT: movl %edx, %esi +; X86-X87-NEXT: addl %ecx, %esi +; X86-X87-NEXT: adcl $0, %ebx +; X86-X87-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %edi, %eax +; X86-X87-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-X87-NEXT: movl %edx, %ebx +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: addl %esi, %ecx +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-X87-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-X87-NEXT: mull %esi +; X86-X87-NEXT: addl %ebx, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-X87-NEXT: adcl %eax, %edx +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %esi, %ebx +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-X87-NEXT: imull %edx, %ebx +; X86-X87-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-X87-NEXT: imull %eax, %ebp +; X86-X87-NEXT: mull %edx +; X86-X87-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-X87-NEXT: addl %edx, %ebp +; X86-X87-NEXT: addl %ebx, %ebp +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-X87-NEXT: imull %esi, %ebx +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: mull %edi +; X86-X87-NEXT: addl %ebx, %edx +; X86-X87-NEXT: imull %esi, %edi +; X86-X87-NEXT: addl %edx, %edi ; X86-X87-NEXT: movl %edi, %edx +; X86-X87-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; X86-X87-NEXT: adcl %ebp, %edx +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-X87-NEXT: jmp .LBB38_12 +; X86-X87-NEXT: .LBB38_11: # %fp-to-i-if-exp.large +; X86-X87-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: addb $-111, %bl +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: shrb $3, %al +; X86-X87-NEXT: andb $12, %al +; X86-X87-NEXT: negb %al +; X86-X87-NEXT: movsbl %al, %eax +; X86-X87-NEXT: movl 84(%esp,%eax), %esi +; X86-X87-NEXT: movl 88(%esp,%eax), %edx +; X86-X87-NEXT: movl %edx, %edi +; X86-X87-NEXT: movl %ebx, %ecx +; X86-X87-NEXT: shldl %cl, %esi, %edi +; X86-X87-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl 80(%esp,%eax), %ebp +; X86-X87-NEXT: movl 92(%esp,%eax), %eax +; X86-X87-NEXT: shldl %cl, %edx, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: shldl %cl, %ebp, %esi +; X86-X87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: shll %cl, %ebp +; X86-X87-NEXT: movl %ebp, %eax +; X86-X87-NEXT: movl (%esp), %edi # 4-byte Reload +; X86-X87-NEXT: mull %edi +; X86-X87-NEXT: movl %edx, %ecx +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-X87-NEXT: movl %esi, %eax -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-X87-NEXT: mull %edi +; X86-X87-NEXT: movl %eax, %esi +; X86-X87-NEXT: addl %ecx, %esi +; X86-X87-NEXT: adcl $0, %edx +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %ebp, %eax ; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-X87-NEXT: .LBB38_12: -; X86-X87-NEXT: movl %ebx, 8(%ecx) -; X86-X87-NEXT: movl %ebp, 4(%ecx) -; X86-X87-NEXT: movl %eax, (%ecx) -; X86-X87-NEXT: andl $15, %edx -; X86-X87-NEXT: movb %dl, 12(%ecx) -; X86-X87-NEXT: movl %ecx, %eax -; X86-X87-NEXT: addl $60, %esp +; X86-X87-NEXT: mull %ebx +; X86-X87-NEXT: movl %edx, %edi +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: addl %esi, %ecx +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-X87-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-X87-NEXT: mull %ebx +; X86-X87-NEXT: addl %edi, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-X87-NEXT: adcl %eax, %edx +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-X87-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-X87-NEXT: imull %eax, %esi +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-X87-NEXT: mull %edi +; X86-X87-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-X87-NEXT: addl %esi, %edx +; X86-X87-NEXT: imull %ebx, %edi +; X86-X87-NEXT: addl %edx, %edi +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-X87-NEXT: imull %ebx, %esi +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: mull %ebp +; X86-X87-NEXT: addl %esi, %edx +; X86-X87-NEXT: imull %ebx, %ebp +; X86-X87-NEXT: addl %edx, %ebp +; X86-X87-NEXT: movl %ebp, %edx +; X86-X87-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; X86-X87-NEXT: adcl %edi, %edx +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-X87-NEXT: .LBB38_12: # %fp-to-i-cleanup +; X86-X87-NEXT: movl %edx, %ebx +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-X87-NEXT: movl %esi, (%edx) +; X86-X87-NEXT: movl %ecx, 4(%edx) +; X86-X87-NEXT: movl %eax, 8(%edx) +; X86-X87-NEXT: andl $15, %ebx +; X86-X87-NEXT: movb %bl, 12(%edx) +; X86-X87-NEXT: movl %edx, %eax +; X86-X87-NEXT: addl $124, %esp ; X86-X87-NEXT: popl %esi ; X86-X87-NEXT: popl %edi ; X86-X87-NEXT: popl %ebx @@ -4256,60 +5112,257 @@ define i100 @test_signed_i100_f80(x86_fp80 %f) nounwind { ; X86-X87-NEXT: retl $4 ; ; X86-SSE-LABEL: test_signed_i100_f80: -; X86-SSE: # %bb.0: +; X86-SSE: # %bb.0: # %fp-to-i-entry ; X86-SSE-NEXT: pushl %ebp ; X86-SSE-NEXT: pushl %ebx ; X86-SSE-NEXT: pushl %edi ; X86-SSE-NEXT: pushl %esi -; X86-SSE-NEXT: subl $44, %esp -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: subl $124, %esp ; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp) -; X86-SSE-NEXT: fld %st(0) -; X86-SSE-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill +; X86-SSE-NEXT: subl $16, %esp ; X86-SSE-NEXT: fstpt {{[0-9]+}}(%esp) ; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-SSE-NEXT: movl %eax, (%esp) -; X86-SSE-NEXT: calll __fixxfti -; X86-SSE-NEXT: subl $4, %esp -; X86-SSE-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload -; X86-SSE-NEXT: xorl %ebp, %ebp -; X86-SSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-SSE-NEXT: fxch %st(1) -; X86-SSE-NEXT: fucomi %st(1), %st -; X86-SSE-NEXT: fstp %st(1) -; X86-SSE-NEXT: movl $-8, %ebx -; X86-SSE-NEXT: movl $0, %ecx -; X86-SSE-NEXT: movl $0, %edx -; X86-SSE-NEXT: movl $0, %edi -; X86-SSE-NEXT: jb .LBB38_2 -; X86-SSE-NEXT: # %bb.1: +; X86-SSE-NEXT: calll __extendxftf2 +; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: testl %ebx, %ebx +; X86-SSE-NEXT: setns %cl +; X86-SSE-NEXT: movl %ebx, %eax +; X86-SSE-NEXT: shrl $16, %eax +; X86-SSE-NEXT: andl $32767, %eax # imm = 0x7FFF +; X86-SSE-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-SSE-NEXT: cmpl $16383, %eax # imm = 0x3FFF +; X86-SSE-NEXT: jb .LBB38_1 +; X86-SSE-NEXT: # %bb.2: # %fp-to-i-entry +; X86-SSE-NEXT: movb %cl, (%esp) # 1-byte Spill +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE-NEXT: .LBB38_2: -; X86-SSE-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}} -; X86-SSE-NEXT: fxch %st(1) -; X86-SSE-NEXT: fucomi %st(1), %st -; X86-SSE-NEXT: fstp %st(1) -; X86-SSE-NEXT: movl $-1, %eax -; X86-SSE-NEXT: cmoval %eax, %edi -; X86-SSE-NEXT: cmoval %eax, %edx -; X86-SSE-NEXT: cmoval %eax, %ecx -; X86-SSE-NEXT: movl $7, %eax -; X86-SSE-NEXT: cmovbel %ebx, %eax -; X86-SSE-NEXT: fucompi %st(0), %st -; X86-SSE-NEXT: cmovpl %ebp, %eax -; X86-SSE-NEXT: cmovpl %ebp, %ecx -; X86-SSE-NEXT: cmovpl %ebp, %edx -; X86-SSE-NEXT: cmovpl %ebp, %edi -; X86-SSE-NEXT: movl %edi, 8(%esi) -; X86-SSE-NEXT: movl %edx, 4(%esi) -; X86-SSE-NEXT: movl %ecx, (%esi) -; X86-SSE-NEXT: andl $15, %eax -; X86-SSE-NEXT: movb %al, 12(%esi) +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: calll __unordtf2 +; X86-SSE-NEXT: addl $32, %esp +; X86-SSE-NEXT: testl %eax, %eax +; X86-SSE-NEXT: movl $0, %ecx +; X86-SSE-NEXT: movl $0, %eax +; X86-SSE-NEXT: movl $0, %edx +; X86-SSE-NEXT: jne .LBB38_8 +; X86-SSE-NEXT: # %bb.3: # %fp-to-i-if-check.saturate +; X86-SSE-NEXT: xorl %eax, %eax +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-SSE-NEXT: cmpl $16482, %edx # imm = 0x4062 +; X86-SSE-NEXT: sbbl %ecx, %ecx +; X86-SSE-NEXT: movl $0, %ecx +; X86-SSE-NEXT: sbbl %ecx, %ecx +; X86-SSE-NEXT: movl $0, %ecx +; X86-SSE-NEXT: sbbl %ecx, %ecx +; X86-SSE-NEXT: jb .LBB38_5 +; X86-SSE-NEXT: # %bb.4: # %fp-to-i-if-saturate +; X86-SSE-NEXT: xorl %eax, %eax +; X86-SSE-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload +; X86-SSE-NEXT: movb %cl, %al +; X86-SSE-NEXT: negl %eax +; X86-SSE-NEXT: movd %eax, %xmm0 +; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; X86-SSE-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: por %xmm1, %xmm0 +; X86-SSE-NEXT: movd %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X86-SSE-NEXT: movd %xmm1, %edx +; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X86-SSE-NEXT: movd %xmm1, %eax +; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE-NEXT: movd %xmm0, %ecx +; X86-SSE-NEXT: jmp .LBB38_8 +; X86-SSE-NEXT: .LBB38_1: +; X86-SSE-NEXT: xorl %ecx, %ecx +; X86-SSE-NEXT: xorl %eax, %eax +; X86-SSE-NEXT: xorl %edx, %edx +; X86-SSE-NEXT: jmp .LBB38_8 +; X86-SSE-NEXT: .LBB38_5: # %fp-to-i-if-check.exp.size +; X86-SSE-NEXT: movl %ebx, %ecx +; X86-SSE-NEXT: sarl $31, %ecx +; X86-SSE-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: orl $1, %ecx +; X86-SSE-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-SSE-NEXT: movzwl %bx, %ecx +; X86-SSE-NEXT: orl $65536, %ecx # imm = 0x10000 +; X86-SSE-NEXT: movl %edx, %ebx +; X86-SSE-NEXT: movl $16494, %edx # imm = 0x406E +; X86-SSE-NEXT: cmpl %ebx, %edx +; X86-SSE-NEXT: movl $0, %edx +; X86-SSE-NEXT: sbbl %edx, %edx +; X86-SSE-NEXT: movl $0, %edx +; X86-SSE-NEXT: sbbl %edx, %edx +; X86-SSE-NEXT: sbbl %eax, %eax +; X86-SSE-NEXT: jb .LBB38_7 +; X86-SSE-NEXT: # %bb.6: # %fp-to-i-if-exp.small +; X86-SSE-NEXT: pxor %xmm0, %xmm0 +; X86-SSE-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movb $111, %cl +; X86-SSE-NEXT: subb %bl, %cl +; X86-SSE-NEXT: movl %ecx, %eax +; X86-SSE-NEXT: shrb $3, %al +; X86-SSE-NEXT: andb $12, %al +; X86-SSE-NEXT: movzbl %al, %eax +; X86-SSE-NEXT: movl 60(%esp,%eax), %ebx +; X86-SSE-NEXT: movl 56(%esp,%eax), %edx +; X86-SSE-NEXT: movl %edx, %esi +; X86-SSE-NEXT: shrdl %cl, %ebx, %esi +; X86-SSE-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl 48(%esp,%eax), %esi +; X86-SSE-NEXT: movl 52(%esp,%eax), %edi +; X86-SSE-NEXT: movl %edi, %eax +; X86-SSE-NEXT: shrdl %cl, %edx, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: shrl %cl, %ebx +; X86-SSE-NEXT: shrdl %cl, %edi, %esi +; X86-SSE-NEXT: movl (%esp), %edi # 4-byte Reload +; X86-SSE-NEXT: mull %edi +; X86-SSE-NEXT: movl %edx, %ecx +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-SSE-NEXT: movl %esi, %eax -; X86-SSE-NEXT: addl $44, %esp +; X86-SSE-NEXT: mull %edi +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %edx, %ebp +; X86-SSE-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-SSE-NEXT: adcl $0, %ecx +; X86-SSE-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %esi, %eax +; X86-SSE-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-SSE-NEXT: movl %edx, %edi +; X86-SSE-NEXT: movl %eax, %ecx +; X86-SSE-NEXT: addl %ebp, %ecx +; X86-SSE-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-SSE-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-SSE-NEXT: mull %ebp +; X86-SSE-NEXT: addl %edi, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-SSE-NEXT: adcl %eax, %edx +; X86-SSE-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %ebp, %edi +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-SSE-NEXT: imull %edx, %edi +; X86-SSE-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-SSE-NEXT: imull %eax, %ebx +; X86-SSE-NEXT: mull %edx +; X86-SSE-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-SSE-NEXT: addl %edx, %ebx +; X86-SSE-NEXT: addl %edi, %ebx +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-SSE-NEXT: imull %ebp, %edi +; X86-SSE-NEXT: movl %ebp, %eax +; X86-SSE-NEXT: mull %esi +; X86-SSE-NEXT: addl %edi, %edx +; X86-SSE-NEXT: imull %ebp, %esi +; X86-SSE-NEXT: addl %edx, %esi +; X86-SSE-NEXT: movl %esi, %edx +; X86-SSE-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; X86-SSE-NEXT: adcl %ebx, %edx +; X86-SSE-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-SSE-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-SSE-NEXT: jmp .LBB38_8 +; X86-SSE-NEXT: .LBB38_7: # %fp-to-i-if-exp.large +; X86-SSE-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: pxor %xmm0, %xmm0 +; X86-SSE-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: addb $-111, %bl +; X86-SSE-NEXT: movl %ebx, %eax +; X86-SSE-NEXT: shrb $3, %al +; X86-SSE-NEXT: andb $12, %al +; X86-SSE-NEXT: negb %al +; X86-SSE-NEXT: movsbl %al, %eax +; X86-SSE-NEXT: movl 100(%esp,%eax), %edi +; X86-SSE-NEXT: movl 104(%esp,%eax), %edx +; X86-SSE-NEXT: movl %edx, %esi +; X86-SSE-NEXT: movl %ebx, %ecx +; X86-SSE-NEXT: shldl %cl, %edi, %esi +; X86-SSE-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl 96(%esp,%eax), %ebp +; X86-SSE-NEXT: movl 108(%esp,%eax), %eax +; X86-SSE-NEXT: shldl %cl, %edx, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: shldl %cl, %ebp, %edi +; X86-SSE-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: shll %cl, %ebp +; X86-SSE-NEXT: movl %ebp, %eax +; X86-SSE-NEXT: movl (%esp), %esi # 4-byte Reload +; X86-SSE-NEXT: mull %esi +; X86-SSE-NEXT: movl %edx, %ecx +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %edi, %eax +; X86-SSE-NEXT: mull %esi +; X86-SSE-NEXT: movl %eax, %esi +; X86-SSE-NEXT: addl %ecx, %esi +; X86-SSE-NEXT: adcl $0, %edx +; X86-SSE-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %ebp, %eax +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-SSE-NEXT: mull %ebx +; X86-SSE-NEXT: movl %edx, %edi +; X86-SSE-NEXT: movl %eax, %ecx +; X86-SSE-NEXT: addl %esi, %ecx +; X86-SSE-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-SSE-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-SSE-NEXT: mull %ebx +; X86-SSE-NEXT: addl %edi, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-SSE-NEXT: adcl %eax, %edx +; X86-SSE-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-SSE-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-SSE-NEXT: imull %eax, %esi +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-SSE-NEXT: mull %edi +; X86-SSE-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-SSE-NEXT: addl %esi, %edx +; X86-SSE-NEXT: imull %ebx, %edi +; X86-SSE-NEXT: addl %edx, %edi +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-SSE-NEXT: imull %ebx, %esi +; X86-SSE-NEXT: movl %ebx, %eax +; X86-SSE-NEXT: mull %ebp +; X86-SSE-NEXT: addl %esi, %edx +; X86-SSE-NEXT: imull %ebx, %ebp +; X86-SSE-NEXT: addl %edx, %ebp +; X86-SSE-NEXT: movl %ebp, %edx +; X86-SSE-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; X86-SSE-NEXT: adcl %edi, %edx +; X86-SSE-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-SSE-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-SSE-NEXT: .LBB38_8: # %fp-to-i-cleanup +; X86-SSE-NEXT: movl %edx, %ebx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-SSE-NEXT: movl %esi, (%edx) +; X86-SSE-NEXT: movl %ecx, 4(%edx) +; X86-SSE-NEXT: movl %eax, 8(%edx) +; X86-SSE-NEXT: andl $15, %ebx +; X86-SSE-NEXT: movb %bl, 12(%edx) +; X86-SSE-NEXT: movl %edx, %eax +; X86-SSE-NEXT: addl $124, %esp ; X86-SSE-NEXT: popl %esi ; X86-SSE-NEXT: popl %edi ; X86-SSE-NEXT: popl %ebx @@ -4351,152 +5404,503 @@ define i100 @test_signed_i100_f80(x86_fp80 %f) nounwind { define i128 @test_signed_i128_f80(x86_fp80 %f) nounwind { ; X86-X87-LABEL: test_signed_i128_f80: -; X86-X87: # %bb.0: +; X86-X87: # %bb.0: # %fp-to-i-entry ; X86-X87-NEXT: pushl %ebp ; X86-X87-NEXT: pushl %ebx ; X86-X87-NEXT: pushl %edi ; X86-X87-NEXT: pushl %esi -; X86-X87-NEXT: subl $60, %esp +; X86-X87-NEXT: subl $124, %esp ; X86-X87-NEXT: fldt {{[0-9]+}}(%esp) -; X86-X87-NEXT: fld %st(0) +; X86-X87-NEXT: subl $16, %esp ; X86-X87-NEXT: fstpt {{[0-9]+}}(%esp) ; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-X87-NEXT: movl %eax, (%esp) -; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fld %st(1) -; X86-X87-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: movl %eax, %ebx -; X86-X87-NEXT: calll __fixxfti -; X86-X87-NEXT: subl $4, %esp +; X86-X87-NEXT: calll __extendxftf2 +; X86-X87-NEXT: addl $12, %esp +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-X87-NEXT: testl %ebx, %ebx +; X86-X87-NEXT: setns %cl +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: shrl $16, %eax +; X86-X87-NEXT: andl $32767, %eax # imm = 0x7FFF ; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-X87-NEXT: movb %bh, %ah -; X86-X87-NEXT: sahf +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: cmpl $16383, %eax # imm = 0x3FFF +; X86-X87-NEXT: jb .LBB39_1 +; X86-X87-NEXT: # %bb.2: # %fp-to-i-entry +; X86-X87-NEXT: movb %cl, (%esp) # 1-byte Spill +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-X87-NEXT: pushl %ebx +; X86-X87-NEXT: pushl %eax +; X86-X87-NEXT: pushl %ebp +; X86-X87-NEXT: pushl %edi +; X86-X87-NEXT: pushl %ebx +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: pushl %eax +; X86-X87-NEXT: pushl %ebp +; X86-X87-NEXT: pushl %edi +; X86-X87-NEXT: calll __unordtf2 +; X86-X87-NEXT: addl $32, %esp +; X86-X87-NEXT: testl %eax, %eax +; X86-X87-NEXT: movl $0, %ecx ; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jae .LBB39_1 -; X86-X87-NEXT: # %bb.2: +; X86-X87-NEXT: movl $0, %esi +; X86-X87-NEXT: jne .LBB39_8 +; X86-X87-NEXT: # %bb.3: # %fp-to-i-if-check.saturate +; X86-X87-NEXT: xorl %eax, %eax +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-X87-NEXT: cmpl $16510, %esi # imm = 0x407E +; X86-X87-NEXT: sbbl %ecx, %ecx +; X86-X87-NEXT: movl $0, %ecx +; X86-X87-NEXT: sbbl %ecx, %ecx +; X86-X87-NEXT: movl $0, %ecx +; X86-X87-NEXT: sbbl %ecx, %ecx +; X86-X87-NEXT: jb .LBB39_5 +; X86-X87-NEXT: # %bb.4: # %fp-to-i-if-saturate +; X86-X87-NEXT: xorl %edx, %edx +; X86-X87-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload +; X86-X87-NEXT: movb %cl, %dl +; X86-X87-NEXT: negl %edx +; X86-X87-NEXT: movl $0, %ecx +; X86-X87-NEXT: sbbl %ecx, %ecx +; X86-X87-NEXT: sbbl %eax, %eax +; X86-X87-NEXT: movl $-2147483648, %esi # imm = 0x80000000 +; X86-X87-NEXT: sbbl $0, %esi +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: jmp .LBB39_8 +; X86-X87-NEXT: .LBB39_1: +; X86-X87-NEXT: xorl %ecx, %ecx +; X86-X87-NEXT: xorl %eax, %eax +; X86-X87-NEXT: xorl %esi, %esi +; X86-X87-NEXT: jmp .LBB39_8 +; X86-X87-NEXT: .LBB39_5: # %fp-to-i-if-check.exp.size +; X86-X87-NEXT: movl %ebx, %ecx +; X86-X87-NEXT: sarl $31, %ecx +; X86-X87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: orl $1, %ecx +; X86-X87-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-X87-NEXT: movzwl %bx, %ecx +; X86-X87-NEXT: orl $65536, %ecx # imm = 0x10000 +; X86-X87-NEXT: movl $16494, %edx # imm = 0x406E +; X86-X87-NEXT: cmpl %esi, %edx ; X86-X87-NEXT: movl $0, %edx -; X86-X87-NEXT: jae .LBB39_3 -; X86-X87-NEXT: .LBB39_4: -; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X86-X87-NEXT: jb .LBB39_6 -; X86-X87-NEXT: .LBB39_5: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: .LBB39_6: -; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload -; X86-X87-NEXT: fucom %st(1) -; X86-X87-NEXT: fstp %st(1) -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-X87-NEXT: ja .LBB39_8 -; X86-X87-NEXT: # %bb.7: +; X86-X87-NEXT: sbbl %edx, %edx +; X86-X87-NEXT: movl $0, %edx +; X86-X87-NEXT: sbbl %edx, %edx +; X86-X87-NEXT: sbbl %eax, %eax +; X86-X87-NEXT: movl %esi, %ebx +; X86-X87-NEXT: jb .LBB39_7 +; X86-X87-NEXT: # %bb.6: # %fp-to-i-if-exp.small +; X86-X87-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-X87-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movb $111, %cl +; X86-X87-NEXT: subb %bl, %cl ; X86-X87-NEXT: movl %ecx, %eax -; X86-X87-NEXT: .LBB39_8: +; X86-X87-NEXT: shrb $3, %al +; X86-X87-NEXT: andb $12, %al +; X86-X87-NEXT: movzbl %al, %eax +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl 44(%esp,%eax), %ebx +; X86-X87-NEXT: movl 40(%esp,%eax), %edx +; X86-X87-NEXT: movl %edx, %esi +; X86-X87-NEXT: shrdl %cl, %ebx, %esi +; X86-X87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl 32(%esp,%eax), %esi +; X86-X87-NEXT: movl 36(%esp,%eax), %edi +; X86-X87-NEXT: movl %edi, %eax +; X86-X87-NEXT: shrdl %cl, %edx, %eax ; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: movl $-1, %ebp -; X86-X87-NEXT: movl $-1, %edi -; X86-X87-NEXT: movl $-1, %esi -; X86-X87-NEXT: ja .LBB39_10 -; X86-X87-NEXT: # %bb.9: +; X86-X87-NEXT: shrl %cl, %ebx +; X86-X87-NEXT: shrdl %cl, %edi, %esi +; X86-X87-NEXT: movl (%esp), %edi # 4-byte Reload +; X86-X87-NEXT: mull %edi ; X86-X87-NEXT: movl %edx, %ebp -; X86-X87-NEXT: movl %ebx, %edi -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-X87-NEXT: .LBB39_10: -; X86-X87-NEXT: fucomp %st(0) -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: movl $0, %edx -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jp .LBB39_12 -; X86-X87-NEXT: # %bb.11: +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: mull %edi +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %edx, %edi +; X86-X87-NEXT: addl %ecx, %edi +; X86-X87-NEXT: adcl $0, %ebp +; X86-X87-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-X87-NEXT: movl %edx, %ebp +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: addl %edi, %ecx +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-X87-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-X87-NEXT: mull %edi +; X86-X87-NEXT: addl %ebp, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-X87-NEXT: adcl %eax, %edx +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %edi, %ebp +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-X87-NEXT: imull %edx, %ebp +; X86-X87-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-X87-NEXT: imull %eax, %ebx +; X86-X87-NEXT: mull %edx +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: addl %edx, %ebx +; X86-X87-NEXT: addl %ebp, %ebx +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-X87-NEXT: imull %edi, %ebp +; X86-X87-NEXT: movl %edi, %eax +; X86-X87-NEXT: mull %esi +; X86-X87-NEXT: addl %ebp, %edx +; X86-X87-NEXT: imull %edi, %esi +; X86-X87-NEXT: addl %edx, %esi +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-X87-NEXT: adcl %ebx, %esi +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-X87-NEXT: jmp .LBB39_8 +; X86-X87-NEXT: .LBB39_7: # %fp-to-i-if-exp.large +; X86-X87-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-X87-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: addb $-111, %bl +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: shrb $3, %al +; X86-X87-NEXT: andb $12, %al +; X86-X87-NEXT: negb %al +; X86-X87-NEXT: movsbl %al, %eax +; X86-X87-NEXT: movl 84(%esp,%eax), %edi +; X86-X87-NEXT: movl 88(%esp,%eax), %edx +; X86-X87-NEXT: movl %edx, %esi +; X86-X87-NEXT: movl %ebx, %ecx +; X86-X87-NEXT: shldl %cl, %edi, %esi ; X86-X87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl 80(%esp,%eax), %esi +; X86-X87-NEXT: movl 92(%esp,%eax), %eax +; X86-X87-NEXT: shldl %cl, %edx, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: shldl %cl, %esi, %edi +; X86-X87-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: shll %cl, %esi +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: movl (%esp), %ebx # 4-byte Reload +; X86-X87-NEXT: mull %ebx +; X86-X87-NEXT: movl %edx, %ecx +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-X87-NEXT: movl %edi, %eax -; X86-X87-NEXT: movl %ebp, %edx +; X86-X87-NEXT: mull %ebx +; X86-X87-NEXT: movl %eax, %edi +; X86-X87-NEXT: addl %ecx, %edi +; X86-X87-NEXT: adcl $0, %edx +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %esi, %eax ; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-X87-NEXT: .LBB39_12: -; X86-X87-NEXT: movl %ebx, 12(%ecx) -; X86-X87-NEXT: movl %edx, 8(%ecx) -; X86-X87-NEXT: movl %eax, 4(%ecx) +; X86-X87-NEXT: mull %ebx +; X86-X87-NEXT: movl %edx, %ebp +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: addl %edi, %ecx +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-X87-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-X87-NEXT: movl %eax, (%ecx) -; X86-X87-NEXT: movl %ecx, %eax -; X86-X87-NEXT: addl $60, %esp +; X86-X87-NEXT: mull %ebx +; X86-X87-NEXT: addl %ebp, %eax +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-X87-NEXT: adcl %eax, %edx +; X86-X87-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-X87-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-X87-NEXT: imull %eax, %edi +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-X87-NEXT: mull %ebp +; X86-X87-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-X87-NEXT: addl %edi, %edx +; X86-X87-NEXT: imull %ebx, %ebp +; X86-X87-NEXT: addl %edx, %ebp +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-X87-NEXT: imull %ebx, %edi +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: mull %esi +; X86-X87-NEXT: addl %edi, %edx +; X86-X87-NEXT: imull %ebx, %esi +; X86-X87-NEXT: addl %edx, %esi +; X86-X87-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; X86-X87-NEXT: adcl %ebp, %esi +; X86-X87-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-X87-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-X87-NEXT: .LBB39_8: # %fp-to-i-cleanup +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-X87-NEXT: movl %edi, (%edx) +; X86-X87-NEXT: movl %ecx, 4(%edx) +; X86-X87-NEXT: movl %eax, 8(%edx) +; X86-X87-NEXT: movl %esi, 12(%edx) +; X86-X87-NEXT: movl %edx, %eax +; X86-X87-NEXT: addl $124, %esp ; X86-X87-NEXT: popl %esi ; X86-X87-NEXT: popl %edi ; X86-X87-NEXT: popl %ebx ; X86-X87-NEXT: popl %ebp ; X86-X87-NEXT: retl $4 -; X86-X87-NEXT: .LBB39_1: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-X87-NEXT: movl $0, %edx -; X86-X87-NEXT: jb .LBB39_4 -; X86-X87-NEXT: .LBB39_3: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X86-X87-NEXT: jae .LBB39_5 -; X86-X87-NEXT: jmp .LBB39_6 ; ; X86-SSE-LABEL: test_signed_i128_f80: -; X86-SSE: # %bb.0: +; X86-SSE: # %bb.0: # %fp-to-i-entry ; X86-SSE-NEXT: pushl %ebp ; X86-SSE-NEXT: pushl %ebx ; X86-SSE-NEXT: pushl %edi ; X86-SSE-NEXT: pushl %esi -; X86-SSE-NEXT: subl $44, %esp -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: subl $124, %esp ; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp) -; X86-SSE-NEXT: fld %st(0) -; X86-SSE-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill +; X86-SSE-NEXT: subl $16, %esp ; X86-SSE-NEXT: fstpt {{[0-9]+}}(%esp) ; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-SSE-NEXT: movl %eax, (%esp) -; X86-SSE-NEXT: calll __fixxfti -; X86-SSE-NEXT: subl $4, %esp -; X86-SSE-NEXT: xorl %ecx, %ecx -; X86-SSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-SSE-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload -; X86-SSE-NEXT: fucomi %st(1), %st -; X86-SSE-NEXT: fstp %st(1) +; X86-SSE-NEXT: calll __extendxftf2 +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: testl %ebx, %ebx +; X86-SSE-NEXT: setns %cl +; X86-SSE-NEXT: movl %ebx, %eax +; X86-SSE-NEXT: shrl $16, %eax +; X86-SSE-NEXT: andl $32767, %eax # imm = 0x7FFF +; X86-SSE-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: cmpl $16383, %eax # imm = 0x3FFF +; X86-SSE-NEXT: jb .LBB39_1 +; X86-SSE-NEXT: # %bb.2: # %fp-to-i-entry +; X86-SSE-NEXT: movb %cl, (%esp) # 1-byte Spill ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: cmovbl %ecx, %eax -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SSE-NEXT: cmovbl %ecx, %edx ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE-NEXT: cmovbl %ecx, %edi -; X86-SSE-NEXT: movl $-2147483648, %ebp # imm = 0x80000000 -; X86-SSE-NEXT: cmovael {{[0-9]+}}(%esp), %ebp -; X86-SSE-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}} -; X86-SSE-NEXT: fxch %st(1) -; X86-SSE-NEXT: fucomi %st(1), %st -; X86-SSE-NEXT: fstp %st(1) -; X86-SSE-NEXT: movl $2147483647, %ebx # imm = 0x7FFFFFFF -; X86-SSE-NEXT: cmovbel %ebp, %ebx -; X86-SSE-NEXT: movl $-1, %ebp -; X86-SSE-NEXT: cmoval %ebp, %edi -; X86-SSE-NEXT: cmoval %ebp, %edx -; X86-SSE-NEXT: cmoval %ebp, %eax -; X86-SSE-NEXT: fucompi %st(0), %st -; X86-SSE-NEXT: cmovpl %ecx, %eax -; X86-SSE-NEXT: cmovpl %ecx, %edx -; X86-SSE-NEXT: cmovpl %ecx, %edi -; X86-SSE-NEXT: cmovpl %ecx, %ebx -; X86-SSE-NEXT: movl %ebx, 12(%esi) -; X86-SSE-NEXT: movl %edi, 8(%esi) -; X86-SSE-NEXT: movl %edx, 4(%esi) -; X86-SSE-NEXT: movl %eax, (%esi) +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %eax +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: pushl %eax +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: calll __unordtf2 +; X86-SSE-NEXT: addl $32, %esp +; X86-SSE-NEXT: testl %eax, %eax +; X86-SSE-NEXT: movl $0, %ecx +; X86-SSE-NEXT: movl $0, %eax +; X86-SSE-NEXT: movl $0, %esi +; X86-SSE-NEXT: jne .LBB39_8 +; X86-SSE-NEXT: # %bb.3: # %fp-to-i-if-check.saturate +; X86-SSE-NEXT: xorl %eax, %eax +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-SSE-NEXT: cmpl $16510, %esi # imm = 0x407E +; X86-SSE-NEXT: sbbl %ecx, %ecx +; X86-SSE-NEXT: movl $0, %ecx +; X86-SSE-NEXT: sbbl %ecx, %ecx +; X86-SSE-NEXT: movl $0, %ecx +; X86-SSE-NEXT: sbbl %ecx, %ecx +; X86-SSE-NEXT: jb .LBB39_5 +; X86-SSE-NEXT: # %bb.4: # %fp-to-i-if-saturate +; X86-SSE-NEXT: xorl %edx, %edx +; X86-SSE-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload +; X86-SSE-NEXT: movb %cl, %dl +; X86-SSE-NEXT: negl %edx +; X86-SSE-NEXT: movl $0, %ecx +; X86-SSE-NEXT: sbbl %ecx, %ecx +; X86-SSE-NEXT: sbbl %eax, %eax +; X86-SSE-NEXT: movl $-2147483648, %esi # imm = 0x80000000 +; X86-SSE-NEXT: sbbl $0, %esi +; X86-SSE-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: jmp .LBB39_8 +; X86-SSE-NEXT: .LBB39_1: +; X86-SSE-NEXT: xorl %ecx, %ecx +; X86-SSE-NEXT: xorl %eax, %eax +; X86-SSE-NEXT: xorl %esi, %esi +; X86-SSE-NEXT: jmp .LBB39_8 +; X86-SSE-NEXT: .LBB39_5: # %fp-to-i-if-check.exp.size +; X86-SSE-NEXT: movl %ebx, %ecx +; X86-SSE-NEXT: sarl $31, %ecx +; X86-SSE-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: orl $1, %ecx +; X86-SSE-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-SSE-NEXT: movzwl %bx, %ecx +; X86-SSE-NEXT: orl $65536, %ecx # imm = 0x10000 +; X86-SSE-NEXT: movl $16494, %edx # imm = 0x406E +; X86-SSE-NEXT: cmpl %esi, %edx +; X86-SSE-NEXT: movl $0, %edx +; X86-SSE-NEXT: sbbl %edx, %edx +; X86-SSE-NEXT: movl $0, %edx +; X86-SSE-NEXT: sbbl %edx, %edx +; X86-SSE-NEXT: sbbl %eax, %eax +; X86-SSE-NEXT: movl %esi, %ebx +; X86-SSE-NEXT: jb .LBB39_7 +; X86-SSE-NEXT: # %bb.6: # %fp-to-i-if-exp.small +; X86-SSE-NEXT: xorps %xmm0, %xmm0 +; X86-SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-SSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movb $111, %cl +; X86-SSE-NEXT: subb %bl, %cl +; X86-SSE-NEXT: movl %ecx, %eax +; X86-SSE-NEXT: shrb $3, %al +; X86-SSE-NEXT: andb $12, %al +; X86-SSE-NEXT: movzbl %al, %eax +; X86-SSE-NEXT: movl 60(%esp,%eax), %ebx +; X86-SSE-NEXT: movl 56(%esp,%eax), %edx +; X86-SSE-NEXT: movl %edx, %esi +; X86-SSE-NEXT: shrdl %cl, %ebx, %esi +; X86-SSE-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl 48(%esp,%eax), %esi +; X86-SSE-NEXT: movl 52(%esp,%eax), %edi +; X86-SSE-NEXT: movl %edi, %eax +; X86-SSE-NEXT: shrdl %cl, %edx, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: shrl %cl, %ebx +; X86-SSE-NEXT: shrdl %cl, %edi, %esi +; X86-SSE-NEXT: movl (%esp), %edi # 4-byte Reload +; X86-SSE-NEXT: mull %edi +; X86-SSE-NEXT: movl %edx, %ebp +; X86-SSE-NEXT: movl %eax, %ecx ; X86-SSE-NEXT: movl %esi, %eax -; X86-SSE-NEXT: addl $44, %esp +; X86-SSE-NEXT: mull %edi +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %edx, %edi +; X86-SSE-NEXT: addl %ecx, %edi +; X86-SSE-NEXT: adcl $0, %ebp +; X86-SSE-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %esi, %eax +; X86-SSE-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-SSE-NEXT: movl %edx, %ebp +; X86-SSE-NEXT: movl %eax, %ecx +; X86-SSE-NEXT: addl %edi, %ecx +; X86-SSE-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-SSE-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-SSE-NEXT: mull %edi +; X86-SSE-NEXT: addl %ebp, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-SSE-NEXT: adcl %eax, %edx +; X86-SSE-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %edi, %ebp +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-SSE-NEXT: imull %edx, %ebp +; X86-SSE-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-SSE-NEXT: imull %eax, %ebx +; X86-SSE-NEXT: mull %edx +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: addl %edx, %ebx +; X86-SSE-NEXT: addl %ebp, %ebx +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-SSE-NEXT: imull %edi, %ebp +; X86-SSE-NEXT: movl %edi, %eax +; X86-SSE-NEXT: mull %esi +; X86-SSE-NEXT: addl %ebp, %edx +; X86-SSE-NEXT: imull %edi, %esi +; X86-SSE-NEXT: addl %edx, %esi +; X86-SSE-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-SSE-NEXT: adcl %ebx, %esi +; X86-SSE-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-SSE-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-SSE-NEXT: jmp .LBB39_8 +; X86-SSE-NEXT: .LBB39_7: # %fp-to-i-if-exp.large +; X86-SSE-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: xorps %xmm0, %xmm0 +; X86-SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-SSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: addb $-111, %bl +; X86-SSE-NEXT: movl %ebx, %eax +; X86-SSE-NEXT: shrb $3, %al +; X86-SSE-NEXT: andb $12, %al +; X86-SSE-NEXT: negb %al +; X86-SSE-NEXT: movsbl %al, %eax +; X86-SSE-NEXT: movl 100(%esp,%eax), %edi +; X86-SSE-NEXT: movl 104(%esp,%eax), %edx +; X86-SSE-NEXT: movl %edx, %esi +; X86-SSE-NEXT: movl %ebx, %ecx +; X86-SSE-NEXT: shldl %cl, %edi, %esi +; X86-SSE-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl 96(%esp,%eax), %esi +; X86-SSE-NEXT: movl 108(%esp,%eax), %eax +; X86-SSE-NEXT: shldl %cl, %edx, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: shldl %cl, %esi, %edi +; X86-SSE-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: shll %cl, %esi +; X86-SSE-NEXT: movl %esi, %eax +; X86-SSE-NEXT: movl (%esp), %ebx # 4-byte Reload +; X86-SSE-NEXT: mull %ebx +; X86-SSE-NEXT: movl %edx, %ecx +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %edi, %eax +; X86-SSE-NEXT: mull %ebx +; X86-SSE-NEXT: movl %eax, %edi +; X86-SSE-NEXT: addl %ecx, %edi +; X86-SSE-NEXT: adcl $0, %edx +; X86-SSE-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %esi, %eax +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-SSE-NEXT: mull %ebx +; X86-SSE-NEXT: movl %edx, %ebp +; X86-SSE-NEXT: movl %eax, %ecx +; X86-SSE-NEXT: addl %edi, %ecx +; X86-SSE-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-SSE-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-SSE-NEXT: mull %ebx +; X86-SSE-NEXT: addl %ebp, %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-SSE-NEXT: adcl %eax, %edx +; X86-SSE-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-SSE-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-SSE-NEXT: imull %eax, %edi +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-SSE-NEXT: mull %ebp +; X86-SSE-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-SSE-NEXT: addl %edi, %edx +; X86-SSE-NEXT: imull %ebx, %ebp +; X86-SSE-NEXT: addl %edx, %ebp +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-SSE-NEXT: imull %ebx, %edi +; X86-SSE-NEXT: movl %ebx, %eax +; X86-SSE-NEXT: mull %esi +; X86-SSE-NEXT: addl %edi, %edx +; X86-SSE-NEXT: imull %ebx, %esi +; X86-SSE-NEXT: addl %edx, %esi +; X86-SSE-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; X86-SSE-NEXT: adcl %ebp, %esi +; X86-SSE-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-SSE-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-SSE-NEXT: .LBB39_8: # %fp-to-i-cleanup +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-SSE-NEXT: movl %edi, (%edx) +; X86-SSE-NEXT: movl %ecx, 4(%edx) +; X86-SSE-NEXT: movl %eax, 8(%edx) +; X86-SSE-NEXT: movl %esi, 12(%edx) +; X86-SSE-NEXT: movl %edx, %eax +; X86-SSE-NEXT: addl $124, %esp ; X86-SSE-NEXT: popl %esi ; X86-SSE-NEXT: popl %edi ; X86-SSE-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll index a074c78d512f5..410c5845d3bb4 100644 --- a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll @@ -658,119 +658,170 @@ define i64 @test_unsigned_i64_f32(float %f) nounwind { define i100 @test_unsigned_i100_f32(float %f) nounwind { ; X86-X87-LABEL: test_unsigned_i100_f32: -; X86-X87: # %bb.0: -; X86-X87-NEXT: pushl %ebp +; X86-X87: # %bb.0: # %fp-to-i-entry ; X86-X87-NEXT: pushl %ebx ; X86-X87-NEXT: pushl %edi ; X86-X87-NEXT: pushl %esi -; X86-X87-NEXT: subl $44, %esp +; X86-X87-NEXT: subl $48, %esp +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-X87-NEXT: flds {{[0-9]+}}(%esp) ; X86-X87-NEXT: fsts {{[0-9]+}}(%esp) -; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: movl %eax, (%esp) -; X86-X87-NEXT: fldz -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fsts {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: movl %eax, %ebx -; X86-X87-NEXT: calll __fixunssfti -; X86-X87-NEXT: subl $4, %esp -; X86-X87-NEXT: xorl %edi, %edi -; X86-X87-NEXT: movb %bh, %ah -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: jb .LBB8_2 -; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: .LBB8_2: -; X86-X87-NEXT: movl $0, %esi -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jb .LBB8_4 -; X86-X87-NEXT: # %bb.3: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-X87-NEXT: .LBB8_4: -; X86-X87-NEXT: jb .LBB8_6 -; X86-X87-NEXT: # %bb.5: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-X87-NEXT: .LBB8_6: -; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-X87-NEXT: fucompp +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: shrl $23, %eax +; X86-X87-NEXT: movzbl %al, %ecx +; X86-X87-NEXT: cmpl $127, %ecx +; X86-X87-NEXT: setb %bl +; X86-X87-NEXT: fucomp %st(0) ; X86-X87-NEXT: fnstsw %ax ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf +; X86-X87-NEXT: setp %al +; X86-X87-NEXT: orb %bl, %al +; X86-X87-NEXT: testl %esi, %esi +; X86-X87-NEXT: sets %ah +; X86-X87-NEXT: orb %al, %ah +; X86-X87-NEXT: je .LBB8_3 +; X86-X87-NEXT: # %bb.1: +; X86-X87-NEXT: xorl %esi, %esi +; X86-X87-NEXT: .LBB8_2: # %fp-to-i-cleanup +; X86-X87-NEXT: xorl %ebx, %ebx +; X86-X87-NEXT: xorl %edi, %edi +; X86-X87-NEXT: xorl %eax, %eax +; X86-X87-NEXT: jmp .LBB8_8 +; X86-X87-NEXT: .LBB8_3: # %fp-to-i-if-check.saturate +; X86-X87-NEXT: cmpl $226, %ecx +; X86-X87-NEXT: jbe .LBB8_5 +; X86-X87-NEXT: # %bb.4: ; X86-X87-NEXT: movl $15, %eax -; X86-X87-NEXT: ja .LBB8_8 -; X86-X87-NEXT: # %bb.7: -; X86-X87-NEXT: movl %edi, %eax -; X86-X87-NEXT: .LBB8_8: +; X86-X87-NEXT: movl $-1, %esi +; X86-X87-NEXT: movl $-1, %ebx ; X86-X87-NEXT: movl $-1, %edi -; X86-X87-NEXT: movl $-1, %ebp -; X86-X87-NEXT: movl $-1, %edx -; X86-X87-NEXT: ja .LBB8_10 -; X86-X87-NEXT: # %bb.9: -; X86-X87-NEXT: movl %ebx, %edi -; X86-X87-NEXT: movl %esi, %ebp -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-X87-NEXT: .LBB8_10: -; X86-X87-NEXT: movl %edx, 8(%ecx) -; X86-X87-NEXT: movl %ebp, 4(%ecx) -; X86-X87-NEXT: movl %edi, (%ecx) -; X86-X87-NEXT: andl $15, %eax -; X86-X87-NEXT: movb %al, 12(%ecx) +; X86-X87-NEXT: jmp .LBB8_8 +; X86-X87-NEXT: .LBB8_5: # %fp-to-i-if-check.exp.size +; X86-X87-NEXT: andl $8388607, %esi # imm = 0x7FFFFF +; X86-X87-NEXT: orl $8388608, %esi # imm = 0x800000 +; X86-X87-NEXT: cmpl $149, %ecx +; X86-X87-NEXT: ja .LBB8_7 +; X86-X87-NEXT: # %bb.6: # %fp-to-i-if-exp.small +; X86-X87-NEXT: movb $-106, %al +; X86-X87-NEXT: subb %cl, %al +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: shrl %cl, %esi +; X86-X87-NEXT: jmp .LBB8_2 +; X86-X87-NEXT: .LBB8_7: # %fp-to-i-if-exp.large +; X86-X87-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: addb $106, %cl ; X86-X87-NEXT: movl %ecx, %eax -; X86-X87-NEXT: addl $44, %esp +; X86-X87-NEXT: shrb $3, %al +; X86-X87-NEXT: andb $12, %al +; X86-X87-NEXT: negb %al +; X86-X87-NEXT: movsbl %al, %ebx +; X86-X87-NEXT: movl 40(%esp,%ebx), %edi +; X86-X87-NEXT: movl 44(%esp,%ebx), %eax +; X86-X87-NEXT: shldl %cl, %edi, %eax +; X86-X87-NEXT: movl 32(%esp,%ebx), %esi +; X86-X87-NEXT: movl 36(%esp,%ebx), %ebx +; X86-X87-NEXT: shldl %cl, %ebx, %edi +; X86-X87-NEXT: shldl %cl, %esi, %ebx +; X86-X87-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-X87-NEXT: shll %cl, %esi +; X86-X87-NEXT: .LBB8_8: # %fp-to-i-cleanup +; X86-X87-NEXT: movl %esi, (%edx) +; X86-X87-NEXT: movl %ebx, 4(%edx) +; X86-X87-NEXT: movl %edi, 8(%edx) +; X86-X87-NEXT: andl $15, %eax +; X86-X87-NEXT: movb %al, 12(%edx) +; X86-X87-NEXT: movl %edx, %eax +; X86-X87-NEXT: addl $48, %esp ; X86-X87-NEXT: popl %esi ; X86-X87-NEXT: popl %edi ; X86-X87-NEXT: popl %ebx -; X86-X87-NEXT: popl %ebp ; X86-X87-NEXT: retl $4 ; ; X86-SSE-LABEL: test_unsigned_i100_f32: -; X86-SSE: # %bb.0: +; X86-SSE: # %bb.0: # %fp-to-i-entry ; X86-SSE-NEXT: pushl %ebx ; X86-SSE-NEXT: pushl %edi ; X86-SSE-NEXT: pushl %esi ; X86-SSE-NEXT: subl $32, %esp -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss %xmm0, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl %eax, (%esp) -; X86-SSE-NEXT: calll __fixunssfti -; X86-SSE-NEXT: subl $4, %esp -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: xorps %xmm0, %xmm0 -; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: ucomiss %xmm0, %xmm1 -; X86-SSE-NEXT: movaps %xmm1, %xmm0 -; X86-SSE-NEXT: movl $0, %ecx -; X86-SSE-NEXT: movl $0, %edx -; X86-SSE-NEXT: movl $0, %edi -; X86-SSE-NEXT: jb .LBB8_2 -; X86-SSE-NEXT: # %bb.1: ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE-NEXT: .LBB8_2: -; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: movl $15, %ebx -; X86-SSE-NEXT: cmovbel %edi, %ebx +; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movd %xmm0, %esi +; X86-SSE-NEXT: movl %esi, %ecx +; X86-SSE-NEXT: shrl $23, %ecx +; X86-SSE-NEXT: movzbl %cl, %ecx +; X86-SSE-NEXT: cmpl $127, %ecx +; X86-SSE-NEXT: setb %dl +; X86-SSE-NEXT: ucomiss %xmm0, %xmm0 +; X86-SSE-NEXT: setp %dh +; X86-SSE-NEXT: testl %esi, %esi +; X86-SSE-NEXT: sets %bl +; X86-SSE-NEXT: orb %dh, %bl +; X86-SSE-NEXT: orb %dl, %bl +; X86-SSE-NEXT: je .LBB8_3 +; X86-SSE-NEXT: # %bb.1: +; X86-SSE-NEXT: xorl %esi, %esi +; X86-SSE-NEXT: .LBB8_2: # %fp-to-i-cleanup +; X86-SSE-NEXT: xorl %ebx, %ebx +; X86-SSE-NEXT: xorl %edi, %edi +; X86-SSE-NEXT: xorl %edx, %edx +; X86-SSE-NEXT: jmp .LBB8_8 +; X86-SSE-NEXT: .LBB8_3: # %fp-to-i-if-check.saturate +; X86-SSE-NEXT: cmpl $226, %ecx +; X86-SSE-NEXT: jbe .LBB8_5 +; X86-SSE-NEXT: # %bb.4: +; X86-SSE-NEXT: movl $15, %edx +; X86-SSE-NEXT: movl $-1, %esi +; X86-SSE-NEXT: movl $-1, %ebx ; X86-SSE-NEXT: movl $-1, %edi -; X86-SSE-NEXT: cmoval %edi, %edx -; X86-SSE-NEXT: cmoval %edi, %ecx -; X86-SSE-NEXT: cmoval %edi, %eax -; X86-SSE-NEXT: movl %eax, 8(%esi) -; X86-SSE-NEXT: movl %ecx, 4(%esi) -; X86-SSE-NEXT: movl %edx, (%esi) -; X86-SSE-NEXT: andl $15, %ebx -; X86-SSE-NEXT: movb %bl, 12(%esi) -; X86-SSE-NEXT: movl %esi, %eax +; X86-SSE-NEXT: jmp .LBB8_8 +; X86-SSE-NEXT: .LBB8_5: # %fp-to-i-if-check.exp.size +; X86-SSE-NEXT: andl $8388607, %esi # imm = 0x7FFFFF +; X86-SSE-NEXT: orl $8388608, %esi # imm = 0x800000 +; X86-SSE-NEXT: cmpl $149, %ecx +; X86-SSE-NEXT: ja .LBB8_7 +; X86-SSE-NEXT: # %bb.6: # %fp-to-i-if-exp.small +; X86-SSE-NEXT: movb $-106, %dl +; X86-SSE-NEXT: subb %cl, %dl +; X86-SSE-NEXT: movl %edx, %ecx +; X86-SSE-NEXT: shrl %cl, %esi +; X86-SSE-NEXT: jmp .LBB8_2 +; X86-SSE-NEXT: .LBB8_7: # %fp-to-i-if-exp.large +; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: xorps %xmm0, %xmm0 +; X86-SSE-NEXT: movaps %xmm0, (%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: addb $106, %cl +; X86-SSE-NEXT: movl %ecx, %edx +; X86-SSE-NEXT: shrb $3, %dl +; X86-SSE-NEXT: andb $12, %dl +; X86-SSE-NEXT: negb %dl +; X86-SSE-NEXT: movsbl %dl, %ebx +; X86-SSE-NEXT: movl 24(%esp,%ebx), %edi +; X86-SSE-NEXT: movl 28(%esp,%ebx), %edx +; X86-SSE-NEXT: shldl %cl, %edi, %edx +; X86-SSE-NEXT: movl 16(%esp,%ebx), %esi +; X86-SSE-NEXT: movl 20(%esp,%ebx), %ebx +; X86-SSE-NEXT: shldl %cl, %ebx, %edi +; X86-SSE-NEXT: shldl %cl, %esi, %ebx +; X86-SSE-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-SSE-NEXT: shll %cl, %esi +; X86-SSE-NEXT: .LBB8_8: # %fp-to-i-cleanup +; X86-SSE-NEXT: movl %esi, (%eax) +; X86-SSE-NEXT: movl %ebx, 4(%eax) +; X86-SSE-NEXT: movl %edi, 8(%eax) +; X86-SSE-NEXT: andl $15, %edx +; X86-SSE-NEXT: movb %dl, 12(%eax) ; X86-SSE-NEXT: addl $32, %esp ; X86-SSE-NEXT: popl %esi ; X86-SSE-NEXT: popl %edi @@ -802,114 +853,168 @@ define i100 @test_unsigned_i100_f32(float %f) nounwind { define i128 @test_unsigned_i128_f32(float %f) nounwind { ; X86-X87-LABEL: test_unsigned_i128_f32: -; X86-X87: # %bb.0: -; X86-X87-NEXT: pushl %ebp +; X86-X87: # %bb.0: # %fp-to-i-entry ; X86-X87-NEXT: pushl %ebx ; X86-X87-NEXT: pushl %edi ; X86-X87-NEXT: pushl %esi -; X86-X87-NEXT: subl $60, %esp +; X86-X87-NEXT: subl $48, %esp +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-X87-NEXT: flds {{[0-9]+}}(%esp) ; X86-X87-NEXT: fsts {{[0-9]+}}(%esp) -; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: movl %eax, (%esp) -; X86-X87-NEXT: fldz -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fsts {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: movl %eax, %ebx -; X86-X87-NEXT: calll __fixunssfti -; X86-X87-NEXT: subl $4, %esp -; X86-X87-NEXT: xorl %edx, %edx -; X86-X87-NEXT: movb %bh, %ah -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: jb .LBB9_2 -; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: .LBB9_2: -; X86-X87-NEXT: movl $0, %ecx -; X86-X87-NEXT: jb .LBB9_4 -; X86-X87-NEXT: # %bb.3: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: .LBB9_4: -; X86-X87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jb .LBB9_6 -; X86-X87-NEXT: # %bb.5: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-X87-NEXT: .LBB9_6: -; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-X87-NEXT: fucompp +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: shrl $23, %eax +; X86-X87-NEXT: movzbl %al, %ecx +; X86-X87-NEXT: cmpl $127, %ecx +; X86-X87-NEXT: setb %bl +; X86-X87-NEXT: fucomp %st(0) ; X86-X87-NEXT: fnstsw %ax ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $-1, %eax -; X86-X87-NEXT: movl $-1, %ebp -; X86-X87-NEXT: movl $-1, %edi +; X86-X87-NEXT: setp %al +; X86-X87-NEXT: orb %bl, %al +; X86-X87-NEXT: testl %esi, %esi +; X86-X87-NEXT: sets %ah +; X86-X87-NEXT: orb %al, %ah +; X86-X87-NEXT: je .LBB9_3 +; X86-X87-NEXT: # %bb.1: +; X86-X87-NEXT: xorl %esi, %esi +; X86-X87-NEXT: .LBB9_2: # %fp-to-i-cleanup +; X86-X87-NEXT: xorl %edi, %edi +; X86-X87-NEXT: xorl %eax, %eax +; X86-X87-NEXT: xorl %ebx, %ebx +; X86-X87-NEXT: jmp .LBB9_8 +; X86-X87-NEXT: .LBB9_3: # %fp-to-i-if-check.saturate +; X86-X87-NEXT: cmpl $254, %ecx +; X86-X87-NEXT: jbe .LBB9_5 +; X86-X87-NEXT: # %bb.4: ; X86-X87-NEXT: movl $-1, %esi -; X86-X87-NEXT: ja .LBB9_8 -; X86-X87-NEXT: # %bb.7: -; X86-X87-NEXT: movl %ebx, %eax -; X86-X87-NEXT: movl %edx, %ebp -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-X87-NEXT: .LBB9_8: -; X86-X87-NEXT: movl %esi, 12(%ecx) -; X86-X87-NEXT: movl %edi, 8(%ecx) -; X86-X87-NEXT: movl %ebp, 4(%ecx) -; X86-X87-NEXT: movl %eax, (%ecx) +; X86-X87-NEXT: movl $-1, %edi +; X86-X87-NEXT: movl $-1, %eax +; X86-X87-NEXT: movl $-1, %ebx +; X86-X87-NEXT: jmp .LBB9_8 +; X86-X87-NEXT: .LBB9_5: # %fp-to-i-if-check.exp.size +; X86-X87-NEXT: andl $8388607, %esi # imm = 0x7FFFFF +; X86-X87-NEXT: orl $8388608, %esi # imm = 0x800000 +; X86-X87-NEXT: cmpl $149, %ecx +; X86-X87-NEXT: ja .LBB9_7 +; X86-X87-NEXT: # %bb.6: # %fp-to-i-if-exp.small +; X86-X87-NEXT: movb $-106, %al +; X86-X87-NEXT: subb %cl, %al +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: shrl %cl, %esi +; X86-X87-NEXT: jmp .LBB9_2 +; X86-X87-NEXT: .LBB9_7: # %fp-to-i-if-exp.large +; X86-X87-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: addb $106, %cl ; X86-X87-NEXT: movl %ecx, %eax -; X86-X87-NEXT: addl $60, %esp +; X86-X87-NEXT: shrb $3, %al +; X86-X87-NEXT: andb $12, %al +; X86-X87-NEXT: negb %al +; X86-X87-NEXT: movsbl %al, %edi +; X86-X87-NEXT: movl 40(%esp,%edi), %eax +; X86-X87-NEXT: movl 44(%esp,%edi), %ebx +; X86-X87-NEXT: shldl %cl, %eax, %ebx +; X86-X87-NEXT: movl 32(%esp,%edi), %esi +; X86-X87-NEXT: movl 36(%esp,%edi), %edi +; X86-X87-NEXT: shldl %cl, %edi, %eax +; X86-X87-NEXT: shldl %cl, %esi, %edi +; X86-X87-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-X87-NEXT: shll %cl, %esi +; X86-X87-NEXT: .LBB9_8: # %fp-to-i-cleanup +; X86-X87-NEXT: movl %esi, (%edx) +; X86-X87-NEXT: movl %edi, 4(%edx) +; X86-X87-NEXT: movl %eax, 8(%edx) +; X86-X87-NEXT: movl %ebx, 12(%edx) +; X86-X87-NEXT: movl %edx, %eax +; X86-X87-NEXT: addl $48, %esp ; X86-X87-NEXT: popl %esi ; X86-X87-NEXT: popl %edi ; X86-X87-NEXT: popl %ebx -; X86-X87-NEXT: popl %ebp ; X86-X87-NEXT: retl $4 ; ; X86-SSE-LABEL: test_unsigned_i128_f32: -; X86-SSE: # %bb.0: +; X86-SSE: # %bb.0: # %fp-to-i-entry ; X86-SSE-NEXT: pushl %ebx ; X86-SSE-NEXT: pushl %edi ; X86-SSE-NEXT: pushl %esi ; X86-SSE-NEXT: subl $32, %esp -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss %xmm0, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl %eax, (%esp) -; X86-SSE-NEXT: calll __fixunssfti -; X86-SSE-NEXT: subl $4, %esp -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: xorps %xmm0, %xmm0 -; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: ucomiss %xmm0, %xmm1 -; X86-SSE-NEXT: movaps %xmm1, %xmm0 -; X86-SSE-NEXT: movl $0, %ecx -; X86-SSE-NEXT: movl $0, %edx -; X86-SSE-NEXT: movl $0, %edi -; X86-SSE-NEXT: jb .LBB9_2 -; X86-SSE-NEXT: # %bb.1: ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE-NEXT: .LBB9_2: -; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movd %xmm0, %esi +; X86-SSE-NEXT: movl %esi, %ecx +; X86-SSE-NEXT: shrl $23, %ecx +; X86-SSE-NEXT: movzbl %cl, %ecx +; X86-SSE-NEXT: cmpl $127, %ecx +; X86-SSE-NEXT: setb %dl +; X86-SSE-NEXT: ucomiss %xmm0, %xmm0 +; X86-SSE-NEXT: setp %dh +; X86-SSE-NEXT: testl %esi, %esi +; X86-SSE-NEXT: sets %bl +; X86-SSE-NEXT: orb %dh, %bl +; X86-SSE-NEXT: orb %dl, %bl +; X86-SSE-NEXT: je .LBB9_3 +; X86-SSE-NEXT: # %bb.1: +; X86-SSE-NEXT: xorl %esi, %esi +; X86-SSE-NEXT: .LBB9_2: # %fp-to-i-cleanup +; X86-SSE-NEXT: xorl %edi, %edi +; X86-SSE-NEXT: xorl %edx, %edx +; X86-SSE-NEXT: xorl %ebx, %ebx +; X86-SSE-NEXT: jmp .LBB9_8 +; X86-SSE-NEXT: .LBB9_3: # %fp-to-i-if-check.saturate +; X86-SSE-NEXT: cmpl $254, %ecx +; X86-SSE-NEXT: jbe .LBB9_5 +; X86-SSE-NEXT: # %bb.4: +; X86-SSE-NEXT: movl $-1, %esi +; X86-SSE-NEXT: movl $-1, %edi +; X86-SSE-NEXT: movl $-1, %edx ; X86-SSE-NEXT: movl $-1, %ebx -; X86-SSE-NEXT: cmoval %ebx, %edi -; X86-SSE-NEXT: cmoval %ebx, %edx -; X86-SSE-NEXT: cmoval %ebx, %ecx -; X86-SSE-NEXT: cmoval %ebx, %eax -; X86-SSE-NEXT: movl %eax, 12(%esi) -; X86-SSE-NEXT: movl %ecx, 8(%esi) -; X86-SSE-NEXT: movl %edx, 4(%esi) -; X86-SSE-NEXT: movl %edi, (%esi) -; X86-SSE-NEXT: movl %esi, %eax +; X86-SSE-NEXT: jmp .LBB9_8 +; X86-SSE-NEXT: .LBB9_5: # %fp-to-i-if-check.exp.size +; X86-SSE-NEXT: andl $8388607, %esi # imm = 0x7FFFFF +; X86-SSE-NEXT: orl $8388608, %esi # imm = 0x800000 +; X86-SSE-NEXT: cmpl $149, %ecx +; X86-SSE-NEXT: ja .LBB9_7 +; X86-SSE-NEXT: # %bb.6: # %fp-to-i-if-exp.small +; X86-SSE-NEXT: movb $-106, %dl +; X86-SSE-NEXT: subb %cl, %dl +; X86-SSE-NEXT: movl %edx, %ecx +; X86-SSE-NEXT: shrl %cl, %esi +; X86-SSE-NEXT: jmp .LBB9_2 +; X86-SSE-NEXT: .LBB9_7: # %fp-to-i-if-exp.large +; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: xorps %xmm0, %xmm0 +; X86-SSE-NEXT: movaps %xmm0, (%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: addb $106, %cl +; X86-SSE-NEXT: movl %ecx, %edx +; X86-SSE-NEXT: shrb $3, %dl +; X86-SSE-NEXT: andb $12, %dl +; X86-SSE-NEXT: negb %dl +; X86-SSE-NEXT: movsbl %dl, %edi +; X86-SSE-NEXT: movl 24(%esp,%edi), %edx +; X86-SSE-NEXT: movl 28(%esp,%edi), %ebx +; X86-SSE-NEXT: shldl %cl, %edx, %ebx +; X86-SSE-NEXT: movl 16(%esp,%edi), %esi +; X86-SSE-NEXT: movl 20(%esp,%edi), %edi +; X86-SSE-NEXT: shldl %cl, %edi, %edx +; X86-SSE-NEXT: shldl %cl, %esi, %edi +; X86-SSE-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-SSE-NEXT: shll %cl, %esi +; X86-SSE-NEXT: .LBB9_8: # %fp-to-i-cleanup +; X86-SSE-NEXT: movl %esi, (%eax) +; X86-SSE-NEXT: movl %edi, 4(%eax) +; X86-SSE-NEXT: movl %edx, 8(%eax) +; X86-SSE-NEXT: movl %ebx, 12(%eax) ; X86-SSE-NEXT: addl $32, %esp ; X86-SSE-NEXT: popl %esi ; X86-SSE-NEXT: popl %edi @@ -1570,71 +1675,102 @@ define i64 @test_unsigned_i64_f64(double %f) nounwind { define i100 @test_unsigned_i100_f64(double %f) nounwind { ; X86-X87-LABEL: test_unsigned_i100_f64: -; X86-X87: # %bb.0: +; X86-X87: # %bb.0: # %fp-to-i-entry ; X86-X87-NEXT: pushl %ebp ; X86-X87-NEXT: pushl %ebx ; X86-X87-NEXT: pushl %edi ; X86-X87-NEXT: pushl %esi ; X86-X87-NEXT: subl $44, %esp +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-X87-NEXT: fldl {{[0-9]+}}(%esp) ; X86-X87-NEXT: fstl {{[0-9]+}}(%esp) -; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: movl %eax, (%esp) -; X86-X87-NEXT: fldz -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fstl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: movl %eax, %ebx -; X86-X87-NEXT: calll __fixunsdfti -; X86-X87-NEXT: subl $4, %esp -; X86-X87-NEXT: xorl %edi, %edi -; X86-X87-NEXT: movb %bh, %ah -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: jb .LBB18_2 -; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: .LBB18_2: -; X86-X87-NEXT: movl $0, %esi -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jb .LBB18_4 -; X86-X87-NEXT: # %bb.3: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-X87-NEXT: .LBB18_4: -; X86-X87-NEXT: jb .LBB18_6 -; X86-X87-NEXT: # %bb.5: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-X87-NEXT: .LBB18_6: -; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: fldl {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload -; X86-X87-NEXT: fucompp +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-X87-NEXT: movl %ebx, %ecx +; X86-X87-NEXT: shrl $20, %ecx +; X86-X87-NEXT: andl $2047, %ecx # imm = 0x7FF +; X86-X87-NEXT: movl %ebx, %edi +; X86-X87-NEXT: andl $1048575, %edi # imm = 0xFFFFF +; X86-X87-NEXT: addl $-1, %esi +; X86-X87-NEXT: adcl $1048575, %edi # imm = 0xFFFFF +; X86-X87-NEXT: addl $1, %esi +; X86-X87-NEXT: adcl $0, %edi +; X86-X87-NEXT: cmpl $1023, %ecx # imm = 0x3FF +; X86-X87-NEXT: setb %dl +; X86-X87-NEXT: fucomp %st(0) ; X86-X87-NEXT: fnstsw %ax ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf +; X86-X87-NEXT: setp %al +; X86-X87-NEXT: orb %dl, %al +; X86-X87-NEXT: testl %ebx, %ebx +; X86-X87-NEXT: sets %dl +; X86-X87-NEXT: orb %al, %dl +; X86-X87-NEXT: je .LBB18_2 +; X86-X87-NEXT: # %bb.1: +; X86-X87-NEXT: xorl %esi, %esi +; X86-X87-NEXT: xorl %edi, %edi +; X86-X87-NEXT: xorl %ebx, %ebx +; X86-X87-NEXT: xorl %eax, %eax +; X86-X87-NEXT: jmp .LBB18_9 +; X86-X87-NEXT: .LBB18_2: # %fp-to-i-if-check.saturate +; X86-X87-NEXT: cmpl $1122, %ecx # imm = 0x462 +; X86-X87-NEXT: jbe .LBB18_4 +; X86-X87-NEXT: # %bb.3: ; X86-X87-NEXT: movl $15, %eax -; X86-X87-NEXT: ja .LBB18_8 -; X86-X87-NEXT: # %bb.7: -; X86-X87-NEXT: movl %edi, %eax -; X86-X87-NEXT: .LBB18_8: +; X86-X87-NEXT: movl $-1, %esi ; X86-X87-NEXT: movl $-1, %edi -; X86-X87-NEXT: movl $-1, %ebp -; X86-X87-NEXT: movl $-1, %edx -; X86-X87-NEXT: ja .LBB18_10 -; X86-X87-NEXT: # %bb.9: -; X86-X87-NEXT: movl %ebx, %edi -; X86-X87-NEXT: movl %esi, %ebp -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-X87-NEXT: .LBB18_10: -; X86-X87-NEXT: movl %edx, 8(%ecx) -; X86-X87-NEXT: movl %ebp, 4(%ecx) -; X86-X87-NEXT: movl %edi, (%ecx) -; X86-X87-NEXT: andl $15, %eax -; X86-X87-NEXT: movb %al, 12(%ecx) +; X86-X87-NEXT: movl $-1, %ebx +; X86-X87-NEXT: jmp .LBB18_9 +; X86-X87-NEXT: .LBB18_4: # %fp-to-i-if-check.exp.size +; X86-X87-NEXT: cmpl $1074, %ecx # imm = 0x432 +; X86-X87-NEXT: ja .LBB18_8 +; X86-X87-NEXT: # %bb.5: # %fp-to-i-if-exp.small +; X86-X87-NEXT: movb $51, %al +; X86-X87-NEXT: subb %cl, %al +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: shrdl %cl, %edi, %esi +; X86-X87-NEXT: shrl %cl, %edi +; X86-X87-NEXT: xorl %ebx, %ebx +; X86-X87-NEXT: testb $32, %al +; X86-X87-NEXT: je .LBB18_7 +; X86-X87-NEXT: # %bb.6: +; X86-X87-NEXT: movl %edi, %esi +; X86-X87-NEXT: xorl %edi, %edi +; X86-X87-NEXT: .LBB18_7: # %fp-to-i-if-exp.small +; X86-X87-NEXT: xorl %eax, %eax +; X86-X87-NEXT: jmp .LBB18_9 +; X86-X87-NEXT: .LBB18_8: # %fp-to-i-if-exp.large +; X86-X87-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, (%esp) +; X86-X87-NEXT: addb $-51, %cl ; X86-X87-NEXT: movl %ecx, %eax +; X86-X87-NEXT: shrb $3, %al +; X86-X87-NEXT: andb $12, %al +; X86-X87-NEXT: negb %al +; X86-X87-NEXT: movsbl %al, %edx +; X86-X87-NEXT: movl 24(%esp,%edx), %ebx +; X86-X87-NEXT: movl 28(%esp,%edx), %eax +; X86-X87-NEXT: shldl %cl, %ebx, %eax +; X86-X87-NEXT: movl 16(%esp,%edx), %esi +; X86-X87-NEXT: movl 20(%esp,%edx), %edi +; X86-X87-NEXT: shldl %cl, %edi, %ebx +; X86-X87-NEXT: shldl %cl, %esi, %edi +; X86-X87-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-X87-NEXT: shll %cl, %esi +; X86-X87-NEXT: .LBB18_9: # %fp-to-i-cleanup +; X86-X87-NEXT: movl %esi, (%ebp) +; X86-X87-NEXT: movl %edi, 4(%ebp) +; X86-X87-NEXT: movl %ebx, 8(%ebp) +; X86-X87-NEXT: andl $15, %eax +; X86-X87-NEXT: movb %al, 12(%ebp) +; X86-X87-NEXT: movl %ebp, %eax ; X86-X87-NEXT: addl $44, %esp ; X86-X87-NEXT: popl %esi ; X86-X87-NEXT: popl %edi @@ -1643,47 +1779,93 @@ define i100 @test_unsigned_i100_f64(double %f) nounwind { ; X86-X87-NEXT: retl $4 ; ; X86-SSE-LABEL: test_unsigned_i100_f64: -; X86-SSE: # %bb.0: +; X86-SSE: # %bb.0: # %fp-to-i-entry ; X86-SSE-NEXT: pushl %ebx ; X86-SSE-NEXT: pushl %edi ; X86-SSE-NEXT: pushl %esi -; X86-SSE-NEXT: subl $32, %esp -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: subl $48, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl %eax, (%esp) -; X86-SSE-NEXT: calll __fixunsdfti -; X86-SSE-NEXT: subl $4, %esp -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: xorpd %xmm0, %xmm0 -; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; X86-SSE-NEXT: ucomisd %xmm0, %xmm1 -; X86-SSE-NEXT: movapd %xmm1, %xmm0 -; X86-SSE-NEXT: movl $0, %ecx -; X86-SSE-NEXT: movl $0, %edx -; X86-SSE-NEXT: movl $0, %edi -; X86-SSE-NEXT: jb .LBB18_2 -; X86-SSE-NEXT: # %bb.1: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE-NEXT: .LBB18_2: -; X86-SSE-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: movl $15, %ebx -; X86-SSE-NEXT: cmovbel %edi, %ebx +; X86-SSE-NEXT: movl %edx, %ecx +; X86-SSE-NEXT: shrl $20, %ecx +; X86-SSE-NEXT: andl $2047, %ecx # imm = 0x7FF +; X86-SSE-NEXT: movl %edx, %edi +; X86-SSE-NEXT: andl $1048575, %edi # imm = 0xFFFFF +; X86-SSE-NEXT: addl $-1, %esi +; X86-SSE-NEXT: adcl $1048575, %edi # imm = 0xFFFFF +; X86-SSE-NEXT: addl $1, %esi +; X86-SSE-NEXT: adcl $0, %edi +; X86-SSE-NEXT: cmpl $1023, %ecx # imm = 0x3FF +; X86-SSE-NEXT: setb %bl +; X86-SSE-NEXT: ucomisd %xmm0, %xmm0 +; X86-SSE-NEXT: setp %bh +; X86-SSE-NEXT: testl %edx, %edx +; X86-SSE-NEXT: sets %dl +; X86-SSE-NEXT: orb %bh, %dl +; X86-SSE-NEXT: orb %bl, %dl +; X86-SSE-NEXT: je .LBB18_2 +; X86-SSE-NEXT: # %bb.1: +; X86-SSE-NEXT: xorl %esi, %esi +; X86-SSE-NEXT: xorl %edi, %edi +; X86-SSE-NEXT: xorl %ebx, %ebx +; X86-SSE-NEXT: xorl %edx, %edx +; X86-SSE-NEXT: jmp .LBB18_7 +; X86-SSE-NEXT: .LBB18_2: # %fp-to-i-if-check.saturate +; X86-SSE-NEXT: cmpl $1122, %ecx # imm = 0x462 +; X86-SSE-NEXT: jbe .LBB18_4 +; X86-SSE-NEXT: # %bb.3: +; X86-SSE-NEXT: movl $15, %edx +; X86-SSE-NEXT: movl $-1, %esi ; X86-SSE-NEXT: movl $-1, %edi -; X86-SSE-NEXT: cmoval %edi, %edx -; X86-SSE-NEXT: cmoval %edi, %ecx -; X86-SSE-NEXT: cmoval %edi, %eax -; X86-SSE-NEXT: movl %eax, 8(%esi) -; X86-SSE-NEXT: movl %ecx, 4(%esi) -; X86-SSE-NEXT: movl %edx, (%esi) -; X86-SSE-NEXT: andl $15, %ebx -; X86-SSE-NEXT: movb %bl, 12(%esi) -; X86-SSE-NEXT: movl %esi, %eax -; X86-SSE-NEXT: addl $32, %esp +; X86-SSE-NEXT: movl $-1, %ebx +; X86-SSE-NEXT: jmp .LBB18_7 +; X86-SSE-NEXT: .LBB18_4: # %fp-to-i-if-check.exp.size +; X86-SSE-NEXT: cmpl $1074, %ecx # imm = 0x432 +; X86-SSE-NEXT: ja .LBB18_6 +; X86-SSE-NEXT: # %bb.5: # %fp-to-i-if-exp.small +; X86-SSE-NEXT: movb $51, %dl +; X86-SSE-NEXT: subb %cl, %dl +; X86-SSE-NEXT: movl %edx, %ecx +; X86-SSE-NEXT: shrdl %cl, %edi, %esi +; X86-SSE-NEXT: shrl %cl, %edi +; X86-SSE-NEXT: xorl %ebx, %ebx +; X86-SSE-NEXT: testb $32, %dl +; X86-SSE-NEXT: cmovnel %edi, %esi +; X86-SSE-NEXT: cmovnel %ebx, %edi +; X86-SSE-NEXT: xorl %edx, %edx +; X86-SSE-NEXT: jmp .LBB18_7 +; X86-SSE-NEXT: .LBB18_6: # %fp-to-i-if-exp.large +; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: xorpd %xmm0, %xmm0 +; X86-SSE-NEXT: movapd %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: addb $-51, %cl +; X86-SSE-NEXT: movl %ecx, %edx +; X86-SSE-NEXT: shrb $3, %dl +; X86-SSE-NEXT: andb $12, %dl +; X86-SSE-NEXT: negb %dl +; X86-SSE-NEXT: movsbl %dl, %edi +; X86-SSE-NEXT: movl 40(%esp,%edi), %ebx +; X86-SSE-NEXT: movl 44(%esp,%edi), %edx +; X86-SSE-NEXT: shldl %cl, %ebx, %edx +; X86-SSE-NEXT: movl 32(%esp,%edi), %esi +; X86-SSE-NEXT: movl 36(%esp,%edi), %edi +; X86-SSE-NEXT: shldl %cl, %edi, %ebx +; X86-SSE-NEXT: shldl %cl, %esi, %edi +; X86-SSE-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-SSE-NEXT: shll %cl, %esi +; X86-SSE-NEXT: .LBB18_7: # %fp-to-i-cleanup +; X86-SSE-NEXT: movl %esi, (%eax) +; X86-SSE-NEXT: movl %edi, 4(%eax) +; X86-SSE-NEXT: movl %ebx, 8(%eax) +; X86-SSE-NEXT: andl $15, %edx +; X86-SSE-NEXT: movb %dl, 12(%eax) +; X86-SSE-NEXT: addl $48, %esp ; X86-SSE-NEXT: popl %esi ; X86-SSE-NEXT: popl %edi ; X86-SSE-NEXT: popl %ebx @@ -1714,69 +1896,102 @@ define i100 @test_unsigned_i100_f64(double %f) nounwind { define i128 @test_unsigned_i128_f64(double %f) nounwind { ; X86-X87-LABEL: test_unsigned_i128_f64: -; X86-X87: # %bb.0: +; X86-X87: # %bb.0: # %fp-to-i-entry ; X86-X87-NEXT: pushl %ebp ; X86-X87-NEXT: pushl %ebx ; X86-X87-NEXT: pushl %edi ; X86-X87-NEXT: pushl %esi -; X86-X87-NEXT: subl $60, %esp +; X86-X87-NEXT: subl $44, %esp +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-X87-NEXT: fldl {{[0-9]+}}(%esp) ; X86-X87-NEXT: fstl {{[0-9]+}}(%esp) -; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: movl %eax, (%esp) -; X86-X87-NEXT: fldz -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fstl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: movl %eax, %ebx -; X86-X87-NEXT: calll __fixunsdfti -; X86-X87-NEXT: subl $4, %esp -; X86-X87-NEXT: xorl %edx, %edx -; X86-X87-NEXT: movb %bh, %ah -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: jb .LBB19_2 -; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: .LBB19_2: -; X86-X87-NEXT: movl $0, %ecx -; X86-X87-NEXT: jb .LBB19_4 -; X86-X87-NEXT: # %bb.3: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: .LBB19_4: -; X86-X87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jb .LBB19_6 -; X86-X87-NEXT: # %bb.5: +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-X87-NEXT: .LBB19_6: -; X86-X87-NEXT: fldl {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload -; X86-X87-NEXT: fucompp +; X86-X87-NEXT: movl %ebx, %ecx +; X86-X87-NEXT: shrl $20, %ecx +; X86-X87-NEXT: andl $2047, %ecx # imm = 0x7FF +; X86-X87-NEXT: movl %ebx, %edi +; X86-X87-NEXT: andl $1048575, %edi # imm = 0xFFFFF +; X86-X87-NEXT: addl $-1, %esi +; X86-X87-NEXT: adcl $1048575, %edi # imm = 0xFFFFF +; X86-X87-NEXT: addl $1, %esi +; X86-X87-NEXT: adcl $0, %edi +; X86-X87-NEXT: cmpl $1023, %ecx # imm = 0x3FF +; X86-X87-NEXT: setb %dl +; X86-X87-NEXT: fucomp %st(0) ; X86-X87-NEXT: fnstsw %ax ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $-1, %eax -; X86-X87-NEXT: movl $-1, %ebp -; X86-X87-NEXT: movl $-1, %edi +; X86-X87-NEXT: setp %al +; X86-X87-NEXT: orb %dl, %al +; X86-X87-NEXT: testl %ebx, %ebx +; X86-X87-NEXT: sets %dl +; X86-X87-NEXT: orb %al, %dl +; X86-X87-NEXT: je .LBB19_2 +; X86-X87-NEXT: # %bb.1: +; X86-X87-NEXT: xorl %esi, %esi +; X86-X87-NEXT: xorl %edi, %edi +; X86-X87-NEXT: xorl %ebx, %ebx +; X86-X87-NEXT: xorl %eax, %eax +; X86-X87-NEXT: jmp .LBB19_9 +; X86-X87-NEXT: .LBB19_2: # %fp-to-i-if-check.saturate +; X86-X87-NEXT: cmpl $1150, %ecx # imm = 0x47E +; X86-X87-NEXT: jbe .LBB19_4 +; X86-X87-NEXT: # %bb.3: ; X86-X87-NEXT: movl $-1, %esi +; X86-X87-NEXT: movl $-1, %edi +; X86-X87-NEXT: movl $-1, %ebx +; X86-X87-NEXT: movl $-1, %eax +; X86-X87-NEXT: jmp .LBB19_9 +; X86-X87-NEXT: .LBB19_4: # %fp-to-i-if-check.exp.size +; X86-X87-NEXT: cmpl $1074, %ecx # imm = 0x432 ; X86-X87-NEXT: ja .LBB19_8 -; X86-X87-NEXT: # %bb.7: -; X86-X87-NEXT: movl %ebx, %eax -; X86-X87-NEXT: movl %edx, %ebp -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-X87-NEXT: .LBB19_8: -; X86-X87-NEXT: movl %esi, 12(%ecx) -; X86-X87-NEXT: movl %edi, 8(%ecx) -; X86-X87-NEXT: movl %ebp, 4(%ecx) -; X86-X87-NEXT: movl %eax, (%ecx) +; X86-X87-NEXT: # %bb.5: # %fp-to-i-if-exp.small +; X86-X87-NEXT: movb $51, %al +; X86-X87-NEXT: subb %cl, %al +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: shrdl %cl, %edi, %esi +; X86-X87-NEXT: shrl %cl, %edi +; X86-X87-NEXT: xorl %ebx, %ebx +; X86-X87-NEXT: testb $32, %al +; X86-X87-NEXT: je .LBB19_7 +; X86-X87-NEXT: # %bb.6: +; X86-X87-NEXT: movl %edi, %esi +; X86-X87-NEXT: xorl %edi, %edi +; X86-X87-NEXT: .LBB19_7: # %fp-to-i-if-exp.small +; X86-X87-NEXT: xorl %eax, %eax +; X86-X87-NEXT: jmp .LBB19_9 +; X86-X87-NEXT: .LBB19_8: # %fp-to-i-if-exp.large +; X86-X87-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, (%esp) +; X86-X87-NEXT: addb $-51, %cl ; X86-X87-NEXT: movl %ecx, %eax -; X86-X87-NEXT: addl $60, %esp +; X86-X87-NEXT: shrb $3, %al +; X86-X87-NEXT: andb $12, %al +; X86-X87-NEXT: negb %al +; X86-X87-NEXT: movsbl %al, %edx +; X86-X87-NEXT: movl 24(%esp,%edx), %ebx +; X86-X87-NEXT: movl 28(%esp,%edx), %eax +; X86-X87-NEXT: shldl %cl, %ebx, %eax +; X86-X87-NEXT: movl 16(%esp,%edx), %esi +; X86-X87-NEXT: movl 20(%esp,%edx), %edi +; X86-X87-NEXT: shldl %cl, %edi, %ebx +; X86-X87-NEXT: shldl %cl, %esi, %edi +; X86-X87-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-X87-NEXT: shll %cl, %esi +; X86-X87-NEXT: .LBB19_9: # %fp-to-i-cleanup +; X86-X87-NEXT: movl %esi, (%ebp) +; X86-X87-NEXT: movl %edi, 4(%ebp) +; X86-X87-NEXT: movl %ebx, 8(%ebp) +; X86-X87-NEXT: movl %eax, 12(%ebp) +; X86-X87-NEXT: movl %ebp, %eax +; X86-X87-NEXT: addl $44, %esp ; X86-X87-NEXT: popl %esi ; X86-X87-NEXT: popl %edi ; X86-X87-NEXT: popl %ebx @@ -1784,45 +1999,92 @@ define i128 @test_unsigned_i128_f64(double %f) nounwind { ; X86-X87-NEXT: retl $4 ; ; X86-SSE-LABEL: test_unsigned_i128_f64: -; X86-SSE: # %bb.0: +; X86-SSE: # %bb.0: # %fp-to-i-entry ; X86-SSE-NEXT: pushl %ebx ; X86-SSE-NEXT: pushl %edi ; X86-SSE-NEXT: pushl %esi -; X86-SSE-NEXT: subl $32, %esp -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: subl $48, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl %eax, (%esp) -; X86-SSE-NEXT: calll __fixunsdfti -; X86-SSE-NEXT: subl $4, %esp -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: xorpd %xmm0, %xmm0 -; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; X86-SSE-NEXT: ucomisd %xmm0, %xmm1 -; X86-SSE-NEXT: movapd %xmm1, %xmm0 -; X86-SSE-NEXT: movl $0, %ecx -; X86-SSE-NEXT: movl $0, %edx -; X86-SSE-NEXT: movl $0, %edi -; X86-SSE-NEXT: jb .LBB19_2 -; X86-SSE-NEXT: # %bb.1: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE-NEXT: .LBB19_2: -; X86-SSE-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: movl %edx, %ecx +; X86-SSE-NEXT: shrl $20, %ecx +; X86-SSE-NEXT: andl $2047, %ecx # imm = 0x7FF +; X86-SSE-NEXT: movl %edx, %edi +; X86-SSE-NEXT: andl $1048575, %edi # imm = 0xFFFFF +; X86-SSE-NEXT: addl $-1, %esi +; X86-SSE-NEXT: adcl $1048575, %edi # imm = 0xFFFFF +; X86-SSE-NEXT: addl $1, %esi +; X86-SSE-NEXT: adcl $0, %edi +; X86-SSE-NEXT: cmpl $1023, %ecx # imm = 0x3FF +; X86-SSE-NEXT: setb %bl +; X86-SSE-NEXT: ucomisd %xmm0, %xmm0 +; X86-SSE-NEXT: setp %bh +; X86-SSE-NEXT: testl %edx, %edx +; X86-SSE-NEXT: sets %dl +; X86-SSE-NEXT: orb %bh, %dl +; X86-SSE-NEXT: orb %bl, %dl +; X86-SSE-NEXT: je .LBB19_2 +; X86-SSE-NEXT: # %bb.1: +; X86-SSE-NEXT: xorl %esi, %esi +; X86-SSE-NEXT: xorl %edi, %edi +; X86-SSE-NEXT: xorl %ebx, %ebx +; X86-SSE-NEXT: xorl %edx, %edx +; X86-SSE-NEXT: jmp .LBB19_7 +; X86-SSE-NEXT: .LBB19_2: # %fp-to-i-if-check.saturate +; X86-SSE-NEXT: cmpl $1150, %ecx # imm = 0x47E +; X86-SSE-NEXT: jbe .LBB19_4 +; X86-SSE-NEXT: # %bb.3: +; X86-SSE-NEXT: movl $-1, %esi +; X86-SSE-NEXT: movl $-1, %edi ; X86-SSE-NEXT: movl $-1, %ebx -; X86-SSE-NEXT: cmoval %ebx, %edi -; X86-SSE-NEXT: cmoval %ebx, %edx -; X86-SSE-NEXT: cmoval %ebx, %ecx -; X86-SSE-NEXT: cmoval %ebx, %eax -; X86-SSE-NEXT: movl %eax, 12(%esi) -; X86-SSE-NEXT: movl %ecx, 8(%esi) -; X86-SSE-NEXT: movl %edx, 4(%esi) -; X86-SSE-NEXT: movl %edi, (%esi) -; X86-SSE-NEXT: movl %esi, %eax -; X86-SSE-NEXT: addl $32, %esp +; X86-SSE-NEXT: movl $-1, %edx +; X86-SSE-NEXT: jmp .LBB19_7 +; X86-SSE-NEXT: .LBB19_4: # %fp-to-i-if-check.exp.size +; X86-SSE-NEXT: cmpl $1074, %ecx # imm = 0x432 +; X86-SSE-NEXT: ja .LBB19_6 +; X86-SSE-NEXT: # %bb.5: # %fp-to-i-if-exp.small +; X86-SSE-NEXT: movb $51, %dl +; X86-SSE-NEXT: subb %cl, %dl +; X86-SSE-NEXT: movl %edx, %ecx +; X86-SSE-NEXT: shrdl %cl, %edi, %esi +; X86-SSE-NEXT: shrl %cl, %edi +; X86-SSE-NEXT: xorl %ebx, %ebx +; X86-SSE-NEXT: testb $32, %dl +; X86-SSE-NEXT: cmovnel %edi, %esi +; X86-SSE-NEXT: cmovnel %ebx, %edi +; X86-SSE-NEXT: xorl %edx, %edx +; X86-SSE-NEXT: jmp .LBB19_7 +; X86-SSE-NEXT: .LBB19_6: # %fp-to-i-if-exp.large +; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: xorpd %xmm0, %xmm0 +; X86-SSE-NEXT: movapd %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: addb $-51, %cl +; X86-SSE-NEXT: movl %ecx, %edx +; X86-SSE-NEXT: shrb $3, %dl +; X86-SSE-NEXT: andb $12, %dl +; X86-SSE-NEXT: negb %dl +; X86-SSE-NEXT: movsbl %dl, %edi +; X86-SSE-NEXT: movl 40(%esp,%edi), %ebx +; X86-SSE-NEXT: movl 44(%esp,%edi), %edx +; X86-SSE-NEXT: shldl %cl, %ebx, %edx +; X86-SSE-NEXT: movl 32(%esp,%edi), %esi +; X86-SSE-NEXT: movl 36(%esp,%edi), %edi +; X86-SSE-NEXT: shldl %cl, %edi, %ebx +; X86-SSE-NEXT: shldl %cl, %esi, %edi +; X86-SSE-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-SSE-NEXT: shll %cl, %esi +; X86-SSE-NEXT: .LBB19_7: # %fp-to-i-cleanup +; X86-SSE-NEXT: movl %esi, (%eax) +; X86-SSE-NEXT: movl %edi, 4(%eax) +; X86-SSE-NEXT: movl %ebx, 8(%eax) +; X86-SSE-NEXT: movl %edx, 12(%eax) +; X86-SSE-NEXT: addl $48, %esp ; X86-SSE-NEXT: popl %esi ; X86-SSE-NEXT: popl %edi ; X86-SSE-NEXT: popl %ebx @@ -2608,131 +2870,51 @@ define i64 @test_unsigned_i64_f16(half %f) nounwind { define i100 @test_unsigned_i100_f16(half %f) nounwind { ; X86-X87-LABEL: test_unsigned_i100_f16: ; X86-X87: # %bb.0: -; X86-X87-NEXT: pushl %ebp -; X86-X87-NEXT: pushl %ebx -; X86-X87-NEXT: pushl %edi ; X86-X87-NEXT: pushl %esi -; X86-X87-NEXT: subl $44, %esp +; X86-X87-NEXT: subl $24, %esp +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-X87-NEXT: movl %eax, (%esp) ; X86-X87-NEXT: calll __extendhfsf2 -; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: movl %eax, (%esp) -; X86-X87-NEXT: fsts {{[0-9]+}}(%esp) -; X86-X87-NEXT: fldz -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fsts {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: movl %eax, %ebx -; X86-X87-NEXT: calll __fixunssfti -; X86-X87-NEXT: subl $4, %esp -; X86-X87-NEXT: xorl %edi, %edi -; X86-X87-NEXT: movb %bh, %ah -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: jb .LBB28_2 -; X86-X87-NEXT: # %bb.1: +; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp) +; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00 +; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) +; X86-X87-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: .LBB28_2: -; X86-X87-NEXT: movl $0, %esi -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jb .LBB28_4 -; X86-X87-NEXT: # %bb.3: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-X87-NEXT: .LBB28_4: -; X86-X87-NEXT: jb .LBB28_6 -; X86-X87-NEXT: # %bb.5: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-X87-NEXT: .LBB28_6: -; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $15, %eax -; X86-X87-NEXT: ja .LBB28_8 -; X86-X87-NEXT: # %bb.7: -; X86-X87-NEXT: movl %edi, %eax -; X86-X87-NEXT: .LBB28_8: -; X86-X87-NEXT: movl $-1, %edi -; X86-X87-NEXT: movl $-1, %ebp -; X86-X87-NEXT: movl $-1, %edx -; X86-X87-NEXT: ja .LBB28_10 -; X86-X87-NEXT: # %bb.9: -; X86-X87-NEXT: movl %ebx, %edi -; X86-X87-NEXT: movl %esi, %ebp -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-X87-NEXT: .LBB28_10: -; X86-X87-NEXT: movl %edx, 8(%ecx) -; X86-X87-NEXT: movl %ebp, 4(%ecx) -; X86-X87-NEXT: movl %edi, (%ecx) +; X86-X87-NEXT: movl %eax, (%esi) +; X86-X87-NEXT: sarl $31, %eax +; X86-X87-NEXT: movl %eax, 8(%esi) +; X86-X87-NEXT: movl %eax, 4(%esi) ; X86-X87-NEXT: andl $15, %eax -; X86-X87-NEXT: movb %al, 12(%ecx) -; X86-X87-NEXT: movl %ecx, %eax -; X86-X87-NEXT: addl $44, %esp +; X86-X87-NEXT: movb %al, 12(%esi) +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: addl $24, %esp ; X86-X87-NEXT: popl %esi -; X86-X87-NEXT: popl %edi -; X86-X87-NEXT: popl %ebx -; X86-X87-NEXT: popl %ebp ; X86-X87-NEXT: retl $4 ; ; X86-SSE-LABEL: test_unsigned_i100_f16: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %ebx -; X86-SSE-NEXT: pushl %edi ; X86-SSE-NEXT: pushl %esi -; X86-SSE-NEXT: subl $32, %esp +; X86-SSE-NEXT: subl $8, %esp ; X86-SSE-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-SSE-NEXT: pextrw $0, %xmm0, %eax ; X86-SSE-NEXT: movw %ax, (%esp) ; X86-SSE-NEXT: calll __extendhfsf2 -; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl %eax, (%esp) ; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SSE-NEXT: movss %xmm0, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: calll __fixunssfti -; X86-SSE-NEXT: subl $4, %esp -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: xorps %xmm0, %xmm0 -; X86-SSE-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 4-byte Reload -; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: ucomiss %xmm0, %xmm1 -; X86-SSE-NEXT: movaps %xmm1, %xmm0 -; X86-SSE-NEXT: movl $0, %ecx -; X86-SSE-NEXT: movl $0, %edx -; X86-SSE-NEXT: movl $0, %edi -; X86-SSE-NEXT: jb .LBB28_2 -; X86-SSE-NEXT: # %bb.1: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE-NEXT: .LBB28_2: -; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: movl $15, %ebx -; X86-SSE-NEXT: cmovbel %edi, %ebx -; X86-SSE-NEXT: movl $-1, %edi -; X86-SSE-NEXT: cmoval %edi, %edx -; X86-SSE-NEXT: cmoval %edi, %ecx -; X86-SSE-NEXT: cmoval %edi, %eax +; X86-SSE-NEXT: cvttss2si {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl %eax, (%esi) +; X86-SSE-NEXT: sarl $31, %eax ; X86-SSE-NEXT: movl %eax, 8(%esi) -; X86-SSE-NEXT: movl %ecx, 4(%esi) -; X86-SSE-NEXT: movl %edx, (%esi) -; X86-SSE-NEXT: andl $15, %ebx -; X86-SSE-NEXT: movb %bl, 12(%esi) +; X86-SSE-NEXT: movl %eax, 4(%esi) +; X86-SSE-NEXT: andl $15, %eax +; X86-SSE-NEXT: movb %al, 12(%esi) ; X86-SSE-NEXT: movl %esi, %eax -; X86-SSE-NEXT: addl $32, %esp +; X86-SSE-NEXT: addl $8, %esp ; X86-SSE-NEXT: popl %esi -; X86-SSE-NEXT: popl %edi -; X86-SSE-NEXT: popl %ebx ; X86-SSE-NEXT: retl $4 ; ; X64-LABEL: test_unsigned_i100_f16: @@ -2762,126 +2944,49 @@ define i100 @test_unsigned_i100_f16(half %f) nounwind { define i128 @test_unsigned_i128_f16(half %f) nounwind { ; X86-X87-LABEL: test_unsigned_i128_f16: ; X86-X87: # %bb.0: -; X86-X87-NEXT: pushl %ebp -; X86-X87-NEXT: pushl %ebx -; X86-X87-NEXT: pushl %edi ; X86-X87-NEXT: pushl %esi -; X86-X87-NEXT: subl $60, %esp -; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: movl %eax, (%esp) -; X86-X87-NEXT: calll __extendhfsf2 -; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: movl %eax, (%esp) -; X86-X87-NEXT: fsts {{[0-9]+}}(%esp) -; X86-X87-NEXT: fldz -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fsts {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: movl %eax, %ebx -; X86-X87-NEXT: calll __fixunssfti -; X86-X87-NEXT: subl $4, %esp -; X86-X87-NEXT: xorl %edx, %edx -; X86-X87-NEXT: movb %bh, %ah -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: jb .LBB29_2 -; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: .LBB29_2: -; X86-X87-NEXT: movl $0, %ecx -; X86-X87-NEXT: jb .LBB29_4 -; X86-X87-NEXT: # %bb.3: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: .LBB29_4: -; X86-X87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jb .LBB29_6 -; X86-X87-NEXT: # %bb.5: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-X87-NEXT: .LBB29_6: -; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $-1, %eax -; X86-X87-NEXT: movl $-1, %ebp -; X86-X87-NEXT: movl $-1, %edi -; X86-X87-NEXT: movl $-1, %esi -; X86-X87-NEXT: ja .LBB29_8 -; X86-X87-NEXT: # %bb.7: -; X86-X87-NEXT: movl %ebx, %eax -; X86-X87-NEXT: movl %edx, %ebp -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-X87-NEXT: .LBB29_8: -; X86-X87-NEXT: movl %esi, 12(%ecx) -; X86-X87-NEXT: movl %edi, 8(%ecx) -; X86-X87-NEXT: movl %ebp, 4(%ecx) -; X86-X87-NEXT: movl %eax, (%ecx) -; X86-X87-NEXT: movl %ecx, %eax -; X86-X87-NEXT: addl $60, %esp +; X86-X87-NEXT: subl $24, %esp +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-X87-NEXT: movl %eax, (%esp) +; X86-X87-NEXT: calll __extendhfsf2 +; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp) +; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00 +; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) +; X86-X87-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-X87-NEXT: movl %eax, (%esi) +; X86-X87-NEXT: sarl $31, %eax +; X86-X87-NEXT: movl %eax, 12(%esi) +; X86-X87-NEXT: movl %eax, 8(%esi) +; X86-X87-NEXT: movl %eax, 4(%esi) +; X86-X87-NEXT: movl %esi, %eax +; X86-X87-NEXT: addl $24, %esp ; X86-X87-NEXT: popl %esi -; X86-X87-NEXT: popl %edi -; X86-X87-NEXT: popl %ebx -; X86-X87-NEXT: popl %ebp ; X86-X87-NEXT: retl $4 ; ; X86-SSE-LABEL: test_unsigned_i128_f16: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %ebx -; X86-SSE-NEXT: pushl %edi ; X86-SSE-NEXT: pushl %esi -; X86-SSE-NEXT: subl $32, %esp +; X86-SSE-NEXT: subl $8, %esp ; X86-SSE-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-SSE-NEXT: pextrw $0, %xmm0, %eax ; X86-SSE-NEXT: movw %ax, (%esp) ; X86-SSE-NEXT: calll __extendhfsf2 -; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl %eax, (%esp) ; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SSE-NEXT: movss %xmm0, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: calll __fixunssfti -; X86-SSE-NEXT: subl $4, %esp -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: xorps %xmm0, %xmm0 -; X86-SSE-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 4-byte Reload -; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: ucomiss %xmm0, %xmm1 -; X86-SSE-NEXT: movaps %xmm1, %xmm0 -; X86-SSE-NEXT: movl $0, %ecx -; X86-SSE-NEXT: movl $0, %edx -; X86-SSE-NEXT: movl $0, %edi -; X86-SSE-NEXT: jb .LBB29_2 -; X86-SSE-NEXT: # %bb.1: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE-NEXT: .LBB29_2: -; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: movl $-1, %ebx -; X86-SSE-NEXT: cmoval %ebx, %edi -; X86-SSE-NEXT: cmoval %ebx, %edx -; X86-SSE-NEXT: cmoval %ebx, %ecx -; X86-SSE-NEXT: cmoval %ebx, %eax +; X86-SSE-NEXT: cvttss2si {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl %eax, (%esi) +; X86-SSE-NEXT: sarl $31, %eax ; X86-SSE-NEXT: movl %eax, 12(%esi) -; X86-SSE-NEXT: movl %ecx, 8(%esi) -; X86-SSE-NEXT: movl %edx, 4(%esi) -; X86-SSE-NEXT: movl %edi, (%esi) +; X86-SSE-NEXT: movl %eax, 8(%esi) +; X86-SSE-NEXT: movl %eax, 4(%esi) ; X86-SSE-NEXT: movl %esi, %eax -; X86-SSE-NEXT: addl $32, %esp +; X86-SSE-NEXT: addl $8, %esp ; X86-SSE-NEXT: popl %esi -; X86-SSE-NEXT: popl %edi -; X86-SSE-NEXT: popl %ebx ; X86-SSE-NEXT: retl $4 ; ; X64-LABEL: test_unsigned_i128_f16: @@ -3822,74 +3927,157 @@ define i64 @test_unsigned_i64_f80(x86_fp80 %f) nounwind { define i100 @test_unsigned_i100_f80(x86_fp80 %f) nounwind { ; X86-X87-LABEL: test_unsigned_i100_f80: -; X86-X87: # %bb.0: +; X86-X87: # %bb.0: # %fp-to-i-entry ; X86-X87-NEXT: pushl %ebp ; X86-X87-NEXT: pushl %ebx ; X86-X87-NEXT: pushl %edi ; X86-X87-NEXT: pushl %esi -; X86-X87-NEXT: subl $60, %esp +; X86-X87-NEXT: subl $108, %esp ; X86-X87-NEXT: fldt {{[0-9]+}}(%esp) -; X86-X87-NEXT: fld %st(0) +; X86-X87-NEXT: subl $16, %esp ; X86-X87-NEXT: fstpt {{[0-9]+}}(%esp) ; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-X87-NEXT: movl %eax, (%esp) -; X86-X87-NEXT: fldz -; X86-X87-NEXT: fld %st(1) -; X86-X87-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: movl %eax, %ebx -; X86-X87-NEXT: calll __fixunsxfti -; X86-X87-NEXT: subl $4, %esp -; X86-X87-NEXT: xorl %edi, %edi -; X86-X87-NEXT: movb %bh, %ah -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: jb .LBB38_2 -; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: .LBB38_2: -; X86-X87-NEXT: movl $0, %esi -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jb .LBB38_4 -; X86-X87-NEXT: # %bb.3: +; X86-X87-NEXT: calll __extendxftf2 +; X86-X87-NEXT: addl $12, %esp ; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-X87-NEXT: .LBB38_4: -; X86-X87-NEXT: jb .LBB38_6 -; X86-X87-NEXT: # %bb.5: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-X87-NEXT: .LBB38_6: +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: shrl $16, %eax +; X86-X87-NEXT: andl $32767, %eax # imm = 0x7FFF +; X86-X87-NEXT: xorl %ecx, %ecx ; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $15, %eax -; X86-X87-NEXT: ja .LBB38_8 -; X86-X87-NEXT: # %bb.7: -; X86-X87-NEXT: movl %edi, %eax -; X86-X87-NEXT: .LBB38_8: -; X86-X87-NEXT: movl $-1, %edi +; X86-X87-NEXT: cmpl $16383, %eax # imm = 0x3FFF +; X86-X87-NEXT: jb .LBB38_1 +; X86-X87-NEXT: # %bb.2: # %fp-to-i-entry +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-X87-NEXT: pushl %ebx +; X86-X87-NEXT: pushl %edi +; X86-X87-NEXT: pushl %ebp +; X86-X87-NEXT: pushl %esi +; X86-X87-NEXT: pushl %ebx +; X86-X87-NEXT: pushl %edi +; X86-X87-NEXT: pushl %ebp +; X86-X87-NEXT: pushl %esi +; X86-X87-NEXT: calll __unordtf2 +; X86-X87-NEXT: addl $32, %esp +; X86-X87-NEXT: testl %eax, %eax +; X86-X87-NEXT: jne .LBB38_3 +; X86-X87-NEXT: # %bb.4: # %fp-to-i-entry +; X86-X87-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: testl %ebx, %ebx +; X86-X87-NEXT: movl $0, %ebp +; X86-X87-NEXT: movl $0, %edi +; X86-X87-NEXT: movl $0, %edx +; X86-X87-NEXT: movl $0, %ecx +; X86-X87-NEXT: js .LBB38_10 +; X86-X87-NEXT: # %bb.5: # %fp-to-i-if-check.saturate +; X86-X87-NEXT: xorl %eax, %eax +; X86-X87-NEXT: movl $16482, %ecx # imm = 0x4062 +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-X87-NEXT: cmpl %edi, %ecx +; X86-X87-NEXT: movl $0, %ecx +; X86-X87-NEXT: sbbl %ecx, %ecx +; X86-X87-NEXT: movl $0, %ecx +; X86-X87-NEXT: sbbl %ecx, %ecx +; X86-X87-NEXT: movl $0, %ecx +; X86-X87-NEXT: sbbl %ecx, %ecx +; X86-X87-NEXT: jae .LBB38_7 +; X86-X87-NEXT: # %bb.6: +; X86-X87-NEXT: movl $15, %edx +; X86-X87-NEXT: movl $-1, %ecx ; X86-X87-NEXT: movl $-1, %ebp -; X86-X87-NEXT: movl $-1, %edx -; X86-X87-NEXT: ja .LBB38_10 -; X86-X87-NEXT: # %bb.9: -; X86-X87-NEXT: movl %ebx, %edi -; X86-X87-NEXT: movl %esi, %ebp -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-X87-NEXT: .LBB38_10: -; X86-X87-NEXT: movl %edx, 8(%ecx) -; X86-X87-NEXT: movl %ebp, 4(%ecx) -; X86-X87-NEXT: movl %edi, (%ecx) -; X86-X87-NEXT: andl $15, %eax -; X86-X87-NEXT: movb %al, 12(%ecx) +; X86-X87-NEXT: movl $-1, %edi +; X86-X87-NEXT: jmp .LBB38_10 +; X86-X87-NEXT: .LBB38_7: # %fp-to-i-if-check.exp.size +; X86-X87-NEXT: movzwl %bx, %ecx +; X86-X87-NEXT: orl $65536, %ecx # imm = 0x10000 +; X86-X87-NEXT: movl $16494, %edx # imm = 0x406E +; X86-X87-NEXT: cmpl %edi, %edx +; X86-X87-NEXT: movl $0, %edx +; X86-X87-NEXT: sbbl %edx, %edx +; X86-X87-NEXT: movl $0, %edx +; X86-X87-NEXT: sbbl %edx, %edx +; X86-X87-NEXT: sbbl %eax, %eax +; X86-X87-NEXT: movl %edi, %ebx +; X86-X87-NEXT: jb .LBB38_9 +; X86-X87-NEXT: # %bb.8: # %fp-to-i-if-exp.small +; X86-X87-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-X87-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-X87-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movb $111, %cl +; X86-X87-NEXT: subb %bl, %cl ; X86-X87-NEXT: movl %ecx, %eax -; X86-X87-NEXT: addl $60, %esp +; X86-X87-NEXT: shrb $3, %al +; X86-X87-NEXT: andb $12, %al +; X86-X87-NEXT: movzbl %al, %esi +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl 28(%esp,%esi), %edx +; X86-X87-NEXT: movl 24(%esp,%esi), %eax +; X86-X87-NEXT: movl %eax, %edi +; X86-X87-NEXT: shrdl %cl, %edx, %edi +; X86-X87-NEXT: movl 16(%esp,%esi), %ebx +; X86-X87-NEXT: movl 20(%esp,%esi), %esi +; X86-X87-NEXT: movl %esi, %ebp +; X86-X87-NEXT: shrdl %cl, %eax, %ebp +; X86-X87-NEXT: shrl %cl, %edx +; X86-X87-NEXT: shrdl %cl, %esi, %ebx +; X86-X87-NEXT: movl %ebx, %ecx +; X86-X87-NEXT: jmp .LBB38_10 +; X86-X87-NEXT: .LBB38_1: +; X86-X87-NEXT: xorl %ebp, %ebp +; X86-X87-NEXT: xorl %edi, %edi +; X86-X87-NEXT: xorl %edx, %edx +; X86-X87-NEXT: jmp .LBB38_10 +; X86-X87-NEXT: .LBB38_9: # %fp-to-i-if-exp.large +; X86-X87-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-X87-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-X87-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: addb $-111, %bl +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: shrb $3, %al +; X86-X87-NEXT: andb $12, %al +; X86-X87-NEXT: negb %al +; X86-X87-NEXT: movsbl %al, %esi +; X86-X87-NEXT: movl 72(%esp,%esi), %edi +; X86-X87-NEXT: movl 76(%esp,%esi), %edx +; X86-X87-NEXT: movl %ebx, %ecx +; X86-X87-NEXT: shldl %cl, %edi, %edx +; X86-X87-NEXT: movl 64(%esp,%esi), %eax +; X86-X87-NEXT: movl 68(%esp,%esi), %ebp +; X86-X87-NEXT: shldl %cl, %ebp, %edi +; X86-X87-NEXT: shldl %cl, %eax, %ebp +; X86-X87-NEXT: shll %cl, %eax +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: jmp .LBB38_10 +; X86-X87-NEXT: .LBB38_3: +; X86-X87-NEXT: xorl %ebp, %ebp +; X86-X87-NEXT: xorl %edi, %edi +; X86-X87-NEXT: xorl %edx, %edx +; X86-X87-NEXT: xorl %ecx, %ecx +; X86-X87-NEXT: .LBB38_10: # %fp-to-i-cleanup +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-X87-NEXT: movl %ecx, (%eax) +; X86-X87-NEXT: movl %ebp, 4(%eax) +; X86-X87-NEXT: movl %edi, 8(%eax) +; X86-X87-NEXT: andl $15, %edx +; X86-X87-NEXT: movb %dl, 12(%eax) +; X86-X87-NEXT: addl $108, %esp ; X86-X87-NEXT: popl %esi ; X86-X87-NEXT: popl %edi ; X86-X87-NEXT: popl %ebx @@ -3897,56 +4085,157 @@ define i100 @test_unsigned_i100_f80(x86_fp80 %f) nounwind { ; X86-X87-NEXT: retl $4 ; ; X86-SSE-LABEL: test_unsigned_i100_f80: -; X86-SSE: # %bb.0: +; X86-SSE: # %bb.0: # %fp-to-i-entry +; X86-SSE-NEXT: pushl %ebp ; X86-SSE-NEXT: pushl %ebx ; X86-SSE-NEXT: pushl %edi ; X86-SSE-NEXT: pushl %esi -; X86-SSE-NEXT: subl $48, %esp -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: subl $108, %esp ; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp) -; X86-SSE-NEXT: fld %st(0) -; X86-SSE-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill +; X86-SSE-NEXT: subl $16, %esp ; X86-SSE-NEXT: fstpt {{[0-9]+}}(%esp) ; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-SSE-NEXT: movl %eax, (%esp) -; X86-SSE-NEXT: calll __fixunsxfti -; X86-SSE-NEXT: subl $4, %esp -; X86-SSE-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: fldz -; X86-SSE-NEXT: fxch %st(1) -; X86-SSE-NEXT: fucomi %st(1), %st -; X86-SSE-NEXT: fstp %st(1) +; X86-SSE-NEXT: calll __extendxftf2 +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: movl %ebx, %eax +; X86-SSE-NEXT: shrl $16, %eax +; X86-SSE-NEXT: andl $32767, %eax # imm = 0x7FFF +; X86-SSE-NEXT: xorl %ecx, %ecx +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: cmpl $16383, %eax # imm = 0x3FFF +; X86-SSE-NEXT: jb .LBB38_1 +; X86-SSE-NEXT: # %bb.2: # %fp-to-i-entry +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: calll __unordtf2 +; X86-SSE-NEXT: addl $32, %esp +; X86-SSE-NEXT: testl %eax, %eax +; X86-SSE-NEXT: jne .LBB38_3 +; X86-SSE-NEXT: # %bb.4: # %fp-to-i-entry +; X86-SSE-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: testl %ebx, %ebx +; X86-SSE-NEXT: movl $0, %ebp +; X86-SSE-NEXT: movl $0, %edi +; X86-SSE-NEXT: movl $0, %edx ; X86-SSE-NEXT: movl $0, %ecx +; X86-SSE-NEXT: js .LBB38_10 +; X86-SSE-NEXT: # %bb.5: # %fp-to-i-if-check.saturate +; X86-SSE-NEXT: xorl %ecx, %ecx +; X86-SSE-NEXT: movl $16482, %eax # imm = 0x4062 +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-SSE-NEXT: cmpl %edi, %eax +; X86-SSE-NEXT: movl $0, %eax +; X86-SSE-NEXT: sbbl %eax, %eax +; X86-SSE-NEXT: movl $0, %eax +; X86-SSE-NEXT: sbbl %eax, %eax +; X86-SSE-NEXT: movl $0, %eax +; X86-SSE-NEXT: sbbl %eax, %eax +; X86-SSE-NEXT: jae .LBB38_7 +; X86-SSE-NEXT: # %bb.6: +; X86-SSE-NEXT: movl $15, %edx +; X86-SSE-NEXT: movl $-1, %ecx +; X86-SSE-NEXT: movl $-1, %ebp +; X86-SSE-NEXT: movl $-1, %edi +; X86-SSE-NEXT: jmp .LBB38_10 +; X86-SSE-NEXT: .LBB38_7: # %fp-to-i-if-check.exp.size +; X86-SSE-NEXT: movzwl %bx, %eax +; X86-SSE-NEXT: orl $65536, %eax # imm = 0x10000 +; X86-SSE-NEXT: movl $16494, %edx # imm = 0x406E +; X86-SSE-NEXT: cmpl %edi, %edx ; X86-SSE-NEXT: movl $0, %edx -; X86-SSE-NEXT: movl $0, %edi -; X86-SSE-NEXT: jb .LBB38_2 -; X86-SSE-NEXT: # %bb.1: +; X86-SSE-NEXT: sbbl %edx, %edx +; X86-SSE-NEXT: movl $0, %edx +; X86-SSE-NEXT: sbbl %edx, %edx +; X86-SSE-NEXT: sbbl %ecx, %ecx +; X86-SSE-NEXT: movl %edi, %ebx +; X86-SSE-NEXT: jb .LBB38_9 +; X86-SSE-NEXT: # %bb.8: # %fp-to-i-if-exp.small +; X86-SSE-NEXT: xorps %xmm0, %xmm0 +; X86-SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movb $111, %cl +; X86-SSE-NEXT: subb %bl, %cl +; X86-SSE-NEXT: movl %ecx, %eax +; X86-SSE-NEXT: shrb $3, %al +; X86-SSE-NEXT: andb $12, %al +; X86-SSE-NEXT: movzbl %al, %esi +; X86-SSE-NEXT: movl 44(%esp,%esi), %edx +; X86-SSE-NEXT: movl 40(%esp,%esi), %eax +; X86-SSE-NEXT: movl %eax, %edi +; X86-SSE-NEXT: shrdl %cl, %edx, %edi +; X86-SSE-NEXT: movl 32(%esp,%esi), %ebx +; X86-SSE-NEXT: movl 36(%esp,%esi), %esi +; X86-SSE-NEXT: movl %esi, %ebp +; X86-SSE-NEXT: shrdl %cl, %eax, %ebp +; X86-SSE-NEXT: shrl %cl, %edx +; X86-SSE-NEXT: shrdl %cl, %esi, %ebx +; X86-SSE-NEXT: movl %ebx, %ecx +; X86-SSE-NEXT: jmp .LBB38_10 +; X86-SSE-NEXT: .LBB38_1: +; X86-SSE-NEXT: xorl %ebp, %ebp +; X86-SSE-NEXT: xorl %edi, %edi +; X86-SSE-NEXT: xorl %edx, %edx +; X86-SSE-NEXT: jmp .LBB38_10 +; X86-SSE-NEXT: .LBB38_9: # %fp-to-i-if-exp.large +; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: xorps %xmm0, %xmm0 +; X86-SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: addb $-111, %bl +; X86-SSE-NEXT: movl %ebx, %eax +; X86-SSE-NEXT: shrb $3, %al +; X86-SSE-NEXT: andb $12, %al +; X86-SSE-NEXT: negb %al +; X86-SSE-NEXT: movsbl %al, %esi +; X86-SSE-NEXT: movl 88(%esp,%esi), %edi +; X86-SSE-NEXT: movl 92(%esp,%esi), %edx +; X86-SSE-NEXT: movl %ebx, %ecx +; X86-SSE-NEXT: shldl %cl, %edi, %edx +; X86-SSE-NEXT: movl 80(%esp,%esi), %eax +; X86-SSE-NEXT: movl 84(%esp,%esi), %ebp +; X86-SSE-NEXT: shldl %cl, %ebp, %edi +; X86-SSE-NEXT: shldl %cl, %eax, %ebp +; X86-SSE-NEXT: shll %cl, %eax +; X86-SSE-NEXT: movl %eax, %ecx +; X86-SSE-NEXT: jmp .LBB38_10 +; X86-SSE-NEXT: .LBB38_3: +; X86-SSE-NEXT: xorl %ebp, %ebp +; X86-SSE-NEXT: xorl %edi, %edi +; X86-SSE-NEXT: xorl %edx, %edx +; X86-SSE-NEXT: xorl %ecx, %ecx +; X86-SSE-NEXT: .LBB38_10: # %fp-to-i-cleanup ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE-NEXT: .LBB38_2: -; X86-SSE-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}} -; X86-SSE-NEXT: fxch %st(1) -; X86-SSE-NEXT: fucompi %st(1), %st -; X86-SSE-NEXT: fstp %st(0) -; X86-SSE-NEXT: movl $15, %ebx -; X86-SSE-NEXT: cmovbel %edi, %ebx -; X86-SSE-NEXT: movl $-1, %edi -; X86-SSE-NEXT: cmoval %edi, %edx -; X86-SSE-NEXT: cmoval %edi, %ecx -; X86-SSE-NEXT: cmoval %edi, %eax -; X86-SSE-NEXT: movl %eax, 8(%esi) -; X86-SSE-NEXT: movl %ecx, 4(%esi) -; X86-SSE-NEXT: movl %edx, (%esi) -; X86-SSE-NEXT: andl $15, %ebx -; X86-SSE-NEXT: movb %bl, 12(%esi) -; X86-SSE-NEXT: movl %esi, %eax -; X86-SSE-NEXT: addl $48, %esp +; X86-SSE-NEXT: movl %ecx, (%eax) +; X86-SSE-NEXT: movl %ebp, 4(%eax) +; X86-SSE-NEXT: movl %edi, 8(%eax) +; X86-SSE-NEXT: andl $15, %edx +; X86-SSE-NEXT: movb %dl, 12(%eax) +; X86-SSE-NEXT: addl $108, %esp ; X86-SSE-NEXT: popl %esi ; X86-SSE-NEXT: popl %edi ; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: popl %ebp ; X86-SSE-NEXT: retl $4 ; ; X64-LABEL: test_unsigned_i100_f80: @@ -3980,71 +4269,156 @@ define i100 @test_unsigned_i100_f80(x86_fp80 %f) nounwind { define i128 @test_unsigned_i128_f80(x86_fp80 %f) nounwind { ; X86-X87-LABEL: test_unsigned_i128_f80: -; X86-X87: # %bb.0: +; X86-X87: # %bb.0: # %fp-to-i-entry ; X86-X87-NEXT: pushl %ebp ; X86-X87-NEXT: pushl %ebx ; X86-X87-NEXT: pushl %edi ; X86-X87-NEXT: pushl %esi -; X86-X87-NEXT: subl $60, %esp +; X86-X87-NEXT: subl $108, %esp ; X86-X87-NEXT: fldt {{[0-9]+}}(%esp) -; X86-X87-NEXT: fld %st(0) +; X86-X87-NEXT: subl $16, %esp ; X86-X87-NEXT: fstpt {{[0-9]+}}(%esp) ; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-X87-NEXT: movl %eax, (%esp) -; X86-X87-NEXT: fldz -; X86-X87-NEXT: fld %st(1) -; X86-X87-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: movl %eax, %ebx -; X86-X87-NEXT: calll __fixunsxfti -; X86-X87-NEXT: subl $4, %esp -; X86-X87-NEXT: xorl %edx, %edx -; X86-X87-NEXT: movb %bh, %ah -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %eax -; X86-X87-NEXT: jb .LBB39_2 -; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-X87-NEXT: .LBB39_2: -; X86-X87-NEXT: movl $0, %ecx -; X86-X87-NEXT: jb .LBB39_4 -; X86-X87-NEXT: # %bb.3: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: .LBB39_4: -; X86-X87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-X87-NEXT: movl $0, %ebx -; X86-X87-NEXT: jb .LBB39_6 -; X86-X87-NEXT: # %bb.5: +; X86-X87-NEXT: calll __extendxftf2 +; X86-X87-NEXT: addl $12, %esp ; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-X87-NEXT: .LBB39_6: -; X86-X87-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload -; X86-X87-NEXT: fucompp -; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax -; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $-1, %eax -; X86-X87-NEXT: movl $-1, %ebp -; X86-X87-NEXT: movl $-1, %edi -; X86-X87-NEXT: movl $-1, %esi -; X86-X87-NEXT: ja .LBB39_8 -; X86-X87-NEXT: # %bb.7: ; X86-X87-NEXT: movl %ebx, %eax -; X86-X87-NEXT: movl %edx, %ebp +; X86-X87-NEXT: shrl $16, %eax +; X86-X87-NEXT: andl $32767, %eax # imm = 0x7FFF +; X86-X87-NEXT: xorl %ecx, %ecx +; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: cmpl $16383, %eax # imm = 0x3FFF +; X86-X87-NEXT: jb .LBB39_1 +; X86-X87-NEXT: # %bb.2: # %fp-to-i-entry +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-X87-NEXT: pushl %ebx +; X86-X87-NEXT: pushl %edi +; X86-X87-NEXT: pushl %ebp +; X86-X87-NEXT: pushl %esi +; X86-X87-NEXT: pushl %ebx +; X86-X87-NEXT: pushl %edi +; X86-X87-NEXT: pushl %ebp +; X86-X87-NEXT: pushl %esi +; X86-X87-NEXT: calll __unordtf2 +; X86-X87-NEXT: addl $32, %esp +; X86-X87-NEXT: testl %eax, %eax +; X86-X87-NEXT: jne .LBB39_3 +; X86-X87-NEXT: # %bb.4: # %fp-to-i-entry +; X86-X87-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-X87-NEXT: testl %ebx, %ebx +; X86-X87-NEXT: movl $0, %ebp +; X86-X87-NEXT: movl $0, %edi +; X86-X87-NEXT: movl $0, %edx +; X86-X87-NEXT: movl $0, %ecx +; X86-X87-NEXT: js .LBB39_10 +; X86-X87-NEXT: # %bb.5: # %fp-to-i-if-check.saturate +; X86-X87-NEXT: xorl %eax, %eax +; X86-X87-NEXT: movl $16510, %ecx # imm = 0x407E ; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-X87-NEXT: .LBB39_8: -; X86-X87-NEXT: movl %esi, 12(%ecx) -; X86-X87-NEXT: movl %edi, 8(%ecx) -; X86-X87-NEXT: movl %ebp, 4(%ecx) -; X86-X87-NEXT: movl %eax, (%ecx) +; X86-X87-NEXT: cmpl %edi, %ecx +; X86-X87-NEXT: movl $0, %ecx +; X86-X87-NEXT: sbbl %ecx, %ecx +; X86-X87-NEXT: movl $0, %ecx +; X86-X87-NEXT: sbbl %ecx, %ecx +; X86-X87-NEXT: movl $0, %ecx +; X86-X87-NEXT: sbbl %ecx, %ecx +; X86-X87-NEXT: jae .LBB39_7 +; X86-X87-NEXT: # %bb.6: +; X86-X87-NEXT: movl $-1, %ecx +; X86-X87-NEXT: movl $-1, %ebp +; X86-X87-NEXT: movl $-1, %edi +; X86-X87-NEXT: movl $-1, %edx +; X86-X87-NEXT: jmp .LBB39_10 +; X86-X87-NEXT: .LBB39_7: # %fp-to-i-if-check.exp.size +; X86-X87-NEXT: movzwl %bx, %ecx +; X86-X87-NEXT: orl $65536, %ecx # imm = 0x10000 +; X86-X87-NEXT: movl $16494, %edx # imm = 0x406E +; X86-X87-NEXT: cmpl %edi, %edx +; X86-X87-NEXT: movl $0, %edx +; X86-X87-NEXT: sbbl %edx, %edx +; X86-X87-NEXT: movl $0, %edx +; X86-X87-NEXT: sbbl %edx, %edx +; X86-X87-NEXT: sbbl %eax, %eax +; X86-X87-NEXT: movl %edi, %ebx +; X86-X87-NEXT: jb .LBB39_9 +; X86-X87-NEXT: # %bb.8: # %fp-to-i-if-exp.small +; X86-X87-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-X87-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-X87-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movb $111, %cl +; X86-X87-NEXT: subb %bl, %cl ; X86-X87-NEXT: movl %ecx, %eax -; X86-X87-NEXT: addl $60, %esp +; X86-X87-NEXT: shrb $3, %al +; X86-X87-NEXT: andb $12, %al +; X86-X87-NEXT: movzbl %al, %esi +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl 28(%esp,%esi), %edx +; X86-X87-NEXT: movl 24(%esp,%esi), %eax +; X86-X87-NEXT: movl %eax, %edi +; X86-X87-NEXT: shrdl %cl, %edx, %edi +; X86-X87-NEXT: movl 16(%esp,%esi), %ebx +; X86-X87-NEXT: movl 20(%esp,%esi), %esi +; X86-X87-NEXT: movl %esi, %ebp +; X86-X87-NEXT: shrdl %cl, %eax, %ebp +; X86-X87-NEXT: shrl %cl, %edx +; X86-X87-NEXT: shrdl %cl, %esi, %ebx +; X86-X87-NEXT: movl %ebx, %ecx +; X86-X87-NEXT: jmp .LBB39_10 +; X86-X87-NEXT: .LBB39_1: +; X86-X87-NEXT: xorl %ebp, %ebp +; X86-X87-NEXT: xorl %edi, %edi +; X86-X87-NEXT: xorl %edx, %edx +; X86-X87-NEXT: jmp .LBB39_10 +; X86-X87-NEXT: .LBB39_9: # %fp-to-i-if-exp.large +; X86-X87-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-X87-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-X87-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-X87-NEXT: addb $-111, %bl +; X86-X87-NEXT: movl %ebx, %eax +; X86-X87-NEXT: shrb $3, %al +; X86-X87-NEXT: andb $12, %al +; X86-X87-NEXT: negb %al +; X86-X87-NEXT: movsbl %al, %esi +; X86-X87-NEXT: movl 72(%esp,%esi), %edi +; X86-X87-NEXT: movl 76(%esp,%esi), %edx +; X86-X87-NEXT: movl %ebx, %ecx +; X86-X87-NEXT: shldl %cl, %edi, %edx +; X86-X87-NEXT: movl 64(%esp,%esi), %eax +; X86-X87-NEXT: movl 68(%esp,%esi), %ebp +; X86-X87-NEXT: shldl %cl, %ebp, %edi +; X86-X87-NEXT: shldl %cl, %eax, %ebp +; X86-X87-NEXT: shll %cl, %eax +; X86-X87-NEXT: movl %eax, %ecx +; X86-X87-NEXT: jmp .LBB39_10 +; X86-X87-NEXT: .LBB39_3: +; X86-X87-NEXT: xorl %ebp, %ebp +; X86-X87-NEXT: xorl %edi, %edi +; X86-X87-NEXT: xorl %edx, %edx +; X86-X87-NEXT: xorl %ecx, %ecx +; X86-X87-NEXT: .LBB39_10: # %fp-to-i-cleanup +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-X87-NEXT: movl %ecx, (%eax) +; X86-X87-NEXT: movl %ebp, 4(%eax) +; X86-X87-NEXT: movl %edi, 8(%eax) +; X86-X87-NEXT: movl %edx, 12(%eax) +; X86-X87-NEXT: addl $108, %esp ; X86-X87-NEXT: popl %esi ; X86-X87-NEXT: popl %edi ; X86-X87-NEXT: popl %ebx @@ -4052,54 +4426,156 @@ define i128 @test_unsigned_i128_f80(x86_fp80 %f) nounwind { ; X86-X87-NEXT: retl $4 ; ; X86-SSE-LABEL: test_unsigned_i128_f80: -; X86-SSE: # %bb.0: +; X86-SSE: # %bb.0: # %fp-to-i-entry +; X86-SSE-NEXT: pushl %ebp ; X86-SSE-NEXT: pushl %ebx ; X86-SSE-NEXT: pushl %edi ; X86-SSE-NEXT: pushl %esi -; X86-SSE-NEXT: subl $48, %esp -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: subl $108, %esp ; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp) -; X86-SSE-NEXT: fld %st(0) -; X86-SSE-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill +; X86-SSE-NEXT: subl $16, %esp ; X86-SSE-NEXT: fstpt {{[0-9]+}}(%esp) ; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-SSE-NEXT: movl %eax, (%esp) -; X86-SSE-NEXT: calll __fixunsxfti -; X86-SSE-NEXT: subl $4, %esp -; X86-SSE-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: fldz -; X86-SSE-NEXT: fxch %st(1) -; X86-SSE-NEXT: fucomi %st(1), %st -; X86-SSE-NEXT: fstp %st(1) +; X86-SSE-NEXT: calll __extendxftf2 +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: movl %ebx, %eax +; X86-SSE-NEXT: shrl $16, %eax +; X86-SSE-NEXT: andl $32767, %eax # imm = 0x7FFF +; X86-SSE-NEXT: xorl %ecx, %ecx +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: cmpl $16383, %eax # imm = 0x3FFF +; X86-SSE-NEXT: jb .LBB39_1 +; X86-SSE-NEXT: # %bb.2: # %fp-to-i-entry +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: calll __unordtf2 +; X86-SSE-NEXT: addl $32, %esp +; X86-SSE-NEXT: testl %eax, %eax +; X86-SSE-NEXT: jne .LBB39_3 +; X86-SSE-NEXT: # %bb.4: # %fp-to-i-entry +; X86-SSE-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: testl %ebx, %ebx +; X86-SSE-NEXT: movl $0, %ebp +; X86-SSE-NEXT: movl $0, %edi +; X86-SSE-NEXT: movl $0, %edx ; X86-SSE-NEXT: movl $0, %ecx +; X86-SSE-NEXT: js .LBB39_10 +; X86-SSE-NEXT: # %bb.5: # %fp-to-i-if-check.saturate +; X86-SSE-NEXT: xorl %ecx, %ecx +; X86-SSE-NEXT: movl $16510, %eax # imm = 0x407E +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-SSE-NEXT: cmpl %edi, %eax +; X86-SSE-NEXT: movl $0, %eax +; X86-SSE-NEXT: sbbl %eax, %eax +; X86-SSE-NEXT: movl $0, %eax +; X86-SSE-NEXT: sbbl %eax, %eax +; X86-SSE-NEXT: movl $0, %eax +; X86-SSE-NEXT: sbbl %eax, %eax +; X86-SSE-NEXT: jae .LBB39_7 +; X86-SSE-NEXT: # %bb.6: +; X86-SSE-NEXT: movl $-1, %ecx +; X86-SSE-NEXT: movl $-1, %ebp +; X86-SSE-NEXT: movl $-1, %edi +; X86-SSE-NEXT: movl $-1, %edx +; X86-SSE-NEXT: jmp .LBB39_10 +; X86-SSE-NEXT: .LBB39_7: # %fp-to-i-if-check.exp.size +; X86-SSE-NEXT: movzwl %bx, %eax +; X86-SSE-NEXT: orl $65536, %eax # imm = 0x10000 +; X86-SSE-NEXT: movl $16494, %edx # imm = 0x406E +; X86-SSE-NEXT: cmpl %edi, %edx ; X86-SSE-NEXT: movl $0, %edx -; X86-SSE-NEXT: movl $0, %edi -; X86-SSE-NEXT: jb .LBB39_2 -; X86-SSE-NEXT: # %bb.1: +; X86-SSE-NEXT: sbbl %edx, %edx +; X86-SSE-NEXT: movl $0, %edx +; X86-SSE-NEXT: sbbl %edx, %edx +; X86-SSE-NEXT: sbbl %ecx, %ecx +; X86-SSE-NEXT: movl %edi, %ebx +; X86-SSE-NEXT: jb .LBB39_9 +; X86-SSE-NEXT: # %bb.8: # %fp-to-i-if-exp.small +; X86-SSE-NEXT: xorps %xmm0, %xmm0 +; X86-SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movb $111, %cl +; X86-SSE-NEXT: subb %bl, %cl +; X86-SSE-NEXT: movl %ecx, %eax +; X86-SSE-NEXT: shrb $3, %al +; X86-SSE-NEXT: andb $12, %al +; X86-SSE-NEXT: movzbl %al, %esi +; X86-SSE-NEXT: movl 44(%esp,%esi), %edx +; X86-SSE-NEXT: movl 40(%esp,%esi), %eax +; X86-SSE-NEXT: movl %eax, %edi +; X86-SSE-NEXT: shrdl %cl, %edx, %edi +; X86-SSE-NEXT: movl 32(%esp,%esi), %ebx +; X86-SSE-NEXT: movl 36(%esp,%esi), %esi +; X86-SSE-NEXT: movl %esi, %ebp +; X86-SSE-NEXT: shrdl %cl, %eax, %ebp +; X86-SSE-NEXT: shrl %cl, %edx +; X86-SSE-NEXT: shrdl %cl, %esi, %ebx +; X86-SSE-NEXT: movl %ebx, %ecx +; X86-SSE-NEXT: jmp .LBB39_10 +; X86-SSE-NEXT: .LBB39_1: +; X86-SSE-NEXT: xorl %ebp, %ebp +; X86-SSE-NEXT: xorl %edi, %edi +; X86-SSE-NEXT: xorl %edx, %edx +; X86-SSE-NEXT: jmp .LBB39_10 +; X86-SSE-NEXT: .LBB39_9: # %fp-to-i-if-exp.large +; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: xorps %xmm0, %xmm0 +; X86-SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: addb $-111, %bl +; X86-SSE-NEXT: movl %ebx, %eax +; X86-SSE-NEXT: shrb $3, %al +; X86-SSE-NEXT: andb $12, %al +; X86-SSE-NEXT: negb %al +; X86-SSE-NEXT: movsbl %al, %esi +; X86-SSE-NEXT: movl 88(%esp,%esi), %edi +; X86-SSE-NEXT: movl 92(%esp,%esi), %edx +; X86-SSE-NEXT: movl %ebx, %ecx +; X86-SSE-NEXT: shldl %cl, %edi, %edx +; X86-SSE-NEXT: movl 80(%esp,%esi), %eax +; X86-SSE-NEXT: movl 84(%esp,%esi), %ebp +; X86-SSE-NEXT: shldl %cl, %ebp, %edi +; X86-SSE-NEXT: shldl %cl, %eax, %ebp +; X86-SSE-NEXT: shll %cl, %eax +; X86-SSE-NEXT: movl %eax, %ecx +; X86-SSE-NEXT: jmp .LBB39_10 +; X86-SSE-NEXT: .LBB39_3: +; X86-SSE-NEXT: xorl %ebp, %ebp +; X86-SSE-NEXT: xorl %edi, %edi +; X86-SSE-NEXT: xorl %edx, %edx +; X86-SSE-NEXT: xorl %ecx, %ecx +; X86-SSE-NEXT: .LBB39_10: # %fp-to-i-cleanup ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE-NEXT: .LBB39_2: -; X86-SSE-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}} -; X86-SSE-NEXT: fxch %st(1) -; X86-SSE-NEXT: fucompi %st(1), %st -; X86-SSE-NEXT: fstp %st(0) -; X86-SSE-NEXT: movl $-1, %ebx -; X86-SSE-NEXT: cmoval %ebx, %edi -; X86-SSE-NEXT: cmoval %ebx, %edx -; X86-SSE-NEXT: cmoval %ebx, %ecx -; X86-SSE-NEXT: cmoval %ebx, %eax -; X86-SSE-NEXT: movl %eax, 12(%esi) -; X86-SSE-NEXT: movl %ecx, 8(%esi) -; X86-SSE-NEXT: movl %edx, 4(%esi) -; X86-SSE-NEXT: movl %edi, (%esi) -; X86-SSE-NEXT: movl %esi, %eax -; X86-SSE-NEXT: addl $48, %esp +; X86-SSE-NEXT: movl %ecx, (%eax) +; X86-SSE-NEXT: movl %ebp, 4(%eax) +; X86-SSE-NEXT: movl %edi, 8(%eax) +; X86-SSE-NEXT: movl %edx, 12(%eax) +; X86-SSE-NEXT: addl $108, %esp ; X86-SSE-NEXT: popl %esi ; X86-SSE-NEXT: popl %edi ; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: popl %ebp ; X86-SSE-NEXT: retl $4 ; ; X64-LABEL: test_unsigned_i128_f80: