From bbe16b7af2d66d2abcf75ad72af6155d4da964cb Mon Sep 17 00:00:00 2001 From: Sam Tebbs Date: Mon, 7 Jun 2021 13:29:38 +0100 Subject: [PATCH] [ARM] Transform a fixed-point to floating-point conversion into a VCVT_fix Conversion from a fixed-point number to a floating-point number is done by multiplying the fixed-point number by 2^(-n) where n is the number of fractional bits. Currently this is lowered to a vcvt (integer to floating-point) then a vmul, but it can instead be lowered directly to a vcvt (fixed-point to floating-point). This patch enables such transformations as long as the multiplication factor is a power of 2. Differential Revision: https://reviews.llvm.org/D103903 --- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 101 ++ llvm/test/CodeGen/Thumb2/mve-vcvt-fixed.ll | 1031 ++++++++++++++++++++ 2 files changed, 1132 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb2/mve-vcvt-fixed.ll diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 4eee7a73a4569..bb6a0c95a114b 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -15,6 +15,7 @@ #include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "Utils/ARMBaseInfo.h" +#include "llvm/ADT/APSInt.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -195,6 +196,7 @@ class ARMDAGToDAGISel : public SelectionDAGISel { bool tryT1IndexedLoad(SDNode *N); bool tryT2IndexedLoad(SDNode *N); bool tryMVEIndexedLoad(SDNode *N); + bool tryFMULFixed(SDNode *N, SDLoc dl); /// SelectVLD - Select NEON load intrinsics. NumVecs should be /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for @@ -3148,6 +3150,101 @@ bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) { return false; } +bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) { + // Transform a fixed-point to floating-point conversion to a VCVT + if (!Subtarget->hasMVEFloatOps()) + return false; + auto Type = N->getValueType(0); + if (!Type.isVector()) + return false; + + auto ScalarType = Type.getVectorElementType(); + unsigned ScalarBits = ScalarType.getSizeInBits(); + auto LHS = N->getOperand(0); + auto RHS = N->getOperand(1); + + if (ScalarBits > 32) + return false; + + if (RHS.getOpcode() == ISD::BITCAST) { + if (RHS.getValueType().getVectorElementType().getSizeInBits() != ScalarBits) + return false; + RHS = RHS.getOperand(0); + } + if (RHS.getValueType().getVectorElementType().getSizeInBits() != ScalarBits) + return false; + if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP) + return false; + + bool IsUnsigned = LHS.getOpcode() == ISD::UINT_TO_FP; + SDNodeFlags FMulFlags = N->getFlags(); + // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is + // allowed in 16 bit unsigned floats + if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned) + return false; + + APFloat ImmAPF(0.0f); + switch (RHS.getOpcode()) { + case ARMISD::VMOVIMM: + case ARMISD::VDUP: { + if (!isa(RHS.getOperand(0))) + return false; + unsigned Imm = RHS.getConstantOperandVal(0); + if (RHS.getOpcode() == ARMISD::VMOVIMM) + Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits); + ImmAPF = + APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(), + APInt(ScalarBits, Imm)); + break; + } + case ARMISD::VMOVFPIMM: { + ImmAPF = APFloat(ARM_AM::getFPImmFloat(RHS.getConstantOperandVal(0))); + break; + } + default: + return false; + } + + // Multiplying by a factor of 2^(-n) will convert from fixed point to + // floating point, where n is the number of fractional bits in the fixed + // point number. Taking the inverse and log2 of the factor will give n + APFloat Inverse(0.0f); + if (!ImmAPF.getExactInverse(&Inverse)) + return false; + + APSInt Converted(64, 0); + bool IsExact; + Inverse.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven, + &IsExact); + if (!IsExact || !Converted.isPowerOf2()) + return false; + + unsigned FracBits = Converted.logBase2(); + if (FracBits > ScalarBits) + return false; + + auto SintToFpOperand = LHS.getOperand(0); + SmallVector Ops{SintToFpOperand, + CurDAG->getConstant(FracBits, dl, MVT::i32)}; + AddEmptyMVEPredicateToOps(Ops, dl, Type); + + unsigned int Opcode; + switch (ScalarBits) { + case 16: + Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix; + break; + case 32: + Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix; + break; + default: + llvm_unreachable("unexpected number of scalar bits"); + break; + } + + ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops)); + return true; +} + bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { if (!Subtarget->hasV6T2Ops()) return false; @@ -3583,6 +3680,10 @@ void ARMDAGToDAGISel::Select(SDNode *N) { if (tryV6T2BitfieldExtractOp(N, true)) return; break; + case ISD::FMUL: + if (tryFMULFixed(N, dl)) + return; + break; case ISD::MUL: if (Subtarget->isThumb1Only()) break; diff --git a/llvm/test/CodeGen/Thumb2/mve-vcvt-fixed.ll b/llvm/test/CodeGen/Thumb2/mve-vcvt-fixed.ll new file mode 100644 index 0000000000000..38a2cfc1a579d --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-vcvt-fixed.ll @@ -0,0 +1,1031 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi %s -o - -mattr=+mve.fp | FileCheck %s + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_1(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_1: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_2(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_2: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #2 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_3(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_3: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #3 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_4(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_4: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #4 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_5(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_5: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #5 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_6(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_6: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #6 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_7(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_7: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #7 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_8(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #8 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_9(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_9: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #9 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_10(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_10: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #10 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_11(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_11: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #11 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_12(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_12: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #12 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_13(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_13: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #13 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_14(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_14: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #14 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_15(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_15: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #15 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_16(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #16 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_17(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_17: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #17 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_18(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_18: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #18 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_19(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_19: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #19 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_20(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_20: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #20 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_21(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_21: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #21 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_22(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_22: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #22 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_23(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_23: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #23 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_24(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_24: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #24 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_25(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_25: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #25 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_26(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_26: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #26 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_27(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_27: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #27 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_28(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_28: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #28 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_29(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_29: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #29 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_30(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_30: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #30 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_31(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_31: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #31 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_32(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #32 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_i32_33(<4 x i32> %0) { +; CHECK-LABEL: vcvt_i32_33: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q1, #0x2f000000 +; CHECK-NEXT: vcvt.f32.s32 q0, q0 +; CHECK-NEXT: vmul.f32 q0, q0, q1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_i16_1(<8 x i16> %0) { +; CHECK-LABEL: vcvt_i16_1: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #1 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_i16_2(<8 x i16> %0) { +; CHECK-LABEL: vcvt_i16_2: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #2 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_i16_3(<8 x i16> %0) { +; CHECK-LABEL: vcvt_i16_3: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #3 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_i16_4(<8 x i16> %0) { +; CHECK-LABEL: vcvt_i16_4: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #4 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_i16_5(<8 x i16> %0) { +; CHECK-LABEL: vcvt_i16_5: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #5 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_i16_6(<8 x i16> %0) { +; CHECK-LABEL: vcvt_i16_6: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #6 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_i16_7(<8 x i16> %0) { +; CHECK-LABEL: vcvt_i16_7: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #7 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_i16_8(<8 x i16> %0) { +; CHECK-LABEL: vcvt_i16_8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #8 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_i16_9(<8 x i16> %0) { +; CHECK-LABEL: vcvt_i16_9: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #9 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_i16_10(<8 x i16> %0) { +; CHECK-LABEL: vcvt_i16_10: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #10 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_i16_11(<8 x i16> %0) { +; CHECK-LABEL: vcvt_i16_11: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #11 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_i16_12(<8 x i16> %0) { +; CHECK-LABEL: vcvt_i16_12: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #12 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_i16_13(<8 x i16> %0) { +; CHECK-LABEL: vcvt_i16_13: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #13 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_i16_14(<8 x i16> %0) { +; CHECK-LABEL: vcvt_i16_14: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #14 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_i16_15(<8 x i16> %0) { +; CHECK-LABEL: vcvt_i16_15: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 q1, #0x200 +; CHECK-NEXT: vcvt.f16.s16 q0, q0 +; CHECK-NEXT: vmul.f16 q0, q0, q1 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_1(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_1: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_2(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_2: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #2 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_3(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_3: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #3 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_4(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_4: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #4 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_5(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_5: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #5 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_6(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_6: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #6 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_7(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_7: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #7 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_8(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #8 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_9(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_9: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #9 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_10(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_10: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #10 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_11(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_11: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #11 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_12(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_12: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #12 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_13(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_13: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #13 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_14(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_14: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #14 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_15(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_15: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #15 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_16(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #16 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_17(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_17: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #17 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_18(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_18: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #18 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_19(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_19: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #19 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_20(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_20: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #20 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_21(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_21: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #21 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_22(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_22: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #22 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_23(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_23: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #23 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_24(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_24: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #24 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_25(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_25: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #25 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_26(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_26: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #26 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_27(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_27: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #27 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_28(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_28: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #28 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_29(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_29: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #29 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_30(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_30: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #30 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_31(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_31: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q1, #0xb0000000 +; CHECK-NEXT: vcvt.f32.u32 q0, q0 +; CHECK-NEXT: vmul.f32 q0, q0, q1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_32(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #32 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_33(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_33: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q1, #0x2f000000 +; CHECK-NEXT: vcvt.f32.u32 q0, q0 +; CHECK-NEXT: vmul.f32 q0, q0, q1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_u16_1(<8 x i16> %0) { +; CHECK-LABEL: vcvt_u16_1: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #1 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_u16_2(<8 x i16> %0) { +; CHECK-LABEL: vcvt_u16_2: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #2 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_u16_3(<8 x i16> %0) { +; CHECK-LABEL: vcvt_u16_3: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #3 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_u16_4(<8 x i16> %0) { +; CHECK-LABEL: vcvt_u16_4: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #4 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_u16_5(<8 x i16> %0) { +; CHECK-LABEL: vcvt_u16_5: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #5 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_u16_6(<8 x i16> %0) { +; CHECK-LABEL: vcvt_u16_6: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #6 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_u16_7(<8 x i16> %0) { +; CHECK-LABEL: vcvt_u16_7: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #7 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_u16_8(<8 x i16> %0) { +; CHECK-LABEL: vcvt_u16_8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #8 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_u16_9(<8 x i16> %0) { +; CHECK-LABEL: vcvt_u16_9: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #9 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_u16_10(<8 x i16> %0) { +; CHECK-LABEL: vcvt_u16_10: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #10 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_u16_11(<8 x i16> %0) { +; CHECK-LABEL: vcvt_u16_11: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #11 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_u16_12(<8 x i16> %0) { +; CHECK-LABEL: vcvt_u16_12: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #12 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_u16_13(<8 x i16> %0) { +; CHECK-LABEL: vcvt_u16_13: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #13 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_u16_14(<8 x i16> %0) { +; CHECK-LABEL: vcvt_u16_14: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #14 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_u16_15(<8 x i16> %0) { +; CHECK-LABEL: vcvt_u16_15: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 q1, #0x200 +; CHECK-NEXT: vcvt.f16.u16 q0, q0 +; CHECK-NEXT: vmul.f16 q0, q0, q1 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_u16_inf(<8 x i16> %0) { +; CHECK-LABEL: vcvt_u16_inf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 q1, #0x400 +; CHECK-NEXT: vcvt.f16.u16 q0, q0 +; CHECK-NEXT: vmul.f16 q0, q0, q1 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <8 x half> @vcvt_s16_inf(<8 x i16> %0) { +; CHECK-LABEL: vcvt_s16_inf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #14 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul <8 x half> %2, + ret <8 x half> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_bad_imm(<4 x i32> %0) { +; CHECK-LABEL: vcvt_bad_imm: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r0, #2048 +; CHECK-NEXT: vcvt.f32.s32 q0, q0 +; CHECK-NEXT: movt r0, #15104 +; CHECK-NEXT: vmul.f32 q0, q0, r0 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_negative(<4 x i32> %0) { +; CHECK-LABEL: vcvt_negative: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q1, #0xb8000000 +; CHECK-NEXT: vcvt.f32.s32 q0, q0 +; CHECK-NEXT: vmul.f32 q0, q0, q1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_negative2(<4 x i32> %0) { +; CHECK-LABEL: vcvt_negative2: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q1, #0xb0000000 +; CHECK-NEXT: vcvt.f32.s32 q0, q0 +; CHECK-NEXT: vmul.f32 q0, q0, q1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +}