diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 19ca9f6ae3fe3..570a058bde8da 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -16021,6 +16021,8 @@ functions would, but without setting errno. If the rounded value is too large
 to be stored in the result type, the return value is a non-deterministic value
 (equivalent to `freeze poison`).
 
+.. _int_lrint:
+
 '``llvm.lrint.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -16066,6 +16068,8 @@ would, but without setting errno. If the rounded value is too large to be
 stored in the result type, the return value is a non-deterministic value
 (equivalent to `freeze poison`).
 
+.. _int_llrint:
+
 '``llvm.llrint.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -23382,6 +23386,100 @@ Examples:
 
       %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a)
       %also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> poison
 
+.. _int_vp_lrint:
+
+'``llvm.vp.lrint.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i32> @llvm.vp.lrint.v16i32.v16f32(<16 x float> <op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x i32> @llvm.vp.lrint.nxv4i32.nxv4f32(<vscale x 4 x float> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare <256 x i64> @llvm.vp.lrint.v256i64.v256f64(<256 x double> <op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated lrint of a vector of floating-point values.
+
+
+Arguments:
+""""""""""
+
+The result is an integer vector and the first operand is a vector of :ref:`floating-point <t_floating>`
+type with the same number of elements as the result vector type. The second
+operand is the vector mask and has the same number of elements as the result
+vector type. The third operand is the explicit vector length of the operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.lrint``' intrinsic performs lrint (:ref:`lrint <int_lrint>`) of
+the first vector operand on each enabled lane. The result on disabled lanes is a
+:ref:`poison value <poisonvalues>`.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i32> @llvm.vp.lrint.v4i32.v4f32(<4 x float> %a, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> %a)
+      %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
+
+.. _int_vp_llrint:
+
+'``llvm.vp.llrint.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i32> @llvm.vp.llrint.v16i32.v16f32(<16 x float> <op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x i32> @llvm.vp.llrint.nxv4i32.nxv4f32(<vscale x 4 x float> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare <256 x i64> @llvm.vp.llrint.v256i64.v256f64(<256 x double> <op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated llrint of a vector of floating-point values.
+
+
+Arguments:
+""""""""""
+The result is an integer vector and the first operand is a vector of :ref:`floating-point <t_floating>`
+type with the same number of elements as the result vector type. The second
+operand is the vector mask and has the same number of elements as the result
+vector type. The third operand is the explicit vector length of the operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.llrint``' intrinsic performs llrint (:ref:`llrint <int_llrint>`) of
+the first vector operand on each enabled lane. The result on disabled lanes is a
+:ref:`poison value <poisonvalues>`.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i32> @llvm.vp.llrint.v4i32.v4f32(<4 x float> %a, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = call <4 x i32> @llvm.llrint.v4i32.v4f32(<4 x float> %a)
+      %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
+
+
 .. _int_vp_bitreverse:
 
 '``llvm.vp.bitreverse.*``' Intrinsics
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index d7c1ce153a6c8..0f13d25eb30eb 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2057,6 +2057,14 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
                                              [ LLVMMatchType<0>,
                                                LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                                llvm_i32_ty]>;
+  def int_vp_lrint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                                           [ llvm_anyvector_ty,
+                                             LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                             llvm_i32_ty]>;
+  def int_vp_llrint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                                            [ llvm_anyvector_ty,
+                                              LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                              llvm_i32_ty]>;
 
   // Casts
   def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 4089acf9ec3f0..1c2708a9e8543 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -461,6 +461,18 @@ VP_PROPERTY_FUNCTIONAL_INTRINSIC(nearbyint)
 VP_PROPERTY_FUNCTIONAL_SDOPC(FNEARBYINT)
 END_REGISTER_VP(vp_nearbyint, VP_FNEARBYINT)
 
+// llvm.vp.lrint(x,mask,vlen)
+BEGIN_REGISTER_VP(vp_lrint, 1, 2, VP_LRINT, 0)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(lrint)
+VP_PROPERTY_FUNCTIONAL_SDOPC(LRINT)
+END_REGISTER_VP(vp_lrint, VP_LRINT)
+
+// llvm.vp.llrint(x,mask,vlen)
+BEGIN_REGISTER_VP(vp_llrint, 1, 2, VP_LLRINT, 0)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(llrint)
+VP_PROPERTY_FUNCTIONAL_SDOPC(LLRINT)
+END_REGISTER_VP(vp_llrint, VP_LLRINT)
+
 ///// } Floating-Point Arithmetic
 
 ///// Type Casts {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 90cda2a1155b6..5fb9d8d07d151 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1102,7 +1102,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::FRINT:
   case ISD::VP_FRINT:
   case ISD::LRINT:
+  case ISD::VP_LRINT:
   case ISD::LLRINT:
+  case ISD::VP_LLRINT:
   case ISD::FROUND:
   case ISD::VP_FROUND:
   case ISD::FROUNDEVEN:
@@ -4263,6 +4265,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
 
   case ISD::LRINT:
   case ISD::LLRINT:
+  case ISD::VP_LRINT:
+  case ISD::VP_LLRINT:
     Res = WidenVecRes_XRINT(N);
     break;
 
@@ -4869,7 +4873,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_XRINT(SDNode *N) {
   if (WidenNumElts != SrcVT.getVectorElementCount())
     return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue());
 
-  return DAG.getNode(N->getOpcode(), dl, WidenVT, Src);
+  if (N->getNumOperands() == 1)
+    return DAG.getNode(N->getOpcode(), dl, WidenVT, Src);
+
+  assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+  assert(N->isVPOpcode() && "Expected VP opcode");
+
+  SDValue Mask =
+      GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount());
+  return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, Mask, N->getOperand(2));
 }
 
 SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
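// Editor's illustration (not part of the patch): the IntrinsicInst.cpp hunk
// below teaches VPIntrinsic::getDeclarationForParams about vp.lrint/vp.llrint,
// which are overloaded on both the result vector type and the source vector
// type. The sketch below shows how a caller might emit the new intrinsic
// through that helper; emitVPLRint and the IRBuilder plumbing are assumptions
// made for this example only, not code introduced by the patch.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"

static llvm::Value *emitVPLRint(llvm::IRBuilder<> &Builder, llvm::Module &M,
                                llvm::Value *Src, llvm::Value *Mask,
                                llvm::Value *EVL, llvm::Type *ResVecTy) {
  using namespace llvm;
  // Both overloaded types appear in the mangled name, e.g.
  // @llvm.vp.lrint.nxv2i64.nxv2f64 for <vscale x 2 x double> -> <vscale x 2 x i64>.
  SmallVector<Value *, 3> Args = {Src, Mask, EVL};
  Function *F = VPIntrinsic::getDeclarationForParams(&M, Intrinsic::vp_lrint,
                                                     ResVecTy, Args);
  return Builder.CreateCall(F, Args);
}

// On RISC-V, the RISCVISelLowering.cpp changes further down map VP_LRINT and
// VP_LLRINT to RISCVISD::VFCVT_X_F_VL; the added tests then expect
// vfcvt.x.f.v for same-width conversions, vfwcvt.x.f.v when the integer
// element is wider than the FP element, and vfncvt.x.f.w when it is narrower.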
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index 5050091836b7f..89403e1d7fcb4 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -666,6 +666,8 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID,
   case Intrinsic::vp_fpext:
   case Intrinsic::vp_ptrtoint:
   case Intrinsic::vp_inttoptr:
+  case Intrinsic::vp_lrint:
+  case Intrinsic::vp_llrint:
     VPFunc = Intrinsic::getDeclaration(
         M, VPID, {ReturnType, Params[0]->getType()});
     break;
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index b04d39c700a8f..4f321bc516cc3 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6183,9 +6183,11 @@ void Verifier::visitVPIntrinsic(VPIntrinsic &VPI) {
     break;
   case Intrinsic::vp_fptoui:
   case Intrinsic::vp_fptosi:
+  case Intrinsic::vp_lrint:
+  case Intrinsic::vp_llrint:
     Check(
         RetTy->isIntOrIntVectorTy() && ValTy->isFPOrFPVectorTy(),
-        "llvm.vp.fptoui or llvm.vp.fptosi intrinsic first argument element "
+        "llvm.vp.fptoui, llvm.vp.fptosi, llvm.vp.lrint or llvm.vp.llrint "
+        "intrinsic first argument element "
         "type must be floating-point and result element type must be integer",
         *VPCast);
     break;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 540c2e7476dc1..0c98642748d4e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -706,7 +706,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         ISD::VP_FCEIL,       ISD::VP_FFLOOR,      ISD::VP_FROUND,
         ISD::VP_FROUNDEVEN,  ISD::VP_FCOPYSIGN,   ISD::VP_FROUNDTOZERO,
         ISD::VP_FRINT,       ISD::VP_FNEARBYINT,  ISD::VP_IS_FPCLASS,
-        ISD::VP_FMINIMUM,    ISD::VP_FMAXIMUM,    ISD::EXPERIMENTAL_VP_REVERSE,
+        ISD::VP_FMINIMUM,    ISD::VP_FMAXIMUM,    ISD::VP_LRINT,
+        ISD::VP_LLRINT,      ISD::EXPERIMENTAL_VP_REVERSE,
         ISD::EXPERIMENTAL_VP_SPLICE};
 
     static const unsigned IntegerVecReduceOps[] = {
@@ -5811,6 +5812,11 @@ static unsigned getRISCVVLOp(SDValue Op) {
   case ISD::FMAXNUM:
   case ISD::VP_FMAXNUM:
     return RISCVISD::VFMAX_VL;
+  case ISD::LRINT:
+  case ISD::VP_LRINT:
+  case ISD::LLRINT:
+  case ISD::VP_LLRINT:
+    return RISCVISD::VFCVT_X_F_VL;
   }
   // clang-format on
 #undef OP_CASE
@@ -6801,6 +6807,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   case ISD::VP_USUBSAT:
   case ISD::VP_SADDSAT:
   case ISD::VP_SSUBSAT:
+  case ISD::VP_LRINT:
+  case ISD::VP_LLRINT:
     return lowerVPOp(Op, DAG);
   case ISD::VP_AND:
   case ISD::VP_OR:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll
new file mode 100644
index 0000000000000..8282b8884aed6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint-vp.ll
@@ -0,0 +1,187 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f,+d -target-abi=ilp32d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d -target-abi=lp64d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
+
+define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x, <1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: llrint_v1i64_v1f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV32-NEXT:    vfwcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: llrint_v1i64_v1f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; RV64-NEXT:    vfwcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    ret
+  %a = call <1 x i64> @llvm.vp.llrint.v1i64.v1f32(<1 x float> %x, <1 x i1> %m, i32 %evl)
+  ret
<1 x i64> %a +} +declare <1 x i64> @llvm.vp.llrint.v1i64.v1f32(<1 x float>, <1 x i1>, i32) + +define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: llrint_v2i64_v2f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; RV32-NEXT: vfwcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: llrint_v2i64_v2f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; RV64-NEXT: vfwcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call <2 x i64> @llvm.vp.llrint.v2i64.v2f32(<2 x float> %x, <2 x i1> %m, i32 %evl) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.vp.llrint.v2i64.v2f32(<2 x float>, <2 x i1>, i32) + +define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x, <3 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: llrint_v3i64_v3f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV32-NEXT: vfwcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: llrint_v3i64_v3f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV64-NEXT: vfwcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %a = call <3 x i64> @llvm.vp.llrint.v3i64.v3f32(<3 x float> %x, <3 x i1> %m, i32 %evl) + ret <3 x i64> %a +} +declare <3 x i64> @llvm.vp.llrint.v3i64.v3f32(<3 x float>, <3 x i1>, i32) + +define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: llrint_v4i64_v4f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV32-NEXT: vfwcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: llrint_v4i64_v4f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV64-NEXT: vfwcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %a = call <4 x i64> @llvm.vp.llrint.v4i64.v4f32(<4 x float> %x, <4 x i1> %m, i32 %evl) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.vp.llrint.v4i64.v4f32(<4 x float>, <4 x i1>, i32) + +define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: llrint_v8i64_v8f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; RV32-NEXT: vfwcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: llrint_v8i64_v8f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; RV64-NEXT: vfwcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %a = call <8 x i64> @llvm.vp.llrint.v8i64.v8f32(<8 x float> %x, <8 x i1> %m, i32 %evl) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.vp.llrint.v8i64.v8f32(<8 x float>, <8 x i1>, i32) + +define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x, <16 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: llrint_v16i64_v16f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; RV32-NEXT: vfwcvt.x.f.v v16, v8, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: llrint_v16i64_v16f32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; RV64-NEXT: vfwcvt.x.f.v v16, v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %a = call <16 x i64> @llvm.vp.llrint.v16i64.v16f32(<16 x float> %x, <16 x i1> %m, i32 %evl) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.vp.llrint.v16i64.v16f32(<16 x float>, <16 x i1>, i32) + +define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x, <1 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: llrint_v1i64_v1f64: +; RV32: # %bb.0: +; 
RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: llrint_v1i64_v1f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-NEXT: ret + %a = call <1 x i64> @llvm.vp.llrint.v1i64.v1f64(<1 x double> %x, <1 x i1> %m, i32 %evl) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.vp.llrint.v1i64.v1f64(<1 x double>, <1 x i1>, i32) + +define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: llrint_v2i64_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: llrint_v2i64_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-NEXT: ret + %a = call <2 x i64> @llvm.vp.llrint.v2i64.v2f64(<2 x double> %x, <2 x i1> %m, i32 %evl) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.vp.llrint.v2i64.v2f64(<2 x double>, <2 x i1>, i32) + +define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: llrint_v4i64_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: llrint_v4i64_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-NEXT: ret + %a = call <4 x i64> @llvm.vp.llrint.v4i64.v4f64(<4 x double> %x, <4 x i1> %m, i32 %evl) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.vp.llrint.v4i64.v4f64(<4 x double>, <4 x i1>, i32) + +define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: llrint_v8i64_v8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: llrint_v8i64_v8f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-NEXT: ret + %a = call <8 x i64> @llvm.vp.llrint.v8i64.v8f64(<8 x double> %x, <8 x i1> %m, i32 %evl) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.vp.llrint.v8i64.v8f64(<8 x double>, <8 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll new file mode 100644 index 0000000000000..08dd1c79f24c9 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll @@ -0,0 +1,233 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+f,+d \ +; RUN: -target-abi=ilp32d -verify-machineinstrs | FileCheck %s --check-prefix=RV32 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv64 -mattr=+v,+f,+d \ +; RUN: -target-abi=lp64d -verify-machineinstrs | FileCheck %s --check-prefix=RV64-i32 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+f,+d \ +; RUN: -target-abi=lp64d -verify-machineinstrs | FileCheck %s --check-prefix=RV64-i64 + +define <1 x iXLen> @lrint_v1f32(<1 x float> %x, <1 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: lrint_v1f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV32-NEXT: ret +; +; RV64-i32-LABEL: lrint_v1f32: +; RV64-i32: # %bb.0: +; RV64-i32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; RV64-i32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-i32-NEXT: ret +; +; RV64-i64-LABEL: lrint_v1f32: +; RV64-i64: # %bb.0: +; RV64-i64-NEXT: 
vsetvli zero, a0, e32, mf2, ta, ma +; RV64-i64-NEXT: vfwcvt.x.f.v v9, v8, v0.t +; RV64-i64-NEXT: vmv1r.v v8, v9 +; RV64-i64-NEXT: ret + %a = call <1 x iXLen> @llvm.vp.lrint.v1iXLen.v1f32(<1 x float> %x, <1 x i1> %m, i32 %evl) + ret <1 x iXLen> %a +} +declare <1 x iXLen> @llvm.vp.lrint.v1iXLen.v1f32(<1 x float>, <1 x i1>, i32) + +define <2 x iXLen> @lrint_v2f32(<2 x float> %x, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: lrint_v2f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV32-NEXT: ret +; +; RV64-i32-LABEL: lrint_v2f32: +; RV64-i32: # %bb.0: +; RV64-i32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; RV64-i32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-i32-NEXT: ret +; +; RV64-i64-LABEL: lrint_v2f32: +; RV64-i64: # %bb.0: +; RV64-i64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; RV64-i64-NEXT: vfwcvt.x.f.v v9, v8, v0.t +; RV64-i64-NEXT: vmv1r.v v8, v9 +; RV64-i64-NEXT: ret + %a = call <2 x iXLen> @llvm.vp.lrint.v2iXLen.v2f32(<2 x float> %x, <2 x i1> %m, i32 %evl) + ret <2 x iXLen> %a +} +declare <2 x iXLen> @llvm.vp.lrint.v2iXLen.v2f32(<2 x float>, <2 x i1>, i32) + +define <3 x iXLen> @lrint_v3f32(<3 x float> %x, <3 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: lrint_v3f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV32-NEXT: ret +; +; RV64-i32-LABEL: lrint_v3f32: +; RV64-i32: # %bb.0: +; RV64-i32-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV64-i32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-i32-NEXT: ret +; +; RV64-i64-LABEL: lrint_v3f32: +; RV64-i64: # %bb.0: +; RV64-i64-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV64-i64-NEXT: vfwcvt.x.f.v v10, v8, v0.t +; RV64-i64-NEXT: vmv2r.v v8, v10 +; RV64-i64-NEXT: ret + %a = call <3 x iXLen> @llvm.vp.lrint.v3iXLen.v3f32(<3 x float> %x, <3 x i1> %m, i32 %evl) + ret <3 x iXLen> %a +} +declare <3 x iXLen> @llvm.vp.lrint.v3iXLen.v3f32(<3 x float>, <3 x i1>, i32) + +define <4 x iXLen> @lrint_v4f32(<4 x float> %x, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: lrint_v4f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV32-NEXT: ret +; +; RV64-i32-LABEL: lrint_v4f32: +; RV64-i32: # %bb.0: +; RV64-i32-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV64-i32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-i32-NEXT: ret +; +; RV64-i64-LABEL: lrint_v4f32: +; RV64-i64: # %bb.0: +; RV64-i64-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV64-i64-NEXT: vfwcvt.x.f.v v10, v8, v0.t +; RV64-i64-NEXT: vmv2r.v v8, v10 +; RV64-i64-NEXT: ret + %a = call <4 x iXLen> @llvm.vp.lrint.v4iXLen.v4f32(<4 x float> %x, <4 x i1> %m, i32 %evl) + ret <4 x iXLen> %a +} +declare <4 x iXLen> @llvm.vp.lrint.v4iXLen.v4f32(<4 x float>, <4 x i1>, i32) + +define <8 x iXLen> @lrint_v8f32(<8 x float> %x, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: lrint_v8f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV32-NEXT: ret +; +; RV64-i32-LABEL: lrint_v8f32: +; RV64-i32: # %bb.0: +; RV64-i32-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; RV64-i32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-i32-NEXT: ret +; +; RV64-i64-LABEL: lrint_v8f32: +; RV64-i64: # %bb.0: +; RV64-i64-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; RV64-i64-NEXT: vfwcvt.x.f.v v12, v8, v0.t +; RV64-i64-NEXT: vmv4r.v v8, v12 +; RV64-i64-NEXT: ret + %a = call <8 x iXLen> @llvm.vp.lrint.v8iXLen.v8f32(<8 x float> %x, <8 x i1> %m, i32 %evl) + ret <8 x iXLen> %a +} +declare <8 x iXLen> 
@llvm.vp.lrint.v8iXLen.v8f32(<8 x float>, <8 x i1>, i32) + +define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x, <16 x i1> %m, i32 zeroext %evl) { + %a = call <16 x iXLen> @llvm.vp.lrint.v16iXLen.v16f32(<16 x float> %x, <16 x i1> %m, i32 %evl) + ret <16 x iXLen> %a +} +declare <16 x iXLen> @llvm.vp.lrint.v16iXLen.v16f32(<16 x float>, <16 x i1>, i32) + +define <1 x iXLen> @lrint_v1f64(<1 x double> %x, <1 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: lrint_v1f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.x.f.w v9, v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-i32-LABEL: lrint_v1f64: +; RV64-i32: # %bb.0: +; RV64-i32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; RV64-i32-NEXT: vfncvt.x.f.w v9, v8, v0.t +; RV64-i32-NEXT: vmv1r.v v8, v9 +; RV64-i32-NEXT: ret +; +; RV64-i64-LABEL: lrint_v1f64: +; RV64-i64: # %bb.0: +; RV64-i64-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64-i64-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-i64-NEXT: ret + %a = call <1 x iXLen> @llvm.vp.lrint.v1iXLen.v1f64(<1 x double> %x, <1 x i1> %m, i32 %evl) + ret <1 x iXLen> %a +} +declare <1 x iXLen> @llvm.vp.lrint.v1iXLen.v1f64(<1 x double>, <1 x i1>, i32) + +define <2 x iXLen> @lrint_v2f64(<2 x double> %x, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: lrint_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.x.f.w v9, v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-i32-LABEL: lrint_v2f64: +; RV64-i32: # %bb.0: +; RV64-i32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; RV64-i32-NEXT: vfncvt.x.f.w v9, v8, v0.t +; RV64-i32-NEXT: vmv1r.v v8, v9 +; RV64-i32-NEXT: ret +; +; RV64-i64-LABEL: lrint_v2f64: +; RV64-i64: # %bb.0: +; RV64-i64-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64-i64-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-i64-NEXT: ret + %a = call <2 x iXLen> @llvm.vp.lrint.v2iXLen.v2f64(<2 x double> %x, <2 x i1> %m, i32 %evl) + ret <2 x iXLen> %a +} +declare <2 x iXLen> @llvm.vp.lrint.v2iXLen.v2f64(<2 x double>, <2 x i1>, i32) + +define <4 x iXLen> @lrint_v4f64(<4 x double> %x, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: lrint_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV32-NEXT: vfncvt.x.f.w v10, v8, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-i32-LABEL: lrint_v4f64: +; RV64-i32: # %bb.0: +; RV64-i32-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV64-i32-NEXT: vfncvt.x.f.w v10, v8, v0.t +; RV64-i32-NEXT: vmv.v.v v8, v10 +; RV64-i32-NEXT: ret +; +; RV64-i64-LABEL: lrint_v4f64: +; RV64-i64: # %bb.0: +; RV64-i64-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64-i64-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-i64-NEXT: ret + %a = call <4 x iXLen> @llvm.vp.lrint.v4iXLen.v4f64(<4 x double> %x, <4 x i1> %m, i32 %evl) + ret <4 x iXLen> %a +} +declare <4 x iXLen> @llvm.vp.lrint.v4iXLen.v4f64(<4 x double>, <4 x i1>, i32) + +define <8 x iXLen> @lrint_v8f64(<8 x double> %x, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: lrint_v8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; RV32-NEXT: vfncvt.x.f.w v12, v8, v0.t +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: ret +; +; RV64-i32-LABEL: lrint_v8f64: +; RV64-i32: # %bb.0: +; RV64-i32-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; RV64-i32-NEXT: vfncvt.x.f.w v12, v8, v0.t +; RV64-i32-NEXT: vmv.v.v v8, v12 +; RV64-i32-NEXT: ret +; +; RV64-i64-LABEL: lrint_v8f64: +; RV64-i64: # %bb.0: +; RV64-i64-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64-i64-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-i64-NEXT: ret + 
%a = call <8 x iXLen> @llvm.vp.lrint.v8iXLen.v8f64(<8 x double> %x, <8 x i1> %m, i32 %evl) + ret <8 x iXLen> %a +} +declare <8 x iXLen> @llvm.vp.lrint.v8iXLen.v8f64(<8 x double>, <8 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll new file mode 100644 index 0000000000000..6d8763d34ec1b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v,+f,+d -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +define @llrint_nxv1i64_nxv1f32( %x, %m, i32 zeroext %evl) { +; CHECK-LABEL: llrint_nxv1i64_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfwcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %a = call @llvm.vp.llrint.nxv1i64.nxv1f32( %x, %m, i32 %evl) + ret %a +} +declare @llvm.vp.llrint.nxv1i64.nxv1f32(, , i32) + +define @llrint_nxv2i64_nxv2f32( %x, %m, i32 zeroext %evl) { +; CHECK-LABEL: llrint_nxv2i64_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfwcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %a = call @llvm.vp.llrint.nxv2i64.nxv2f32( %x, %m, i32 %evl) + ret %a +} +declare @llvm.vp.llrint.nxv2i64.nxv2f32(, , i32) + +define @llrint_nxv4i64_nxv4f32( %x, %m, i32 zeroext %evl) { +; CHECK-LABEL: llrint_nxv4i64_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %a = call @llvm.vp.llrint.nxv4i64.nxv4f32( %x, %m, i32 %evl) + ret %a +} +declare @llvm.vp.llrint.nxv4i64.nxv4f32(, , i32) + +define @llrint_nxv8i64_nxv8f32( %x, %m, i32 zeroext %evl) { +; CHECK-LABEL: llrint_nxv8i64_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfwcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %a = call @llvm.vp.llrint.nxv8i64.nxv8f32( %x, %m, i32 %evl) + ret %a +} +declare @llvm.vp.llrint.nxv8i64.nxv8f32(, , i32) + +define @llrint_nxv16i64_nxv16f32( %x, %m, i32 zeroext %evl) { +; CHECK-LABEL: llrint_nxv16i64_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 3 +; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma +; CHECK-NEXT: vfwcvt.x.f.v v16, v12, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vfwcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %a = call @llvm.vp.llrint.nxv16i64.nxv16f32( %x, %m, i32 %evl) + ret %a +} +declare @llvm.vp.llrint.nxv16i64.nxv16f32(, , i32) + +define @llrint_nxv1i64_nxv1f64( %x, %m, i32 zeroext %evl) { +; CHECK-LABEL: llrint_nxv1i64_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.vp.llrint.nxv1i64.nxv1f64( %x, %m, i32 %evl) + ret %a +} +declare 
@llvm.vp.llrint.nxv1i64.nxv1f64(, , i32) + +define @llrint_nxv2i64_nxv2f64( %x, %m, i32 zeroext %evl) { +; CHECK-LABEL: llrint_nxv2i64_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.vp.llrint.nxv2i64.nxv2f64( %x, %m, i32 %evl) + ret %a +} +declare @llvm.vp.llrint.nxv2i64.nxv2f64(, , i32) + +define @llrint_nxv4i64_nxv4f64( %x, %m, i32 zeroext %evl) { +; CHECK-LABEL: llrint_nxv4i64_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.vp.llrint.nxv4i64.nxv4f64( %x, %m, i32 %evl) + ret %a +} +declare @llvm.vp.llrint.nxv4i64.nxv4f64(, , i32) + +define @llrint_nxv8i64_nxv8f64( %x, %m, i32 zeroext %evl) { +; CHECK-LABEL: llrint_nxv8i64_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v8, v0.t +; CHECK-NEXT: ret + %a = call @llvm.vp.llrint.nxv8i64.nxv8f64( %x, %m, i32 %evl) + ret %a +} +declare @llvm.vp.llrint.nxv8i64.nxv8f64(, , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll new file mode 100644 index 0000000000000..8a826fb3ac1ea --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll @@ -0,0 +1,209 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+f,+d \ +; RUN: -target-abi=ilp32d -verify-machineinstrs | FileCheck %s --check-prefix=RV32 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv64 -mattr=+v,+f,+d \ +; RUN: -target-abi=lp64d -verify-machineinstrs | FileCheck %s --check-prefix=RV64-i32 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+f,+d \ +; RUN: -target-abi=lp64d -verify-machineinstrs | FileCheck %s --check-prefix=RV64-i64 + +define @lrint_nxv1f32( %x, %m, i32 zeroext %evl) { +; RV32-LABEL: lrint_nxv1f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV32-NEXT: ret +; +; RV64-i32-LABEL: lrint_nxv1f32: +; RV64-i32: # %bb.0: +; RV64-i32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; RV64-i32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-i32-NEXT: ret +; +; RV64-i64-LABEL: lrint_nxv1f32: +; RV64-i64: # %bb.0: +; RV64-i64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; RV64-i64-NEXT: vfwcvt.x.f.v v9, v8, v0.t +; RV64-i64-NEXT: vmv1r.v v8, v9 +; RV64-i64-NEXT: ret + %a = call @llvm.vp.lrint.nxv1iXLen.nxv1f32( %x, %m, i32 %evl) + ret %a +} +declare @llvm.vp.lrint.nxv1iXLen.nxv1f32(, , i32) + +define @lrint_nxv2f32( %x, %m, i32 zeroext %evl) { +; RV32-LABEL: lrint_nxv2f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV32-NEXT: ret +; +; RV64-i32-LABEL: lrint_nxv2f32: +; RV64-i32: # %bb.0: +; RV64-i32-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV64-i32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-i32-NEXT: ret +; +; RV64-i64-LABEL: lrint_nxv2f32: +; RV64-i64: # %bb.0: +; RV64-i64-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV64-i64-NEXT: vfwcvt.x.f.v v10, v8, v0.t +; RV64-i64-NEXT: vmv2r.v v8, v10 +; RV64-i64-NEXT: ret + %a = call @llvm.vp.lrint.nxv2iXLen.nxv2f32( %x, %m, i32 %evl) + ret %a +} +declare @llvm.vp.lrint.nxv2iXLen.nxv2f32(, , i32) + +define @lrint_nxv4f32( %x, %m, i32 zeroext %evl) { +; RV32-LABEL: lrint_nxv4f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV32-NEXT: ret +; +; 
RV64-i32-LABEL: lrint_nxv4f32: +; RV64-i32: # %bb.0: +; RV64-i32-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; RV64-i32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-i32-NEXT: ret +; +; RV64-i64-LABEL: lrint_nxv4f32: +; RV64-i64: # %bb.0: +; RV64-i64-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; RV64-i64-NEXT: vfwcvt.x.f.v v12, v8, v0.t +; RV64-i64-NEXT: vmv4r.v v8, v12 +; RV64-i64-NEXT: ret + %a = call @llvm.vp.lrint.nxv4iXLen.nxv4f32( %x, %m, i32 %evl) + ret %a +} +declare @llvm.vp.lrint.nxv4iXLen.nxv4f32(, , i32) + +define @lrint_nxv8f32( %x, %m, i32 zeroext %evl) { +; RV32-LABEL: lrint_nxv8f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV32-NEXT: ret +; +; RV64-i32-LABEL: lrint_nxv8f32: +; RV64-i32: # %bb.0: +; RV64-i32-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; RV64-i32-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-i32-NEXT: ret +; +; RV64-i64-LABEL: lrint_nxv8f32: +; RV64-i64: # %bb.0: +; RV64-i64-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; RV64-i64-NEXT: vfwcvt.x.f.v v16, v8, v0.t +; RV64-i64-NEXT: vmv8r.v v8, v16 +; RV64-i64-NEXT: ret + %a = call @llvm.vp.lrint.nxv8iXLen.nxv8f32( %x, %m, i32 %evl) + ret %a +} +declare @llvm.vp.lrint.nxv8iXLen.nxv8f32(, , i32) + +define @lrint_nxv16iXLen_nxv16f32( %x, %m, i32 zeroext %evl) { + %a = call @llvm.vp.lrint.nxv16iXLen.nxv16f32( %x, %m, i32 %evl) + ret %a +} +declare @llvm.vp.lrint.nxv16iXLen.nxv16f32(, , i32) + +define @lrint_nxv1f64( %x, %m, i32 zeroext %evl) { +; RV32-LABEL: lrint_nxv1f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.x.f.w v9, v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-i32-LABEL: lrint_nxv1f64: +; RV64-i32: # %bb.0: +; RV64-i32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; RV64-i32-NEXT: vfncvt.x.f.w v9, v8, v0.t +; RV64-i32-NEXT: vmv1r.v v8, v9 +; RV64-i32-NEXT: ret +; +; RV64-i64-LABEL: lrint_nxv1f64: +; RV64-i64: # %bb.0: +; RV64-i64-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV64-i64-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-i64-NEXT: ret + %a = call @llvm.vp.lrint.nxv1iXLen.nxv1f64( %x, %m, i32 %evl) + ret %a +} +declare @llvm.vp.lrint.nxv1iXLen.nxv1f64(, , i32) + +define @lrint_nxv2f64( %x, %m, i32 zeroext %evl) { +; RV32-LABEL: lrint_nxv2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV32-NEXT: vfncvt.x.f.w v10, v8, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-i32-LABEL: lrint_nxv2f64: +; RV64-i32: # %bb.0: +; RV64-i32-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; RV64-i32-NEXT: vfncvt.x.f.w v10, v8, v0.t +; RV64-i32-NEXT: vmv.v.v v8, v10 +; RV64-i32-NEXT: ret +; +; RV64-i64-LABEL: lrint_nxv2f64: +; RV64-i64: # %bb.0: +; RV64-i64-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV64-i64-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-i64-NEXT: ret + %a = call @llvm.vp.lrint.nxv2iXLen.nxv2f64( %x, %m, i32 %evl) + ret %a +} +declare @llvm.vp.lrint.nxv2iXLen.nxv2f64(, , i32) + +define @lrint_nxv4f64( %x, %m, i32 zeroext %evl) { +; RV32-LABEL: lrint_nxv4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; RV32-NEXT: vfncvt.x.f.w v12, v8, v0.t +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: ret +; +; RV64-i32-LABEL: lrint_nxv4f64: +; RV64-i32: # %bb.0: +; RV64-i32-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; RV64-i32-NEXT: vfncvt.x.f.w v12, v8, v0.t +; RV64-i32-NEXT: vmv.v.v v8, v12 +; RV64-i32-NEXT: ret +; +; RV64-i64-LABEL: lrint_nxv4f64: +; RV64-i64: # %bb.0: +; RV64-i64-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV64-i64-NEXT: vfcvt.x.f.v v8, v8, v0.t +; 
RV64-i64-NEXT: ret + %a = call @llvm.vp.lrint.nxv4iXLen.nxv4f64( %x, %m, i32 %evl) + ret %a +} +declare @llvm.vp.lrint.nxv4iXLen.nxv4f64(, , i32) + +define @lrint_nxv8f64( %x, %m, i32 zeroext %evl) { +; RV32-LABEL: lrint_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; RV32-NEXT: vfncvt.x.f.w v16, v8, v0.t +; RV32-NEXT: vmv.v.v v8, v16 +; RV32-NEXT: ret +; +; RV64-i32-LABEL: lrint_nxv8f64: +; RV64-i32: # %bb.0: +; RV64-i32-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; RV64-i32-NEXT: vfncvt.x.f.w v16, v8, v0.t +; RV64-i32-NEXT: vmv.v.v v8, v16 +; RV64-i32-NEXT: ret +; +; RV64-i64-LABEL: lrint_nxv8f64: +; RV64-i64: # %bb.0: +; RV64-i64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-i64-NEXT: vfcvt.x.f.v v8, v8, v0.t +; RV64-i64-NEXT: ret + %a = call @llvm.vp.lrint.nxv8iXLen.nxv8f64( %x, %m, i32 %evl) + ret %a +} +declare @llvm.vp.lrint.nxv8iXLen.nxv8f64(, , i32) diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp index e3462f0f33f11..fd010ef2208c4 100644 --- a/llvm/unittests/IR/VPIntrinsicTest.cpp +++ b/llvm/unittests/IR/VPIntrinsicTest.cpp @@ -72,6 +72,10 @@ class VPIntrinsicTest : public testing::Test { "i32)"; Str << " declare <8 x float> @llvm.vp.ceil.v8f32(<8 x float>, <8 x i1>, " "i32)"; + Str << " declare <8 x i32> @llvm.vp.lrint.v8i32.v8f32(<8 x float>, " + "<8 x i1>, i32)"; + Str << " declare <8 x i64> @llvm.vp.llrint.v8i64.v8f32(<8 x float>, " + "<8 x i1>, i32)"; Str << " declare <8 x float> @llvm.vp.fneg.v8f32(<8 x float>, <8 x i1>, " "i32)"; Str << " declare <8 x float> @llvm.vp.fabs.v8f32(<8 x float>, <8 x i1>, "