diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 402e5e7c78d92c..9d9e59a13f54ac 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1876,8 +1876,8 @@ static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) {
 // floor(X + copysign(nextafter(0.5, 0.0), X)).
 // FIXME: Could be shorter by changing rounding mode, but we don't have FRM
 // dependencies modeled yet.
-// FIXME: Use masked operations to avoid final merge.
-static SDValue lowerFROUND(SDValue Op, SelectionDAG &DAG) {
+static SDValue lowerFROUND(SDValue Op, SelectionDAG &DAG,
+                           const RISCVSubtarget &Subtarget) {
   MVT VT = Op.getSimpleValueType();
   assert(VT.isVector() && "Unexpected type");
 
@@ -1885,44 +1885,70 @@ static SDValue lowerFROUND(SDValue Op, SelectionDAG &DAG) {
 
   SDValue Src = Op.getOperand(0);
 
+  MVT ContainerVT = VT;
+  if (VT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+  }
+
+  SDValue TrueMask, VL;
+  std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
   // Freeze the source since we are increasing the number of uses.
-  Src = DAG.getFreeze(Op.getOperand(0));
+  Src = DAG.getFreeze(Src);
 
   // We do the conversion on the absolute value and fix the sign at the end.
-  SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src);
+  SDValue Abs =
+      DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, TrueMask, VL);
 
   // Determine the largest integer that can be represented exactly. This and
   // values larger than it don't have any fractional bits so don't need to
   // be converted.
-  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
+  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
   unsigned Precision = APFloat::semanticsPrecision(FltSem);
   APFloat MaxVal = APFloat(FltSem);
   MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
                           /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
-  SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
+  SDValue MaxValNode =
+      DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
+  SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
+                                    DAG.getUNDEF(ContainerVT), MaxValNode, VL);
 
   // If abs(Src) was larger than MaxVal or nan, keep it.
-  MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
-  SDValue Mask = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT);
+  MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+  SDValue Mask = DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, Abs, MaxValSplat,
+                             DAG.getCondCode(ISD::SETOLT), TrueMask, VL);
 
   bool Ignored;
   APFloat Point5Pred = APFloat(0.5f);
   Point5Pred.convert(FltSem, APFloat::rmNearestTiesToEven, &Ignored);
   Point5Pred.next(/*nextDown*/ true);
+  SDValue SplatVal =
+      DAG.getConstantFP(Point5Pred, DL, ContainerVT.getVectorElementType());
+  SDValue Splat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
+                              DAG.getUNDEF(ContainerVT), SplatVal, VL);
 
   // Add the adjustment.
-  SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Abs,
-                               DAG.getConstantFP(Point5Pred, DL, VT));
+  SDValue Adjust =
+      DAG.getNode(RISCVISD::FADD_VL, DL, ContainerVT, Abs, Splat, Mask, VL);
 
   // Truncate to integer and convert back to fp.
-  MVT IntVT = VT.changeVectorElementTypeToInteger();
-  SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Adjust);
-  Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated);
+  MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
+  SDValue Truncated =
+      DAG.getNode(RISCVISD::FP_TO_SINT_VL, DL, IntVT, Adjust, Mask, VL);
 
-  // Restore the original sign.
-  Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src);
+  Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
+                          Mask, VL);
+
+  // Restore the original sign and merge the original source to masked off
+  // lanes.
+  Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
+                          Src, Mask, Src, VL);
+
+  if (!VT.isFixedLengthVector())
+    return Truncated;
 
-  return DAG.getSelect(DL, VT, Mask, Truncated, Src);
+  return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
 }
 
 struct VIDSequence {
@@ -3417,7 +3443,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   case ISD::FFLOOR:
     return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG);
   case ISD::FROUND:
-    return lowerFROUND(Op, DAG);
+    return lowerFROUND(Op, DAG, Subtarget);
   case ISD::VECREDUCE_ADD:
   case ISD::VECREDUCE_UMAX:
   case ISD::VECREDUCE_SMAX:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 6015f6d0ea6cb4..245a2fdc441ebd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -2202,11 +2202,10 @@ define void @round_v8f16(<8 x half>* %x) {
 ; CHECK-NEXT:    flh ft1, %lo(.LCPI100_1)(a1)
 ; CHECK-NEXT:    vfabs.v v9, v8
 ; CHECK-NEXT:    vmflt.vf v0, v9, ft0
-; CHECK-NEXT:    vfadd.vf v9, v9, ft1
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9
-; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfadd.vf v9, v9, ft1, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <8 x half>, <8 x half>* %x
@@ -2227,11 +2226,10 @@ define void @round_v4f32(<4 x float>* %x) {
 ; CHECK-NEXT:    flw ft1, %lo(.LCPI101_1)(a1)
 ; CHECK-NEXT:    vfabs.v v9, v8
 ; CHECK-NEXT:    vmflt.vf v0, v9, ft0
-; CHECK-NEXT:    vfadd.vf v9, v9, ft1
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9
-; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfadd.vf v9, v9, ft1, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <4 x float>, <4 x float>* %x
@@ -2252,11 +2250,10 @@ define void @round_v2f64(<2 x double>* %x) {
 ; CHECK-NEXT:    fld ft1, %lo(.LCPI102_1)(a1)
 ; CHECK-NEXT:    vfabs.v v9, v8
 ; CHECK-NEXT:    vmflt.vf v0, v9, ft0
-; CHECK-NEXT:    vfadd.vf v9, v9, ft1
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9
-; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfadd.vf v9, v9, ft1, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    vse64.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <2 x double>, <2 x double>* %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll
index c6dc78b2a7383c..103158e0ca228d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll
@@ -7,18 +7,17 @@
 define <vscale x 1 x half> @round_nxv1f16(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: round_nxv1f16:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
 ; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI0_0)(a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI0_1)
 ; CHECK-NEXT:    flh ft1, %lo(.LCPI0_1)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
 ; CHECK-NEXT:    vfabs.v v9, v8
 ; CHECK-NEXT:    vmflt.vf v0, v9, ft0
-; CHECK-NEXT:    vfadd.vf v9, v9, ft1
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9
-; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfadd.vf v9, v9, ft1, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half> %x)
   ret <vscale x 1 x half> %a
@@ -28,18 +27,17 @@ declare <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half>)
 define <vscale x 2 x half> @round_nxv2f16(<vscale x 2 x half> %x) {
 ; CHECK-LABEL: round_nxv2f16:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI1_0)(a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI1_1)
 ; CHECK-NEXT:    flh ft1, %lo(.LCPI1_1)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vfabs.v v9, v8
 ; CHECK-NEXT:    vmflt.vf v0, v9, ft0
-; CHECK-NEXT:    vfadd.vf v9, v9, ft1
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9
-; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfadd.vf v9, v9, ft1, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half> %x)
   ret <vscale x 2 x half> %a
@@ -49,18 +47,17 @@ declare <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half>)
 define <vscale x 4 x half> @round_nxv4f16(<vscale x 4 x half> %x) {
 ; CHECK-LABEL: round_nxv4f16:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI2_0)(a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI2_1)
 ; CHECK-NEXT:    flh ft1, %lo(.LCPI2_1)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vfabs.v v9, v8
 ; CHECK-NEXT:    vmflt.vf v0, v9, ft0
-; CHECK-NEXT:    vfadd.vf v9, v9, ft1
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9
-; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfadd.vf v9, v9, ft1, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half> %x)
   ret <vscale x 4 x half> %a
@@ -70,18 +67,17 @@ declare <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half>)
 define <vscale x 8 x half> @round_nxv8f16(<vscale x 8 x half> %x) {
 ; CHECK-LABEL: round_nxv8f16:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI3_1)
 ; CHECK-NEXT:    flh ft1, %lo(.LCPI3_1)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfabs.v v10, v8
 ; CHECK-NEXT:    vmflt.vf v0, v10, ft0
-; CHECK-NEXT:    vfadd.vf v10, v10, ft1
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v10
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10
-; CHECK-NEXT:    vfsgnj.vv v10, v10, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    vfadd.vf v10, v10, ft1, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v10, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half> %x)
   ret <vscale x 8 x half> %a
@@ -91,18 +87,17 @@ declare <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half>)
 define <vscale x 16 x half> @round_nxv16f16(<vscale x 16 x half> %x) {
 ; CHECK-LABEL: round_nxv16f16:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI4_1)
 ; CHECK-NEXT:    flh ft1, %lo(.LCPI4_1)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfabs.v v12, v8
 ; CHECK-NEXT:    vmflt.vf v0, v12, ft0
-; CHECK-NEXT:    vfadd.vf v12, v12, ft1
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v12
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12
-; CHECK-NEXT:    vfsgnj.vv v12, v12, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    vfadd.vf v12, v12, ft1, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half> %x)
   ret <vscale x 16 x half> %a
@@ -112,18 +107,17 @@ declare <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half>)
 define <vscale x 32 x half> @round_nxv32f16(<vscale x 32 x half> %x) {
 ; CHECK-LABEL: round_nxv32f16:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI5_1)
 ; CHECK-NEXT:    flh ft1, %lo(.LCPI5_1)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
 ; CHECK-NEXT:    vfabs.v v16, v8
 ; CHECK-NEXT:    vmflt.vf v0, v16, ft0
-; CHECK-NEXT:    vfadd.vf v16, v16, ft1
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v16
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16
-; CHECK-NEXT:    vfsgnj.vv v16, v16, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    vfadd.vf v16, v16, ft1, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 32 x half> @llvm.round.nxv32f16(<vscale x 32 x half> %x)
   ret <vscale x 32 x half> %a
@@ -133,18 +127,17 @@ declare <vscale x 32 x half> @llvm.round.nxv32f16(<vscale x 32 x half>)
 define <vscale x 1 x float> @round_nxv1f32(<vscale x 1 x float> %x) {
 ; CHECK-LABEL: round_nxv1f32:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI6_0)(a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI6_1)
 ; CHECK-NEXT:    flw ft1, %lo(.LCPI6_1)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfabs.v v9, v8
 ; CHECK-NEXT:    vmflt.vf v0, v9, ft0
-; CHECK-NEXT:    vfadd.vf v9, v9, ft1
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9
-; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfadd.vf v9, v9, ft1, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 1 x float> @llvm.round.nxv1f32(<vscale x 1 x float> %x)
   ret <vscale x 1 x float> %a
@@ -154,18 +147,17 @@ declare <vscale x 1 x float> @llvm.round.nxv1f32(<vscale x 1 x float>)
 define <vscale x 2 x float> @round_nxv2f32(<vscale x 2 x float> %x) {
 ; CHECK-LABEL: round_nxv2f32:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    lui a0, %hi(.LCPI7_0)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI7_0)(a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI7_1)
 ; CHECK-NEXT:    flw ft1, %lo(.LCPI7_1)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vfabs.v v9, v8
 ; CHECK-NEXT:    vmflt.vf v0, v9, ft0
-; CHECK-NEXT:    vfadd.vf v9, v9, ft1
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9
-; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfadd.vf v9, v9, ft1, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x float> @llvm.round.nxv2f32(<vscale x 2 x float> %x)
   ret <vscale x 2 x float> %a
@@ -175,18 +167,17 @@ declare <vscale x 2 x float> @llvm.round.nxv2f32(<vscale x 2 x float>)
 define <vscale x 4 x float> @round_nxv4f32(<vscale x 4 x float> %x) {
 ; CHECK-LABEL: round_nxv4f32:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    lui a0, %hi(.LCPI8_0)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI8_0)(a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI8_1)
 ; CHECK-NEXT:    flw ft1, %lo(.LCPI8_1)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfabs.v v10, v8
 ; CHECK-NEXT:    vmflt.vf v0, v10, ft0
-; CHECK-NEXT:    vfadd.vf v10, v10, ft1
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v10
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10
-; CHECK-NEXT:    vfsgnj.vv v10, v10, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    vfadd.vf v10, v10, ft1, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v10, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> %x)
   ret <vscale x 4 x float> %a
@@ -196,18 +187,17 @@ declare <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float>)
 define <vscale x 8 x float> @round_nxv8f32(<vscale x 8 x float> %x) {
 ; CHECK-LABEL: round_nxv8f32:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI9_0)(a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI9_1)
 ; CHECK-NEXT:    flw ft1, %lo(.LCPI9_1)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfabs.v v12, v8
 ; CHECK-NEXT:    vmflt.vf v0, v12, ft0
-; CHECK-NEXT:    vfadd.vf v12, v12, ft1
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v12
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12
-; CHECK-NEXT:    vfsgnj.vv v12, v12, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    vfadd.vf v12, v12, ft1, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 8 x float> @llvm.round.nxv8f32(<vscale x 8 x float> %x)
   ret <vscale x 8 x float> %a
@@ -217,18 +207,17 @@ declare <vscale x 8 x float> @llvm.round.nxv8f32(<vscale x 8 x float>)
 define <vscale x 16 x float> @round_nxv16f32(<vscale x 16 x float> %x) {
 ; CHECK-LABEL: round_nxv16f32:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    lui a0, %hi(.LCPI10_0)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI10_0)(a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI10_1)
 ; CHECK-NEXT:    flw ft1, %lo(.LCPI10_1)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
 ; CHECK-NEXT:    vfabs.v v16, v8
 ; CHECK-NEXT:    vmflt.vf v0, v16, ft0
-; CHECK-NEXT:    vfadd.vf v16, v16, ft1
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v16
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16
-; CHECK-NEXT:    vfsgnj.vv v16, v16, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    vfadd.vf v16, v16, ft1, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 16 x float> @llvm.round.nxv16f32(<vscale x 16 x float> %x)
   ret <vscale x 16 x float> %a
@@ -238,18 +227,17 @@ declare <vscale x 16 x float> @llvm.round.nxv16f32(<vscale x 16 x float>)
 define <vscale x 1 x double> @round_nxv1f64(<vscale x 1 x double> %x) {
 ; CHECK-LABEL: round_nxv1f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
 ; CHECK-NEXT:    fld ft0, %lo(.LCPI11_0)(a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI11_1)
 ; CHECK-NEXT:    fld ft1, %lo(.LCPI11_1)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vfabs.v v9, v8
 ; CHECK-NEXT:    vmflt.vf v0, v9, ft0
-; CHECK-NEXT:    vfadd.vf v9, v9, ft1
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9
-; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfadd.vf v9, v9, ft1, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.round.nxv1f64(<vscale x 1 x double> %x)
   ret <vscale x 1 x double> %a
@@ -259,18 +247,17 @@ declare <vscale x 1 x double> @llvm.round.nxv1f64(<vscale x 1 x double>)
 define <vscale x 2 x double> @round_nxv2f64(<vscale x 2 x double> %x) {
 ; CHECK-LABEL: round_nxv2f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
 ; CHECK-NEXT:    fld ft0, %lo(.LCPI12_0)(a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI12_1)
 ; CHECK-NEXT:    fld ft1, %lo(.LCPI12_1)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfabs.v v10, v8
 ; CHECK-NEXT:    vmflt.vf v0, v10, ft0
-; CHECK-NEXT:    vfadd.vf v10, v10, ft1
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v10
-; CHECK-NEXT:    vfcvt.f.x.v v10, v10
-; CHECK-NEXT:    vfsgnj.vv v10, v10, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    vfadd.vf v10, v10, ft1, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v10, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> %x)
   ret <vscale x 2 x double> %a
@@ -280,18 +267,17 @@ declare <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double>)
 define <vscale x 4 x double> @round_nxv4f64(<vscale x 4 x double> %x) {
 ; CHECK-LABEL: round_nxv4f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
 ; CHECK-NEXT:    fld ft0, %lo(.LCPI13_0)(a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI13_1)
 ; CHECK-NEXT:    fld ft1, %lo(.LCPI13_1)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfabs.v v12, v8
 ; CHECK-NEXT:    vmflt.vf v0, v12, ft0
-; CHECK-NEXT:    vfadd.vf v12, v12, ft1
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v12
-; CHECK-NEXT:    vfcvt.f.x.v v12, v12
-; CHECK-NEXT:    vfsgnj.vv v12, v12, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    vfadd.vf v12, v12, ft1, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v12, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.round.nxv4f64(<vscale x 4 x double> %x)
   ret <vscale x 4 x double> %a
@@ -301,18 +287,17 @@ declare <vscale x 4 x double> @llvm.round.nxv4f64(<vscale x 4 x double>)
 define <vscale x 8 x double> @round_nxv8f64(<vscale x 8 x double> %x) {
 ; CHECK-LABEL: round_nxv8f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
 ; CHECK-NEXT:    fld ft0, %lo(.LCPI14_0)(a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI14_1)
 ; CHECK-NEXT:    fld ft1, %lo(.LCPI14_1)(a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
 ; CHECK-NEXT:    vfabs.v v16, v8
 ; CHECK-NEXT:    vmflt.vf v0, v16, ft0
-; CHECK-NEXT:    vfadd.vf v16, v16, ft1
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v16
-; CHECK-NEXT:    vfcvt.f.x.v v16, v16
-; CHECK-NEXT:    vfsgnj.vv v16, v16, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    vfadd.vf v16, v16, ft1, v0.t
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v16, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT:    vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT:    ret
   %a = call <vscale x 8 x double> @llvm.round.nxv8f64(<vscale x 8 x double> %x)
   ret <vscale x 8 x double> %a
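
Note for reviewers: a scalar model of what the masked sequence computes per active lane, illustrative only and not part of the patch (the helper name round_model is made up). llvm.round rounds half away from zero; the lowering gets that by truncating abs(x) + nextafter(0.5, 0.0) toward zero and restoring the sign, while lanes whose magnitude is already at least 2^(precision - 1), or NaN, fail the vmflt.vf compare and keep their original value. The two constant-pool loads (ft0/ft1) correspond to those two constants.

    #include <cmath>
    #include <cstdio>

    // Illustrative scalar model (float case) of the masked FROUND lowering.
    static float round_model(float x) {
      float abs_x = std::fabs(x);
      // Largest float with no fractional bits: 2^(precision - 1) = 2^23.
      const float max_val = 8388608.0f;
      // abs(x) >= max_val or NaN: lane stays unchanged (the vmflt.vf mask).
      if (!(abs_x < max_val))
        return x;
      // nextafter(0.5, 0.0): just below one half, so exact .5 cases round
      // away from zero but values slightly below .5 are not pushed up.
      float adjust = std::nextafterf(0.5f, 0.0f);
      // Truncate toward zero after the adjustment
      // (vfcvt.rtz.x.f.v followed by vfcvt.f.x.v).
      float truncated = std::truncf(abs_x + adjust);
      // Restore the original sign (vfsgnj.vv).
      return std::copysignf(truncated, x);
    }

    int main() {
      const float tests[] = {2.5f, -2.5f, 0.49999997f, -0.5f, 3.3f};
      for (float t : tests)
        std::printf("round_model(%g) = %g (roundf: %g)\n", t, round_model(t),
                    std::roundf(t));
      return 0;
    }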