diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 315c10887a838..de3dcc29b78c8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1915,7 +1915,8 @@ getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
 
 // Gets the two common "VL" operands: an all-ones mask and the vector length.
 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
-// the vector type that it is contained in.
+// the vector type that the fixed-length vector is contained in. Otherwise,
+// if VecVT is scalable, then ContainerVT should be the same as VecVT.
 static std::pair<SDValue, SDValue>
 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
                 const RISCVSubtarget &Subtarget) {
@@ -9555,15 +9556,9 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   MVT XLenVT = Subtarget.getXLenVT();
 
-  // Only handle XLen or i32 types. Other types narrower than XLen will
-  // eventually be legalized to XLenVT.
-  EVT VT = N->getValueType(0);
-  if (VT != MVT::i32 && VT != XLenVT)
-    return SDValue();
-
   SDValue Src = N->getOperand(0);
 
-  // Ensure the FP type is also legal.
+  // Ensure the FP type is legal.
   if (!TLI.isTypeLegal(Src.getValueType()))
     return SDValue();
 
@@ -9575,7 +9570,57 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
   if (FRM == RISCVFPRndMode::Invalid)
     return SDValue();
 
+  SDLoc DL(N);
   bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+  EVT VT = N->getValueType(0);
+
+  if (VT.isVector() && TLI.isTypeLegal(VT)) {
+    MVT SrcVT = Src.getSimpleValueType();
+    MVT SrcContainerVT = SrcVT;
+    MVT ContainerVT = VT.getSimpleVT();
+    SDValue XVal = Src.getOperand(0);
+
+    // TODO: Support combining with widening and narrowing instructions.
+    // For now only support conversions of the same bit size.
+    if (VT.getScalarSizeInBits() != SrcVT.getScalarSizeInBits())
+      return SDValue();
+
+    // Make fixed-length vectors scalable first.
+    if (SrcVT.isFixedLengthVector()) {
+      SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
+      XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
+      ContainerVT =
+          getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
+    }
+
+    auto [Mask, VL] =
+        getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
+
+    SDValue FpToInt;
+    if (FRM == RISCVFPRndMode::RTZ) {
+      // Use the dedicated trunc static rounding mode if we're truncating, so
+      // we don't need to generate calls to fsrmi/fsrm.
+      unsigned Opc =
+          IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
+      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
+    } else {
+      unsigned Opc =
+          IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
+      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
+                            DAG.getTargetConstant(FRM, DL, XLenVT), VL);
+    }
+
+    // If converted from fixed-length to scalable, convert back.
+    if (VT.isFixedLengthVector())
+      FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
+
+    return FpToInt;
+  }
+
+  // Only handle XLen or i32 types. Other types narrower than XLen will
+  // eventually be legalized to XLenVT.
+  if (VT != MVT::i32 && VT != XLenVT)
+    return SDValue();
 
   unsigned Opc;
   if (VT == XLenVT)
@@ -9583,7 +9628,6 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
   else
     Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
 
-  SDLoc DL(N);
   SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
                                 DAG.getTargetConstant(FRM, DL, XLenVT));
   return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
@@ -11604,6 +11648,18 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
   case RISCV::PseudoVFCVT_RM_X_F_V_MF4_MASK:
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
+  case RISCV::PseudoVFCVT_RM_XU_F_V_M1_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M1_MASK);
+  case RISCV::PseudoVFCVT_RM_XU_F_V_M2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M2_MASK);
+  case RISCV::PseudoVFCVT_RM_XU_F_V_M4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M4_MASK);
+  case RISCV::PseudoVFCVT_RM_XU_F_V_M8_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M8_MASK);
+  case RISCV::PseudoVFCVT_RM_XU_F_V_MF2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_MF2_MASK);
+  case RISCV::PseudoVFCVT_RM_XU_F_V_MF4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_MF4_MASK);
   case RISCV::PseudoVFCVT_RM_F_XU_V_M1_MASK:
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M1_MASK);
   case RISCV::PseudoVFCVT_RM_F_XU_V_M2_MASK:
@@ -13218,7 +13274,9 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
   NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
   NODE_NAME_CASE(VFCVT_RM_X_F_VL)
+  NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
   NODE_NAME_CASE(VFCVT_X_F_VL)
+  NODE_NAME_CASE(VFCVT_XU_F_VL)
   NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
   NODE_NAME_CASE(SINT_TO_FP_VL)
   NODE_NAME_CASE(UINT_TO_FP_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 91f1a0f2e3439..7c77f9c2fe8b0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -239,8 +239,10 @@ enum NodeType : unsigned {
   VFCVT_RTZ_X_F_VL,
   VFCVT_RTZ_XU_F_VL,
   VFCVT_X_F_VL,
+  VFCVT_XU_F_VL,
   VFROUND_NOEXCEPT_VL,
   VFCVT_RM_X_F_VL, // Has a rounding mode operand.
+  VFCVT_RM_XU_F_VL, // Has a rounding mode operand.
   SINT_TO_FP_VL,
   UINT_TO_FP_VL,
   VFCVT_RM_F_XU_VL, // Has a rounding mode operand.
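For reference, a minimal IR sketch of the pattern the new combine targets, modeled on the ceil_nxv1f64_to_si64 test updated later in this diff; the function name is illustrative, and the expected assembly is taken from the updated RV64 CHECK lines rather than being an additional test.

; A ceil followed by fptosi on a scalable vector now selects a single
; statically rounded vfcvt.x.f.v under fsrmi/fsrm, instead of first expanding
; the ceil (vfabs/vmflt/vfcvt/vfsgnj) and then converting with vfcvt.rtz.x.f.v.
declare <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double>)

define <vscale x 1 x i64> @ceil_then_fptosi(<vscale x 1 x double> %x) {
  %a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
  %b = fptosi <vscale x 1 x double> %a to <vscale x 1 x i64>
  ret <vscale x 1 x i64> %b
}
; Expected RV64 codegen after this patch (mirrors the updated CHECK lines):
;   vsetvli a0, zero, e64, m1, ta, ma
;   vmset.m v0
;   fsrmi a0, 3
;   vfcvt.x.f.v v8, v8, v0.t
;   fsrm a0
;   ret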
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index f2d22048babc9..4d52730d1dd39 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -5512,6 +5512,7 @@ defm PseudoVFCVT_X_F : VPseudoVCVTI_V; } defm PseudoVFCVT_RTZ_XU_F : VPseudoVCVTI_V; defm PseudoVFCVT_RTZ_X_F : VPseudoVCVTI_V; +defm PseudoVFCVT_RM_XU_F : VPseudoVCVTI_RM_V; defm PseudoVFCVT_RM_X_F : VPseudoVCVTI_RM_V; defm PseudoVFROUND_NOEXCEPT : VPseudoVFROUND_NOEXCEPT_V; let Uses = [FRM] in { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index bbb55f8ef257e..8b26e6a2475b4 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -159,7 +159,9 @@ def SDT_RISCVVecCvtF2XOp_VL : SDTypeProfile<1, 4, [ ]>; def riscv_vfcvt_rm_x_f_vl : SDNode<"RISCVISD::VFCVT_RM_X_F_VL", SDT_RISCVVecCvtF2XOp_VL>; +def riscv_vfcvt_rm_xu_f_vl : SDNode<"RISCVISD::VFCVT_RM_XU_F_VL", SDT_RISCVVecCvtF2XOp_VL>; def riscv_vfcvt_x_f_vl : SDNode<"RISCVISD::VFCVT_X_F_VL", SDT_RISCVFP2IOp_VL>; +def riscv_vfcvt_xu_f_vl : SDNode<"RISCVISD::VFCVT_XU_F_VL", SDT_RISCVFP2IOp_VL>; def riscv_vfround_noexcept_vl: SDNode<"RISCVISD::VFROUND_NOEXCEPT_VL", SDT_RISCVFPUnOp_VL>; def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL", @@ -1739,7 +1741,9 @@ foreach fvti = AllFloatVectors in { // 13.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions defm : VPatConvertFP2I_RM_VL_V; + defm : VPatConvertFP2I_RM_VL_V; defm : VPatConvertFP2IVL_V; + defm : VPatConvertFP2IVL_V; defm : VPatConvertFP2IVL_V; defm : VPatConvertFP2IVL_V; defm : VPatConvertI2FPVL_V; diff --git a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll index 7f8644edfef28..de270fce84ea0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll @@ -251,29 +251,13 @@ define @trunc_nxv1f64_to_ui32( %x) { define @trunc_nxv1f64_to_si64( %x) { ; RV32-LABEL: trunc_nxv1f64_to_si64: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI6_0) -; RV32-NEXT: fld ft0, %lo(.LCPI6_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, ft0 -; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: trunc_nxv1f64_to_si64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI6_0) -; RV64-NEXT: fld ft0, %lo(.LCPI6_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, ft0 -; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.trunc.nxv1f64( %x) @@ -284,29 +268,13 @@ define @trunc_nxv1f64_to_si64( %x) { define @trunc_nxv1f64_to_ui64( %x) { ; RV32-LABEL: trunc_nxv1f64_to_ui64: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI7_0) -; RV32-NEXT: fld ft0, %lo(.LCPI7_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, ft0 -; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV32-NEXT: vsetvli zero, zero, e64, 
m1, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: trunc_nxv1f64_to_ui64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI7_0) -; RV64-NEXT: fld ft0, %lo(.LCPI7_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, ft0 -; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.trunc.nxv1f64( %x) @@ -561,29 +529,13 @@ define @trunc_nxv4f64_to_ui32( %x) { define @trunc_nxv4f64_to_si64( %x) { ; RV32-LABEL: trunc_nxv4f64_to_si64: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI14_0) -; RV32-NEXT: fld ft0, %lo(.LCPI14_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, ft0 -; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: trunc_nxv4f64_to_si64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI14_0) -; RV64-NEXT: fld ft0, %lo(.LCPI14_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, ft0 -; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t -; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.trunc.nxv4f64( %x) @@ -594,29 +546,13 @@ define @trunc_nxv4f64_to_si64( %x) { define @trunc_nxv4f64_to_ui64( %x) { ; RV32-LABEL: trunc_nxv4f64_to_ui64: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI15_0) -; RV32-NEXT: fld ft0, %lo(.LCPI15_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, ft0 -; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: trunc_nxv4f64_to_ui64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI15_0) -; RV64-NEXT: fld ft0, %lo(.LCPI15_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, ft0 -; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t -; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.trunc.nxv4f64( %x) @@ -895,34 +831,20 @@ define @ceil_nxv1f64_to_ui32( %x) { define @ceil_nxv1f64_to_si64( %x) { ; RV32-LABEL: ceil_nxv1f64_to_si64: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI22_0) -; RV32-NEXT: fld ft0, %lo(.LCPI22_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: fsrmi a0, 3 -; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t ; RV32-NEXT: fsrm a0 -; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: ceil_nxv1f64_to_si64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI22_0) -; 
RV64-NEXT: fld ft0, %lo(.LCPI22_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: fsrmi a0, 3 -; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t ; RV64-NEXT: fsrm a0 -; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.ceil.nxv1f64( %x) %b = fptosi %a to @@ -932,34 +854,20 @@ define @ceil_nxv1f64_to_si64( %x) { define @ceil_nxv1f64_to_ui64( %x) { ; RV32-LABEL: ceil_nxv1f64_to_ui64: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI23_0) -; RV32-NEXT: fld ft0, %lo(.LCPI23_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: fsrmi a0, 3 -; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.xu.f.v v8, v8, v0.t ; RV32-NEXT: fsrm a0 -; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: ceil_nxv1f64_to_ui64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI23_0) -; RV64-NEXT: fld ft0, %lo(.LCPI23_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: fsrmi a0, 3 -; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.xu.f.v v8, v8, v0.t ; RV64-NEXT: fsrm a0 -; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.ceil.nxv1f64( %x) %b = fptoui %a to @@ -1237,34 +1145,20 @@ define @ceil_nxv4f64_to_ui32( %x) { define @ceil_nxv4f64_to_si64( %x) { ; RV32-LABEL: ceil_nxv4f64_to_si64: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI30_0) -; RV32-NEXT: fld ft0, %lo(.LCPI30_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, ft0 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: fsrmi a0, 3 -; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t ; RV32-NEXT: fsrm a0 -; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: ceil_nxv4f64_to_si64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI30_0) -; RV64-NEXT: fld ft0, %lo(.LCPI30_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, ft0 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: fsrmi a0, 3 -; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t ; RV64-NEXT: fsrm a0 -; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t -; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.ceil.nxv4f64( %x) %b = fptosi %a to @@ -1274,34 +1168,20 @@ define @ceil_nxv4f64_to_si64( %x) { define @ceil_nxv4f64_to_ui64( %x) { ; RV32-LABEL: ceil_nxv4f64_to_ui64: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI31_0) -; RV32-NEXT: fld ft0, %lo(.LCPI31_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, ft0 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: fsrmi a0, 3 -; 
RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.xu.f.v v8, v8, v0.t ; RV32-NEXT: fsrm a0 -; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: ceil_nxv4f64_to_ui64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI31_0) -; RV64-NEXT: fld ft0, %lo(.LCPI31_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, ft0 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: fsrmi a0, 3 -; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.xu.f.v v8, v8, v0.t ; RV64-NEXT: fsrm a0 -; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t -; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.ceil.nxv4f64( %x) %b = fptoui %a to diff --git a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll index 1c3612651fe12..334bc29e23708 100644 --- a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll @@ -165,29 +165,13 @@ define @trunc_nxv1f32_to_ui16( %x) { define @trunc_nxv1f32_to_si32( %x) { ; RV32-LABEL: trunc_nxv1f32_to_si32: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI4_0) -; RV32-NEXT: flw ft0, %lo(.LCPI4_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, ft0 -; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: trunc_nxv1f32_to_si32: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI4_0) -; RV64-NEXT: flw ft0, %lo(.LCPI4_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, ft0 -; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.trunc.nxv1f32( %x) @@ -198,29 +182,13 @@ define @trunc_nxv1f32_to_si32( %x) { define @trunc_nxv1f32_to_ui32( %x) { ; RV32-LABEL: trunc_nxv1f32_to_ui32: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI5_0) -; RV32-NEXT: flw ft0, %lo(.LCPI5_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, ft0 -; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: trunc_nxv1f32_to_ui32: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI5_0) -; RV64-NEXT: flw ft0, %lo(.LCPI5_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, ft0 -; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.trunc.nxv1f32( %x) @@ -459,29 +427,13 @@ define @trunc_nxv4f32_to_ui16( %x) { define @trunc_nxv4f32_to_si32( %x) { ; RV32-LABEL: trunc_nxv4f32_to_si32: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI12_0) -; RV32-NEXT: flw ft0, 
%lo(.LCPI12_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV32-NEXT: vfabs.v v10, v8 -; RV32-NEXT: vmflt.vf v0, v10, ft0 -; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: trunc_nxv4f32_to_si32: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI12_0) -; RV64-NEXT: flw ft0, %lo(.LCPI12_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV64-NEXT: vfabs.v v10, v8 -; RV64-NEXT: vmflt.vf v0, v10, ft0 -; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.trunc.nxv4f32( %x) @@ -492,29 +444,13 @@ define @trunc_nxv4f32_to_si32( %x) { define @trunc_nxv4f32_to_ui32( %x) { ; RV32-LABEL: trunc_nxv4f32_to_ui32: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI13_0) -; RV32-NEXT: flw ft0, %lo(.LCPI13_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV32-NEXT: vfabs.v v10, v8 -; RV32-NEXT: vmflt.vf v0, v10, ft0 -; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: trunc_nxv4f32_to_ui32: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI13_0) -; RV64-NEXT: flw ft0, %lo(.LCPI13_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV64-NEXT: vfabs.v v10, v8 -; RV64-NEXT: vmflt.vf v0, v10, ft0 -; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.trunc.nxv4f32( %x) @@ -769,34 +705,20 @@ define @ceil_nxv1f32_to_ui16( %x) { define @ceil_nxv1f32_to_si32( %x) { ; RV32-LABEL: ceil_nxv1f32_to_si32: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI20_0) -; RV32-NEXT: flw ft0, %lo(.LCPI20_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: fsrmi a0, 3 -; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t ; RV32-NEXT: fsrm a0 -; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: ceil_nxv1f32_to_si32: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI20_0) -; RV64-NEXT: flw ft0, %lo(.LCPI20_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: fsrmi a0, 3 -; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t ; RV64-NEXT: fsrm a0 -; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.ceil.nxv1f32( %x) %b = fptosi %a to @@ -806,34 +728,20 @@ define @ceil_nxv1f32_to_si32( %x) { define @ceil_nxv1f32_to_ui32( %x) { ; RV32-LABEL: ceil_nxv1f32_to_ui32: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI21_0) -; RV32-NEXT: flw ft0, %lo(.LCPI21_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; RV32-NEXT: 
vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: fsrmi a0, 3 -; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.xu.f.v v8, v8, v0.t ; RV32-NEXT: fsrm a0 -; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: ceil_nxv1f32_to_ui32: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI21_0) -; RV64-NEXT: flw ft0, %lo(.LCPI21_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: fsrmi a0, 3 -; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.xu.f.v v8, v8, v0.t ; RV64-NEXT: fsrm a0 -; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.ceil.nxv1f32( %x) %b = fptoui %a to @@ -1095,34 +1003,20 @@ define @ceil_nxv4f32_to_ui16( %x) { define @ceil_nxv4f32_to_si32( %x) { ; RV32-LABEL: ceil_nxv4f32_to_si32: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI28_0) -; RV32-NEXT: flw ft0, %lo(.LCPI28_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV32-NEXT: vfabs.v v10, v8 -; RV32-NEXT: vmflt.vf v0, v10, ft0 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: fsrmi a0, 3 -; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t ; RV32-NEXT: fsrm a0 -; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: ceil_nxv4f32_to_si32: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI28_0) -; RV64-NEXT: flw ft0, %lo(.LCPI28_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV64-NEXT: vfabs.v v10, v8 -; RV64-NEXT: vmflt.vf v0, v10, ft0 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: fsrmi a0, 3 -; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t ; RV64-NEXT: fsrm a0 -; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.ceil.nxv4f32( %x) %b = fptosi %a to @@ -1132,34 +1026,20 @@ define @ceil_nxv4f32_to_si32( %x) { define @ceil_nxv4f32_to_ui32( %x) { ; RV32-LABEL: ceil_nxv4f32_to_ui32: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI29_0) -; RV32-NEXT: flw ft0, %lo(.LCPI29_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV32-NEXT: vfabs.v v10, v8 -; RV32-NEXT: vmflt.vf v0, v10, ft0 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: fsrmi a0, 3 -; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.xu.f.v v8, v8, v0.t ; RV32-NEXT: fsrm a0 -; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: ceil_nxv4f32_to_ui32: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI29_0) -; RV64-NEXT: flw ft0, %lo(.LCPI29_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV64-NEXT: vfabs.v v10, v8 -; RV64-NEXT: vmflt.vf v0, v10, ft0 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: fsrmi a0, 3 -; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.xu.f.v v8, v8, v0.t ; RV64-NEXT: fsrm a0 -; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; 
RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.ceil.nxv4f32( %x) %b = fptoui %a to diff --git a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll index e488874884286..8a249b4272a81 100644 --- a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll @@ -87,29 +87,13 @@ define @trunc_nxv1f16_to_ui8( %x) { define @trunc_nxv1f16_to_si16( %x) { ; RV32-LABEL: trunc_nxv1f16_to_si16: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI2_0) -; RV32-NEXT: flh ft0, %lo(.LCPI2_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, ft0 -; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: trunc_nxv1f16_to_si16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI2_0) -; RV64-NEXT: flh ft0, %lo(.LCPI2_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, ft0 -; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.trunc.nxv1f16( %x) @@ -120,29 +104,13 @@ define @trunc_nxv1f16_to_si16( %x) { define @trunc_nxv1f16_to_ui16( %x) { ; RV32-LABEL: trunc_nxv1f16_to_ui16: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI3_0) -; RV32-NEXT: flh ft0, %lo(.LCPI3_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, ft0 -; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: trunc_nxv1f16_to_ui16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI3_0) -; RV64-NEXT: flh ft0, %lo(.LCPI3_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, ft0 -; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.trunc.nxv1f16( %x) @@ -377,29 +345,13 @@ define @trunc_nxv4f16_to_ui8( %x) { define @trunc_nxv4f16_to_si16( %x) { ; RV32-LABEL: trunc_nxv4f16_to_si16: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI10_0) -; RV32-NEXT: flh ft0, %lo(.LCPI10_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, ft0 -; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: trunc_nxv4f16_to_si16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI10_0) -; RV64-NEXT: flh ft0, %lo(.LCPI10_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, ft0 -; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV64-NEXT: ret %a = call 
@llvm.trunc.nxv4f16( %x) @@ -410,29 +362,13 @@ define @trunc_nxv4f16_to_si16( %x) { define @trunc_nxv4f16_to_ui16( %x) { ; RV32-LABEL: trunc_nxv4f16_to_ui16: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI11_0) -; RV32-NEXT: flh ft0, %lo(.LCPI11_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, ft0 -; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: trunc_nxv4f16_to_ui16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI11_0) -; RV64-NEXT: flh ft0, %lo(.LCPI11_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, ft0 -; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.trunc.nxv4f16( %x) @@ -675,34 +611,20 @@ define @ceil_nxv1f16_to_ui8( %x) { define @ceil_nxv1f16_to_si16( %x) { ; RV32-LABEL: ceil_nxv1f16_to_si16: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI18_0) -; RV32-NEXT: flh ft0, %lo(.LCPI18_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: fsrmi a0, 3 -; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t ; RV32-NEXT: fsrm a0 -; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: ceil_nxv1f16_to_si16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI18_0) -; RV64-NEXT: flh ft0, %lo(.LCPI18_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: fsrmi a0, 3 -; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t ; RV64-NEXT: fsrm a0 -; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.ceil.nxv1f16( %x) %b = fptosi %a to @@ -712,34 +634,20 @@ define @ceil_nxv1f16_to_si16( %x) { define @ceil_nxv1f16_to_ui16( %x) { ; RV32-LABEL: ceil_nxv1f16_to_ui16: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI19_0) -; RV32-NEXT: flh ft0, %lo(.LCPI19_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: fsrmi a0, 3 -; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.xu.f.v v8, v8, v0.t ; RV32-NEXT: fsrm a0 -; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: ceil_nxv1f16_to_ui16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI19_0) -; RV64-NEXT: flh ft0, %lo(.LCPI19_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: fsrmi a0, 3 -; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.xu.f.v v8, v8, v0.t ; RV64-NEXT: fsrm a0 -; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV64-NEXT: vsetvli zero, zero, e16, 
mf4, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.ceil.nxv1f16( %x) %b = fptoui %a to @@ -997,34 +905,20 @@ define @ceil_nxv4f16_to_ui8( %x) { define @ceil_nxv4f16_to_si16( %x) { ; RV32-LABEL: ceil_nxv4f16_to_si16: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI26_0) -; RV32-NEXT: flh ft0, %lo(.LCPI26_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: fsrmi a0, 3 -; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t ; RV32-NEXT: fsrm a0 -; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: ceil_nxv4f16_to_si16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI26_0) -; RV64-NEXT: flh ft0, %lo(.LCPI26_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: fsrmi a0, 3 -; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.x.f.v v8, v8, v0.t ; RV64-NEXT: fsrm a0 -; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.ceil.nxv4f16( %x) %b = fptosi %a to @@ -1034,34 +928,20 @@ define @ceil_nxv4f16_to_si16( %x) { define @ceil_nxv4f16_to_ui16( %x) { ; RV32-LABEL: ceil_nxv4f16_to_ui16: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI27_0) -; RV32-NEXT: flh ft0, %lo(.LCPI27_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, ft0 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: fsrmi a0, 3 -; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.xu.f.v v8, v8, v0.t ; RV32-NEXT: fsrm a0 -; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: ceil_nxv4f16_to_ui16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI27_0) -; RV64-NEXT: flh ft0, %lo(.LCPI27_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, ft0 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: fsrmi a0, 3 -; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.xu.f.v v8, v8, v0.t ; RV64-NEXT: fsrm a0 -; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; RV64-NEXT: ret %a = call @llvm.ceil.nxv4f16( %x) %b = fptoui %a to
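; The same combine also covers fixed-length vectors: performFP_TO_INTCombine
; converts the source to its scalable container, emits the VFCVT_RTZ_*_F_VL or
; VFCVT_RM_*_F_VL node on the container type, and converts the result back.
; A minimal sketch of such a case follows (the function name and the
; <2 x double> / <2 x i64> types are illustrative; this part of the diff adds
; no fixed-length tests, so no CHECK lines are reproduced here):
declare <2 x double> @llvm.trunc.v2f64(<2 x double>)

define <2 x i64> @trunc_v2f64_to_si64(<2 x double> %x) {
  %a = call <2 x double> @llvm.trunc.v2f64(<2 x double> %x)
  %b = fptosi <2 x double> %a to <2 x i64>
  ret <2 x i64> %b
}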