diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 5e3b42fdc7aba..8e248d22b5776 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -11711,7 +11711,11 @@ static SDValue performFP_TO_INTCombine(SDNode *N, return SDValue(); RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode()); - if (FRM == RISCVFPRndMode::Invalid) + // If the result is invalid, we didn't find a foldable instruction. + // If the result is dynamic, then we found an frint which we don't yet + // support. It will cause 7 to be written to the FRM CSR for vector. + // FIXME: We could support this by using VFCVT_X_F_VL/VFCVT_XU_F_VL below. + if (FRM == RISCVFPRndMode::Invalid || FRM == RISCVFPRndMode::DYN) return SDValue(); SDLoc DL(N); diff --git a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll index 2639ceaa16d0f..4fdaacef1105d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll @@ -1035,3 +1035,623 @@ define @ceil_nxv4f64_to_ui64( %x) { %b = fptoui %a to ret %b } + +; ================================================================================ +; rint +; ================================================================================ + +declare @llvm.rint.nxv1f64() + +define @rint_nxv1f64_to_si8( %x) { +; RV32-LABEL: rint_nxv1f64_to_si8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI32_0) +; RV32-NEXT: fld fa5, %lo(.LCPI32_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vnsrl.wi v8, v9, 0 +; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV32-NEXT: vnsrl.wi v8, v8, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv1f64_to_si8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI32_0) +; RV64-NEXT: fld fa5, %lo(.LCPI32_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vnsrl.wi v8, v9, 0 +; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV64-NEXT: vnsrl.wi v8, v8, 0 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv1f64( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv1f64_to_ui8( %x) { +; RV32-LABEL: rint_nxv1f64_to_ui8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI33_0) +; RV32-NEXT: fld fa5, %lo(.LCPI33_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vnsrl.wi v8, v9, 0 +; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, 
ma +; RV32-NEXT: vnsrl.wi v8, v8, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv1f64_to_ui8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI33_0) +; RV64-NEXT: fld fa5, %lo(.LCPI33_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vnsrl.wi v8, v9, 0 +; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV64-NEXT: vnsrl.wi v8, v8, 0 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv1f64( %x) + %b = fptoui %a to + ret %b +} + +define @rint_nxv1f64_to_si16( %x) { +; RV32-LABEL: rint_nxv1f64_to_si16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI34_0) +; RV32-NEXT: fld fa5, %lo(.LCPI34_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vnsrl.wi v8, v9, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv1f64_to_si16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI34_0) +; RV64-NEXT: fld fa5, %lo(.LCPI34_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vnsrl.wi v8, v9, 0 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv1f64( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv1f64_to_ui16( %x) { +; RV32-LABEL: rint_nxv1f64_to_ui16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI35_0) +; RV32-NEXT: fld fa5, %lo(.LCPI35_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-NEXT: vnsrl.wi v8, v9, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv1f64_to_ui16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI35_0) +; RV64-NEXT: fld fa5, %lo(.LCPI35_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV64-NEXT: vnsrl.wi v8, v9, 0 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv1f64( %x) + %b = fptoui %a to + ret %b +} + +define @rint_nxv1f64_to_si32( %x) { +; RV32-LABEL: rint_nxv1f64_to_si32: +; RV32: # %bb.0: 
+; RV32-NEXT: lui a0, %hi(.LCPI36_0) +; RV32-NEXT: fld fa5, %lo(.LCPI36_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv1f64_to_si32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI36_0) +; RV64-NEXT: fld fa5, %lo(.LCPI36_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv1f64( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv1f64_to_ui32( %x) { +; RV32-LABEL: rint_nxv1f64_to_ui32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI37_0) +; RV32-NEXT: fld fa5, %lo(.LCPI37_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv1f64_to_ui32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI37_0) +; RV64-NEXT: fld fa5, %lo(.LCPI37_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv1f64( %x) + %b = fptoui %a to + ret %b +} + +define @rint_nxv1f64_to_si64( %x) { +; RV32-LABEL: rint_nxv1f64_to_si64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI38_0) +; RV32-NEXT: fld fa5, %lo(.LCPI38_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv1f64_to_si64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI38_0) +; RV64-NEXT: fld fa5, %lo(.LCPI38_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv1f64( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv1f64_to_ui64( %x) { +; RV32-LABEL: rint_nxv1f64_to_ui64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI39_0) +; RV32-NEXT: fld fa5, %lo(.LCPI39_0)(a0) 
+; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv1f64_to_ui64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI39_0) +; RV64-NEXT: fld fa5, %lo(.LCPI39_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv1f64( %x) + %b = fptoui %a to + ret %b +} + +; ================================================================================ +; rint +; ================================================================================ + +declare @llvm.rint.nxv4f64() + +define @rint_nxv4f64_to_si8( %x) { +; RV32-LABEL: rint_nxv4f64_to_si8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI40_0) +; RV32-NEXT: fld fa5, %lo(.LCPI40_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v12, v8 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vnsrl.wi v8, v12, 0 +; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32-NEXT: vnsrl.wi v8, v8, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv4f64_to_si8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI40_0) +; RV64-NEXT: fld fa5, %lo(.LCPI40_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v12, v8 +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vnsrl.wi v8, v12, 0 +; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV64-NEXT: vnsrl.wi v8, v8, 0 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv4f64( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv4f64_to_ui8( %x) { +; RV32-LABEL: rint_nxv4f64_to_ui8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI41_0) +; RV32-NEXT: fld fa5, %lo(.LCPI41_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vnsrl.wi v8, v12, 0 +; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32-NEXT: vnsrl.wi v8, v8, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv4f64_to_ui8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI41_0) +; RV64-NEXT: fld fa5, %lo(.LCPI41_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: 
vmflt.vf v0, v12, fa5 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vnsrl.wi v8, v12, 0 +; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV64-NEXT: vnsrl.wi v8, v8, 0 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv4f64( %x) + %b = fptoui %a to + ret %b +} + +define @rint_nxv4f64_to_si16( %x) { +; RV32-LABEL: rint_nxv4f64_to_si16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI42_0) +; RV32-NEXT: fld fa5, %lo(.LCPI42_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v12, v8 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vnsrl.wi v8, v12, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv4f64_to_si16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI42_0) +; RV64-NEXT: fld fa5, %lo(.LCPI42_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v12, v8 +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vnsrl.wi v8, v12, 0 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv4f64( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv4f64_to_ui16( %x) { +; RV32-LABEL: rint_nxv4f64_to_ui16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI43_0) +; RV32-NEXT: fld fa5, %lo(.LCPI43_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vnsrl.wi v8, v12, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv4f64_to_ui16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI43_0) +; RV64-NEXT: fld fa5, %lo(.LCPI43_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vnsrl.wi v8, v12, 0 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv4f64( %x) + %b = fptoui %a to + ret %b +} + +define @rint_nxv4f64_to_si32( %x) { +; RV32-LABEL: rint_nxv4f64_to_si32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI44_0) +; RV32-NEXT: fld fa5, %lo(.LCPI44_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: 
vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v12, v8 +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv4f64_to_si32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI44_0) +; RV64-NEXT: fld fa5, %lo(.LCPI44_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v12, v8 +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv4f64( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv4f64_to_ui32( %x) { +; RV32-LABEL: rint_nxv4f64_to_ui32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI45_0) +; RV32-NEXT: fld fa5, %lo(.LCPI45_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv4f64_to_ui32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI45_0) +; RV64-NEXT: fld fa5, %lo(.LCPI45_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv4f64( %x) + %b = fptoui %a to + ret %b +} + +define @rint_nxv4f64_to_si64( %x) { +; RV32-LABEL: rint_nxv4f64_to_si64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI46_0) +; RV32-NEXT: fld fa5, %lo(.LCPI46_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv4f64_to_si64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI46_0) +; RV64-NEXT: fld fa5, %lo(.LCPI46_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv4f64( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv4f64_to_ui64( %x) { +; RV32-LABEL: rint_nxv4f64_to_ui64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI47_0) +; RV32-NEXT: fld fa5, %lo(.LCPI47_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; 
RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv4f64_to_ui64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI47_0) +; RV64-NEXT: fld fa5, %lo(.LCPI47_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv4f64( %x) + %b = fptoui %a to + ret %b +} diff --git a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll index bad4e74d94ee6..7a31339f619bb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll @@ -843,3 +843,297 @@ define @ceil_nxv4f32_to_ui64( %x) { %b = fptoui %a to ret %b } + +; ================================================================================ +; rint +; ================================================================================ + +declare @llvm.rint.nxv4f32() + +define @rint_nxv4f32_to_si8( %x) { +; RV32-LABEL: rint_nxv4f32_to_si8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: lui a0, 307200 +; RV32-NEXT: fmv.w.x fa5, a0 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v10, v8 +; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32-NEXT: vnsrl.wi v8, v10, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv4f32_to_si8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: lui a0, 307200 +; RV64-NEXT: fmv.w.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v10, v8 +; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV64-NEXT: vnsrl.wi v8, v10, 0 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv4f32( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv4f32_to_ui8( %x) { +; RV32-LABEL: rint_nxv4f32_to_ui8: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: lui a0, 307200 +; RV32-NEXT: fmv.w.x fa5, a0 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v10, v8 +; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32-NEXT: vnsrl.wi v8, v10, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv4f32_to_ui8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: lui a0, 307200 +; RV64-NEXT: fmv.w.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv 
v8, v10, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v10, v8 +; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV64-NEXT: vnsrl.wi v8, v10, 0 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv4f32( %x) + %b = fptoui %a to + ret %b +} + +define @rint_nxv4f32_to_si16( %x) { +; RV32-LABEL: rint_nxv4f32_to_si16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: lui a0, 307200 +; RV32-NEXT: fmv.w.x fa5, a0 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vfncvt.rtz.x.f.w v10, v8 +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv4f32_to_si16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: lui a0, 307200 +; RV64-NEXT: fmv.w.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vfncvt.rtz.x.f.w v10, v8 +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv4f32( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv4f32_to_ui16( %x) { +; RV32-LABEL: rint_nxv4f32_to_ui16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: lui a0, 307200 +; RV32-NEXT: fmv.w.x fa5, a0 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vfncvt.rtz.xu.f.w v10, v8 +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv4f32_to_ui16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: lui a0, 307200 +; RV64-NEXT: fmv.w.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-NEXT: vfncvt.rtz.xu.f.w v10, v8 +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv4f32( %x) + %b = fptoui %a to + ret %b +} + +define @rint_nxv4f32_to_si32( %x) { +; RV32-LABEL: rint_nxv4f32_to_si32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: lui a0, 307200 +; RV32-NEXT: fmv.w.x fa5, a0 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv4f32_to_si32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: lui a0, 307200 +; RV64-NEXT: fmv.w.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; 
RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv4f32( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv4f32_to_ui32( %x) { +; RV32-LABEL: rint_nxv4f32_to_ui32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: lui a0, 307200 +; RV32-NEXT: fmv.w.x fa5, a0 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv4f32_to_ui32: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: lui a0, 307200 +; RV64-NEXT: fmv.w.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv4f32( %x) + %b = fptoui %a to + ret %b +} + +define @rint_nxv4f32_to_si64( %x) { +; RV32-LABEL: rint_nxv4f32_to_si64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: lui a0, 307200 +; RV32-NEXT: fmv.w.x fa5, a0 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vfwcvt.rtz.x.f.v v12, v8 +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv4f32_to_si64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: lui a0, 307200 +; RV64-NEXT: fmv.w.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vfwcvt.rtz.x.f.v v12, v8 +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv4f32( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv4f32_to_ui64( %x) { +; RV32-LABEL: rint_nxv4f32_to_ui64: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: lui a0, 307200 +; RV32-NEXT: fmv.w.x fa5, a0 +; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vfwcvt.rtz.xu.f.v v12, v8 +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: rint_nxv4f32_to_ui64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: lui a0, 307200 +; RV64-NEXT: fmv.w.x fa5, a0 +; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vfwcvt.rtz.xu.f.v v12, v8 +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv4f32( %x) + %b = fptoui %a to + ret %b +} diff --git a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll index 3074d45cd9536..e1c09032459cd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll @@ -643,3 +643,475 @@ define @ceil_nxv4f16_to_ui64( %x) { %b = fptoui %a to ret %b } + +; ================================================================================ +; rint +; ================================================================================ + +declare @llvm.rint.nxv1f16() + +define @rint_nxv1f16_to_si8( %x) { +; CHECK-LABEL: rint_nxv1f16_to_si8: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI32_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI32_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv1f16( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv1f16_to_ui8( %x) { +; CHECK-LABEL: rint_nxv1f16_to_ui8: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI33_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI33_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv1f16( %x) + %b = fptoui %a to + ret %b +} + +define @rint_nxv1f16_to_si16( %x) { +; CHECK-LABEL: rint_nxv1f16_to_si16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI34_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI34_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv1f16( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv1f16_to_ui16( %x) { +; CHECK-LABEL: rint_nxv1f16_to_ui16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI35_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI35_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv1f16( %x) + %b = fptoui %a to + ret %b +} + +define @rint_nxv1f16_to_si32( %x) { +; CHECK-LABEL: rint_nxv1f16_to_si32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI36_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI36_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfwcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv1f16( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv1f16_to_ui32( %x) { +; CHECK-LABEL: 
rint_nxv1f16_to_ui32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI37_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI37_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv1f16( %x) + %b = fptoui %a to + ret %b +} + +define @rint_nxv1f16_to_si64( %x) { +; CHECK-LABEL: rint_nxv1f16_to_si64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI38_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI38_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfwcvt.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v9 +; CHECK-NEXT: ret +; RV32-LABEL: rint_nxv1f16_to_si64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI22_0) +; RV32-NEXT: flh fa5, %lo(.LCPI22_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.f.f.v v9, v8 +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfwcvt.rtz.x.f.v v8, v9 +; RV32-NEXT: fsrm a0 +; RV32-NEXT: ret +; RV64-LABEL: rint_nxv1f16_to_si64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI22_0) +; RV64-NEXT: flh fa5, %lo(.LCPI22_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.f.f.v v9, v8 +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfwcvt.rtz.x.f.v v8, v9 +; RV64-NEXT: fsrm a0 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv1f16( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv1f16_to_ui64( %x) { +; CHECK-LABEL: rint_nxv1f16_to_ui64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI39_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI39_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfwcvt.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v9 +; CHECK-NEXT: ret +; RV32-LABEL: rint_nxv1f16_to_ui64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI23_0) +; RV32-NEXT: flh fa5, %lo(.LCPI23_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.f.f.v v9, v8 +; 
RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfwcvt.rtz.xu.f.v v8, v9 +; RV32-NEXT: fsrm a0 +; RV32-NEXT: ret +; RV64-LABEL: rint_nxv1f16_to_ui64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI23_0) +; RV64-NEXT: flh fa5, %lo(.LCPI23_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.f.f.v v9, v8 +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfwcvt.rtz.xu.f.v v8, v9 +; RV64-NEXT: fsrm a0 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv1f16( %x) + %b = fptoui %a to + ret %b +} + +; ================================================================================ +; rint +; ================================================================================ + +declare @llvm.rint.nxv4f16() + +define @rint_nxv4f16_to_si8( %x) { +; CHECK-LABEL: rint_nxv4f16_to_si8: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI40_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI40_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv4f16( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv4f16_to_ui8( %x) { +; CHECK-LABEL: rint_nxv4f16_to_ui8: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI41_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI41_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv4f16( %x) + %b = fptoui %a to + ret %b +} + +define @rint_nxv4f16_to_si16( %x) { +; CHECK-LABEL: rint_nxv4f16_to_si16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI42_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI42_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv4f16( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv4f16_to_ui16( %x) { +; CHECK-LABEL: rint_nxv4f16_to_ui16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI43_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI43_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv4f16( %x) + %b = fptoui %a to + ret %b +} + +define 
@rint_nxv4f16_to_si32( %x) { +; CHECK-LABEL: rint_nxv4f16_to_si32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI44_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI44_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv4f16( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv4f16_to_ui32( %x) { +; CHECK-LABEL: rint_nxv4f16_to_ui32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI45_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI45_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %a = call @llvm.rint.nxv4f16( %x) + %b = fptoui %a to + ret %b +} + +define @rint_nxv4f16_to_si64( %x) { +; CHECK-LABEL: rint_nxv4f16_to_si64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI46_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI46_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfwcvt.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v12 +; CHECK-NEXT: ret +; RV32-LABEL: rint_nxv4f16_to_si64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI30_0) +; RV32-NEXT: flh fa5, %lo(.LCPI30_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.f.f.v v12, v8 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfwcvt.rtz.x.f.v v8, v12 +; RV32-NEXT: fsrm a0 +; RV32-NEXT: ret +; RV64-LABEL: rint_nxv4f16_to_si64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI30_0) +; RV64-NEXT: flh fa5, %lo(.LCPI30_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.f.f.v v12, v8 +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfwcvt.rtz.x.f.v v8, v12 +; RV64-NEXT: fsrm a0 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv4f16( %x) + %b = fptosi %a to + ret %b +} + +define @rint_nxv4f16_to_ui64( %x) { +; CHECK-LABEL: rint_nxv4f16_to_ui64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI47_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI47_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; 
CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfwcvt.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v12 +; CHECK-NEXT: ret +; RV32-LABEL: rint_nxv4f16_to_ui64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI31_0) +; RV32-NEXT: flh fa5, %lo(.LCPI31_0)(a0) +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV32-NEXT: vfwcvt.f.f.v v12, v8 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfwcvt.rtz.xu.f.v v8, v12 +; RV32-NEXT: fsrm a0 +; RV32-NEXT: ret +; RV64-LABEL: rint_nxv4f16_to_ui64: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI31_0) +; RV64-NEXT: flh fa5, %lo(.LCPI31_0)(a0) +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; RV64-NEXT: vfwcvt.f.f.v v12, v8 +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfwcvt.rtz.xu.f.v v8, v12 +; RV64-NEXT: fsrm a0 +; RV64-NEXT: ret + %a = call @llvm.rint.nxv4f16( %x) + %b = fptoui %a to + ret %b +}
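; Illustrative sketch (not part of the patch): the IR shape exercised by the new
; rint tests above. llvm.rint rounds using the dynamic rounding mode, so
; matchRoundingOp reports RISCVFPRndMode::DYN for it and performFP_TO_INTCombine
; now declines to fold the pair; per the comment in the C++ change, folding it
; would have caused 7 to be written to the FRM CSR for the vector convert.
define <vscale x 1 x i32> @rint_then_fptosi(<vscale x 1 x double> %x) {
  %a = call <vscale x 1 x double> @llvm.rint.nxv1f64(<vscale x 1 x double> %x)
  %b = fptosi <vscale x 1 x double> %a to <vscale x 1 x i32>
  ret <vscale x 1 x i32> %b
}
declare <vscale x 1 x double> @llvm.rint.nxv1f64(<vscale x 1 x double>)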