diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 304c05d9378f2..f942f395d5328 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1051,6 +1051,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::FP_TO_SINT); setTargetDAGCombine(ISD::FP_TO_UINT); + setTargetDAGCombine(ISD::FP_TO_SINT_SAT); + setTargetDAGCombine(ISD::FP_TO_UINT_SAT); } if (Subtarget.hasVInstructions()) { setTargetDAGCombine(ISD::FCOPYSIGN); @@ -7180,13 +7182,24 @@ static SDValue combineMUL_VLToVWMUL(SDNode *N, SDValue Op0, SDValue Op1, return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Mask, VL); } +static RISCVFPRndMode::RoundingMode matchRoundingOp(SDValue Op) { + switch (Op.getOpcode()) { + case ISD::FROUNDEVEN: return RISCVFPRndMode::RNE; + case ISD::FTRUNC: return RISCVFPRndMode::RTZ; + case ISD::FFLOOR: return RISCVFPRndMode::RDN; + case ISD::FCEIL: return RISCVFPRndMode::RUP; + case ISD::FROUND: return RISCVFPRndMode::RMM; + } + + return RISCVFPRndMode::Invalid; +} + // Fold // (fp_to_int (froundeven X)) -> fcvt X, rne // (fp_to_int (ftrunc X)) -> fcvt X, rtz // (fp_to_int (ffloor X)) -> fcvt X, rdn // (fp_to_int (fceil X)) -> fcvt X, rup // (fp_to_int (fround X)) -> fcvt X, rmm -// FIXME: We should also do this for fp_to_int_sat. static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget) { @@ -7210,16 +7223,9 @@ static SDValue performFP_TO_INTCombine(SDNode *N, if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) return SDValue(); - RISCVFPRndMode::RoundingMode FRM; - switch (Src->getOpcode()) { - default: + RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src); + if (FRM == RISCVFPRndMode::Invalid) return SDValue(); - case ISD::FROUNDEVEN: FRM = RISCVFPRndMode::RNE; break; - case ISD::FTRUNC: FRM = RISCVFPRndMode::RTZ; break; - case ISD::FFLOOR: FRM = RISCVFPRndMode::RDN; break; - case ISD::FCEIL: FRM = RISCVFPRndMode::RUP; break; - case ISD::FROUND: FRM = RISCVFPRndMode::RMM; break; - } bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; @@ -7235,6 +7241,64 @@ static SDValue performFP_TO_INTCombine(SDNode *N, return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt); } +// Fold +// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne)) +// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz)) +// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn)) +// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup)) +// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm)) +static SDValue performFP_TO_INT_SATCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const RISCVSubtarget &Subtarget) { + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT XLenVT = Subtarget.getXLenVT(); + + // Only handle XLen types. Other types narrower than XLen will eventually be + // legalized to XLenVT. + EVT DstVT = N->getValueType(0); + if (DstVT != XLenVT) + return SDValue(); + + SDValue Src = N->getOperand(0); + + // Ensure the FP type is also legal. + if (!TLI.isTypeLegal(Src.getValueType())) + return SDValue(); + + // Don't do this for f16 with Zfhmin and not Zfh. 
+  if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
+    return SDValue();
+
+  EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
+  if (FRM == RISCVFPRndMode::Invalid)
+    return SDValue();
+
+  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
+
+  unsigned Opc;
+  if (SatVT == DstVT)
+    Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
+  else if (DstVT == MVT::i64 && SatVT == MVT::i32)
+    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
+  else
+    return SDValue();
+  // FIXME: Support other SatVTs by clamping before or after the conversion.
+
+  Src = Src.getOperand(0);
+
+  SDLoc DL(N);
+  SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
+                                DAG.getTargetConstant(FRM, DL, XLenVT));
+
+  // RISCV FP-to-int conversions saturate to the destination register size, but
+  // don't produce 0 for nan.
+  SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
+  return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
+}
+
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -7548,6 +7612,9 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
     return performFP_TO_INTCombine(N, DCI, Subtarget);
+  case ISD::FP_TO_SINT_SAT:
+  case ISD::FP_TO_UINT_SAT:
+    return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
   case ISD::FCOPYSIGN: {
     EVT VT = N->getValueType(0);
     if (!VT.isVector())
diff --git a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
new file mode 100644
index 0000000000000..38d82f6e46ff2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
@@ -0,0 +1,940 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \
+; RUN: -target-abi=ilp32d | FileCheck -check-prefix=RV32IFD %s
+; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \
+; RUN: -target-abi=lp64d | FileCheck -check-prefix=RV64IFD %s
+
+define signext i32 @test_floor_si32(double %x) {
+; RV32IFD-LABEL: test_floor_si32:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: feq.d a0, fa0, fa0
+; RV32IFD-NEXT: bnez a0, .LBB0_2
+; RV32IFD-NEXT: # %bb.1:
+; RV32IFD-NEXT: li a0, 0
+; RV32IFD-NEXT: ret
+; RV32IFD-NEXT: .LBB0_2:
+; RV32IFD-NEXT: fcvt.w.d a0, fa0, rdn
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: test_floor_si32:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: feq.d a0, fa0, fa0
+; RV64IFD-NEXT: bnez a0, .LBB0_2
+; RV64IFD-NEXT: # %bb.1:
+; RV64IFD-NEXT: li a0, 0
+; RV64IFD-NEXT: ret
+; RV64IFD-NEXT: .LBB0_2:
+; RV64IFD-NEXT: fcvt.w.d a0, fa0, rdn
+; RV64IFD-NEXT: ret
+ %a = call double @llvm.floor.f64(double %x)
+ %b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+ ret i32 %b
+}
+
+define i64 @test_floor_si64(double %x) nounwind {
+; RV32IFD-LABEL: test_floor_si64:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT: call floor@plt
+; RV32IFD-NEXT: lui a0, %hi(.LCPI1_0)
+; RV32IFD-NEXT: fld ft0, %lo(.LCPI1_0)(a0)
+; RV32IFD-NEXT: fmv.d fs0, fa0
+; RV32IFD-NEXT: fle.d s0, ft0, fa0
+; RV32IFD-NEXT: call __fixdfdi@plt
+; RV32IFD-NEXT: mv a2, a0
+; RV32IFD-NEXT: bnez s0, .LBB1_2
+; RV32IFD-NEXT: # %bb.1:
+; RV32IFD-NEXT: li a2, 0
+; RV32IFD-NEXT: .LBB1_2:
+; RV32IFD-NEXT: lui a0, %hi(.LCPI1_1)
+; RV32IFD-NEXT: 
fld ft0, %lo(.LCPI1_1)(a0) +; RV32IFD-NEXT: flt.d a3, ft0, fs0 +; RV32IFD-NEXT: li a0, -1 +; RV32IFD-NEXT: beqz a3, .LBB1_9 +; RV32IFD-NEXT: # %bb.3: +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: beqz a2, .LBB1_10 +; RV32IFD-NEXT: .LBB1_4: +; RV32IFD-NEXT: lui a4, 524288 +; RV32IFD-NEXT: beqz s0, .LBB1_11 +; RV32IFD-NEXT: .LBB1_5: +; RV32IFD-NEXT: bnez a3, .LBB1_12 +; RV32IFD-NEXT: .LBB1_6: +; RV32IFD-NEXT: bnez a2, .LBB1_8 +; RV32IFD-NEXT: .LBB1_7: +; RV32IFD-NEXT: li a1, 0 +; RV32IFD-NEXT: .LBB1_8: +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB1_9: +; RV32IFD-NEXT: mv a0, a2 +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: bnez a2, .LBB1_4 +; RV32IFD-NEXT: .LBB1_10: +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: lui a4, 524288 +; RV32IFD-NEXT: bnez s0, .LBB1_5 +; RV32IFD-NEXT: .LBB1_11: +; RV32IFD-NEXT: lui a1, 524288 +; RV32IFD-NEXT: beqz a3, .LBB1_6 +; RV32IFD-NEXT: .LBB1_12: +; RV32IFD-NEXT: addi a1, a4, -1 +; RV32IFD-NEXT: beqz a2, .LBB1_7 +; RV32IFD-NEXT: j .LBB1_8 +; +; RV64IFD-LABEL: test_floor_si64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: bnez a0, .LBB1_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB1_2: +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rdn +; RV64IFD-NEXT: ret + %a = call double @llvm.floor.f64(double %x) + %b = call i64 @llvm.fptosi.sat.i64.f64(double %a) + ret i64 %b +} + +define signext i32 @test_floor_ui32(double %x) { +; RV32IFD-LABEL: test_floor_ui32: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: feq.d a0, fa0, fa0 +; RV32IFD-NEXT: bnez a0, .LBB2_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB2_2: +; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rdn +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: test_floor_ui32: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: bnez a0, .LBB2_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB2_2: +; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rdn +; RV64IFD-NEXT: ret + %a = call double @llvm.floor.f64(double %x) + %b = call i32 @llvm.fptoui.sat.i32.f64(double %a) + ret i32 %b +} + +define i64 @test_floor_ui64(double %x) nounwind { +; RV32IFD-LABEL: test_floor_ui64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: call floor@plt +; RV32IFD-NEXT: fmv.d fs0, fa0 +; RV32IFD-NEXT: fcvt.d.w ft0, zero +; RV32IFD-NEXT: fle.d s0, ft0, fa0 +; RV32IFD-NEXT: call __fixunsdfdi@plt +; RV32IFD-NEXT: mv a3, a0 +; RV32IFD-NEXT: bnez s0, .LBB3_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: li a3, 0 +; RV32IFD-NEXT: .LBB3_2: +; RV32IFD-NEXT: lui a0, %hi(.LCPI3_0) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI3_0)(a0) +; RV32IFD-NEXT: flt.d a4, ft0, fs0 +; RV32IFD-NEXT: li a2, -1 +; RV32IFD-NEXT: li a0, -1 +; RV32IFD-NEXT: beqz a4, .LBB3_7 +; RV32IFD-NEXT: # %bb.3: +; RV32IFD-NEXT: beqz s0, .LBB3_8 +; RV32IFD-NEXT: .LBB3_4: +; RV32IFD-NEXT: bnez a4, .LBB3_6 +; RV32IFD-NEXT: .LBB3_5: +; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: .LBB3_6: +; RV32IFD-NEXT: mv a1, a2 +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; 
RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB3_7: +; RV32IFD-NEXT: mv a0, a3 +; RV32IFD-NEXT: bnez s0, .LBB3_4 +; RV32IFD-NEXT: .LBB3_8: +; RV32IFD-NEXT: li a1, 0 +; RV32IFD-NEXT: beqz a4, .LBB3_5 +; RV32IFD-NEXT: j .LBB3_6 +; +; RV64IFD-LABEL: test_floor_ui64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: bnez a0, .LBB3_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB3_2: +; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rdn +; RV64IFD-NEXT: ret + %a = call double @llvm.floor.f64(double %x) + %b = call i64 @llvm.fptoui.sat.i64.f64(double %a) + ret i64 %b +} + +define signext i32 @test_ceil_si32(double %x) { +; RV32IFD-LABEL: test_ceil_si32: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: feq.d a0, fa0, fa0 +; RV32IFD-NEXT: bnez a0, .LBB4_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB4_2: +; RV32IFD-NEXT: fcvt.w.d a0, fa0, rup +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: test_ceil_si32: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: bnez a0, .LBB4_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB4_2: +; RV64IFD-NEXT: fcvt.w.d a0, fa0, rup +; RV64IFD-NEXT: ret + %a = call double @llvm.ceil.f64(double %x) + %b = call i32 @llvm.fptosi.sat.i32.f64(double %a) + ret i32 %b +} + +define i64 @test_ceil_si64(double %x) nounwind { +; RV32IFD-LABEL: test_ceil_si64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: call ceil@plt +; RV32IFD-NEXT: lui a0, %hi(.LCPI5_0) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI5_0)(a0) +; RV32IFD-NEXT: fmv.d fs0, fa0 +; RV32IFD-NEXT: fle.d s0, ft0, fa0 +; RV32IFD-NEXT: call __fixdfdi@plt +; RV32IFD-NEXT: mv a2, a0 +; RV32IFD-NEXT: bnez s0, .LBB5_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: li a2, 0 +; RV32IFD-NEXT: .LBB5_2: +; RV32IFD-NEXT: lui a0, %hi(.LCPI5_1) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI5_1)(a0) +; RV32IFD-NEXT: flt.d a3, ft0, fs0 +; RV32IFD-NEXT: li a0, -1 +; RV32IFD-NEXT: beqz a3, .LBB5_9 +; RV32IFD-NEXT: # %bb.3: +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: beqz a2, .LBB5_10 +; RV32IFD-NEXT: .LBB5_4: +; RV32IFD-NEXT: lui a4, 524288 +; RV32IFD-NEXT: beqz s0, .LBB5_11 +; RV32IFD-NEXT: .LBB5_5: +; RV32IFD-NEXT: bnez a3, .LBB5_12 +; RV32IFD-NEXT: .LBB5_6: +; RV32IFD-NEXT: bnez a2, .LBB5_8 +; RV32IFD-NEXT: .LBB5_7: +; RV32IFD-NEXT: li a1, 0 +; RV32IFD-NEXT: .LBB5_8: +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB5_9: +; RV32IFD-NEXT: mv a0, a2 +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: bnez a2, .LBB5_4 +; RV32IFD-NEXT: .LBB5_10: +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: lui a4, 524288 +; RV32IFD-NEXT: bnez s0, .LBB5_5 +; RV32IFD-NEXT: .LBB5_11: +; RV32IFD-NEXT: lui a1, 524288 +; RV32IFD-NEXT: beqz a3, .LBB5_6 +; RV32IFD-NEXT: .LBB5_12: +; RV32IFD-NEXT: addi a1, a4, -1 +; RV32IFD-NEXT: beqz a2, .LBB5_7 +; RV32IFD-NEXT: j .LBB5_8 +; +; RV64IFD-LABEL: test_ceil_si64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: bnez a0, .LBB5_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB5_2: +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rup 
+; RV64IFD-NEXT: ret + %a = call double @llvm.ceil.f64(double %x) + %b = call i64 @llvm.fptosi.sat.i64.f64(double %a) + ret i64 %b +} + +define signext i32 @test_ceil_ui32(double %x) { +; RV32IFD-LABEL: test_ceil_ui32: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: feq.d a0, fa0, fa0 +; RV32IFD-NEXT: bnez a0, .LBB6_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB6_2: +; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rup +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: test_ceil_ui32: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: bnez a0, .LBB6_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB6_2: +; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rup +; RV64IFD-NEXT: ret + %a = call double @llvm.ceil.f64(double %x) + %b = call i32 @llvm.fptoui.sat.i32.f64(double %a) + ret i32 %b +} + +define i64 @test_ceil_ui64(double %x) nounwind { +; RV32IFD-LABEL: test_ceil_ui64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: call ceil@plt +; RV32IFD-NEXT: fmv.d fs0, fa0 +; RV32IFD-NEXT: fcvt.d.w ft0, zero +; RV32IFD-NEXT: fle.d s0, ft0, fa0 +; RV32IFD-NEXT: call __fixunsdfdi@plt +; RV32IFD-NEXT: mv a3, a0 +; RV32IFD-NEXT: bnez s0, .LBB7_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: li a3, 0 +; RV32IFD-NEXT: .LBB7_2: +; RV32IFD-NEXT: lui a0, %hi(.LCPI7_0) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI7_0)(a0) +; RV32IFD-NEXT: flt.d a4, ft0, fs0 +; RV32IFD-NEXT: li a2, -1 +; RV32IFD-NEXT: li a0, -1 +; RV32IFD-NEXT: beqz a4, .LBB7_7 +; RV32IFD-NEXT: # %bb.3: +; RV32IFD-NEXT: beqz s0, .LBB7_8 +; RV32IFD-NEXT: .LBB7_4: +; RV32IFD-NEXT: bnez a4, .LBB7_6 +; RV32IFD-NEXT: .LBB7_5: +; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: .LBB7_6: +; RV32IFD-NEXT: mv a1, a2 +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB7_7: +; RV32IFD-NEXT: mv a0, a3 +; RV32IFD-NEXT: bnez s0, .LBB7_4 +; RV32IFD-NEXT: .LBB7_8: +; RV32IFD-NEXT: li a1, 0 +; RV32IFD-NEXT: beqz a4, .LBB7_5 +; RV32IFD-NEXT: j .LBB7_6 +; +; RV64IFD-LABEL: test_ceil_ui64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: bnez a0, .LBB7_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB7_2: +; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rup +; RV64IFD-NEXT: ret + %a = call double @llvm.ceil.f64(double %x) + %b = call i64 @llvm.fptoui.sat.i64.f64(double %a) + ret i64 %b +} + +define signext i32 @test_trunc_si32(double %x) { +; RV32IFD-LABEL: test_trunc_si32: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: feq.d a0, fa0, fa0 +; RV32IFD-NEXT: bnez a0, .LBB8_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB8_2: +; RV32IFD-NEXT: fcvt.w.d a0, fa0, rtz +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: test_trunc_si32: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: bnez a0, .LBB8_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB8_2: +; RV64IFD-NEXT: fcvt.w.d a0, fa0, rtz +; RV64IFD-NEXT: ret + %a = call double @llvm.trunc.f64(double %x) + %b = call i32 @llvm.fptosi.sat.i32.f64(double %a) + ret i32 %b +} + +define i64 @test_trunc_si64(double %x) nounwind { +; RV32IFD-LABEL: 
test_trunc_si64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: call trunc@plt +; RV32IFD-NEXT: lui a0, %hi(.LCPI9_0) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI9_0)(a0) +; RV32IFD-NEXT: fmv.d fs0, fa0 +; RV32IFD-NEXT: fle.d s0, ft0, fa0 +; RV32IFD-NEXT: call __fixdfdi@plt +; RV32IFD-NEXT: mv a2, a0 +; RV32IFD-NEXT: bnez s0, .LBB9_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: li a2, 0 +; RV32IFD-NEXT: .LBB9_2: +; RV32IFD-NEXT: lui a0, %hi(.LCPI9_1) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI9_1)(a0) +; RV32IFD-NEXT: flt.d a3, ft0, fs0 +; RV32IFD-NEXT: li a0, -1 +; RV32IFD-NEXT: beqz a3, .LBB9_9 +; RV32IFD-NEXT: # %bb.3: +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: beqz a2, .LBB9_10 +; RV32IFD-NEXT: .LBB9_4: +; RV32IFD-NEXT: lui a4, 524288 +; RV32IFD-NEXT: beqz s0, .LBB9_11 +; RV32IFD-NEXT: .LBB9_5: +; RV32IFD-NEXT: bnez a3, .LBB9_12 +; RV32IFD-NEXT: .LBB9_6: +; RV32IFD-NEXT: bnez a2, .LBB9_8 +; RV32IFD-NEXT: .LBB9_7: +; RV32IFD-NEXT: li a1, 0 +; RV32IFD-NEXT: .LBB9_8: +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB9_9: +; RV32IFD-NEXT: mv a0, a2 +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: bnez a2, .LBB9_4 +; RV32IFD-NEXT: .LBB9_10: +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: lui a4, 524288 +; RV32IFD-NEXT: bnez s0, .LBB9_5 +; RV32IFD-NEXT: .LBB9_11: +; RV32IFD-NEXT: lui a1, 524288 +; RV32IFD-NEXT: beqz a3, .LBB9_6 +; RV32IFD-NEXT: .LBB9_12: +; RV32IFD-NEXT: addi a1, a4, -1 +; RV32IFD-NEXT: beqz a2, .LBB9_7 +; RV32IFD-NEXT: j .LBB9_8 +; +; RV64IFD-LABEL: test_trunc_si64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: bnez a0, .LBB9_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB9_2: +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz +; RV64IFD-NEXT: ret + %a = call double @llvm.trunc.f64(double %x) + %b = call i64 @llvm.fptosi.sat.i64.f64(double %a) + ret i64 %b +} + +define signext i32 @test_trunc_ui32(double %x) { +; RV32IFD-LABEL: test_trunc_ui32: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: feq.d a0, fa0, fa0 +; RV32IFD-NEXT: bnez a0, .LBB10_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB10_2: +; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: test_trunc_ui32: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: bnez a0, .LBB10_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB10_2: +; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rtz +; RV64IFD-NEXT: ret + %a = call double @llvm.trunc.f64(double %x) + %b = call i32 @llvm.fptoui.sat.i32.f64(double %a) + ret i32 %b +} + +define i64 @test_trunc_ui64(double %x) nounwind { +; RV32IFD-LABEL: test_trunc_ui64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: call trunc@plt +; RV32IFD-NEXT: fmv.d fs0, fa0 +; RV32IFD-NEXT: fcvt.d.w ft0, zero +; RV32IFD-NEXT: fle.d s0, ft0, fa0 +; RV32IFD-NEXT: call __fixunsdfdi@plt +; RV32IFD-NEXT: mv a3, a0 +; RV32IFD-NEXT: bnez s0, .LBB11_2 +; RV32IFD-NEXT: # %bb.1: +; 
RV32IFD-NEXT: li a3, 0 +; RV32IFD-NEXT: .LBB11_2: +; RV32IFD-NEXT: lui a0, %hi(.LCPI11_0) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI11_0)(a0) +; RV32IFD-NEXT: flt.d a4, ft0, fs0 +; RV32IFD-NEXT: li a2, -1 +; RV32IFD-NEXT: li a0, -1 +; RV32IFD-NEXT: beqz a4, .LBB11_7 +; RV32IFD-NEXT: # %bb.3: +; RV32IFD-NEXT: beqz s0, .LBB11_8 +; RV32IFD-NEXT: .LBB11_4: +; RV32IFD-NEXT: bnez a4, .LBB11_6 +; RV32IFD-NEXT: .LBB11_5: +; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: .LBB11_6: +; RV32IFD-NEXT: mv a1, a2 +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB11_7: +; RV32IFD-NEXT: mv a0, a3 +; RV32IFD-NEXT: bnez s0, .LBB11_4 +; RV32IFD-NEXT: .LBB11_8: +; RV32IFD-NEXT: li a1, 0 +; RV32IFD-NEXT: beqz a4, .LBB11_5 +; RV32IFD-NEXT: j .LBB11_6 +; +; RV64IFD-LABEL: test_trunc_ui64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: bnez a0, .LBB11_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB11_2: +; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rtz +; RV64IFD-NEXT: ret + %a = call double @llvm.trunc.f64(double %x) + %b = call i64 @llvm.fptoui.sat.i64.f64(double %a) + ret i64 %b +} + +define signext i32 @test_round_si32(double %x) { +; RV32IFD-LABEL: test_round_si32: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: feq.d a0, fa0, fa0 +; RV32IFD-NEXT: bnez a0, .LBB12_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB12_2: +; RV32IFD-NEXT: fcvt.w.d a0, fa0, rmm +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: test_round_si32: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: bnez a0, .LBB12_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB12_2: +; RV64IFD-NEXT: fcvt.w.d a0, fa0, rmm +; RV64IFD-NEXT: ret + %a = call double @llvm.round.f64(double %x) + %b = call i32 @llvm.fptosi.sat.i32.f64(double %a) + ret i32 %b +} + +define i64 @test_round_si64(double %x) nounwind { +; RV32IFD-LABEL: test_round_si64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: call round@plt +; RV32IFD-NEXT: lui a0, %hi(.LCPI13_0) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI13_0)(a0) +; RV32IFD-NEXT: fmv.d fs0, fa0 +; RV32IFD-NEXT: fle.d s0, ft0, fa0 +; RV32IFD-NEXT: call __fixdfdi@plt +; RV32IFD-NEXT: mv a2, a0 +; RV32IFD-NEXT: bnez s0, .LBB13_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: li a2, 0 +; RV32IFD-NEXT: .LBB13_2: +; RV32IFD-NEXT: lui a0, %hi(.LCPI13_1) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI13_1)(a0) +; RV32IFD-NEXT: flt.d a3, ft0, fs0 +; RV32IFD-NEXT: li a0, -1 +; RV32IFD-NEXT: beqz a3, .LBB13_9 +; RV32IFD-NEXT: # %bb.3: +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: beqz a2, .LBB13_10 +; RV32IFD-NEXT: .LBB13_4: +; RV32IFD-NEXT: lui a4, 524288 +; RV32IFD-NEXT: beqz s0, .LBB13_11 +; RV32IFD-NEXT: .LBB13_5: +; RV32IFD-NEXT: bnez a3, .LBB13_12 +; RV32IFD-NEXT: .LBB13_6: +; RV32IFD-NEXT: bnez a2, .LBB13_8 +; RV32IFD-NEXT: .LBB13_7: +; RV32IFD-NEXT: li a1, 0 +; RV32IFD-NEXT: .LBB13_8: +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB13_9: +; 
RV32IFD-NEXT: mv a0, a2 +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: bnez a2, .LBB13_4 +; RV32IFD-NEXT: .LBB13_10: +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: lui a4, 524288 +; RV32IFD-NEXT: bnez s0, .LBB13_5 +; RV32IFD-NEXT: .LBB13_11: +; RV32IFD-NEXT: lui a1, 524288 +; RV32IFD-NEXT: beqz a3, .LBB13_6 +; RV32IFD-NEXT: .LBB13_12: +; RV32IFD-NEXT: addi a1, a4, -1 +; RV32IFD-NEXT: beqz a2, .LBB13_7 +; RV32IFD-NEXT: j .LBB13_8 +; +; RV64IFD-LABEL: test_round_si64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: bnez a0, .LBB13_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB13_2: +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rmm +; RV64IFD-NEXT: ret + %a = call double @llvm.round.f64(double %x) + %b = call i64 @llvm.fptosi.sat.i64.f64(double %a) + ret i64 %b +} + +define signext i32 @test_round_ui32(double %x) { +; RV32IFD-LABEL: test_round_ui32: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: feq.d a0, fa0, fa0 +; RV32IFD-NEXT: bnez a0, .LBB14_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB14_2: +; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rmm +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: test_round_ui32: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: bnez a0, .LBB14_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB14_2: +; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rmm +; RV64IFD-NEXT: ret + %a = call double @llvm.round.f64(double %x) + %b = call i32 @llvm.fptoui.sat.i32.f64(double %a) + ret i32 %b +} + +define i64 @test_round_ui64(double %x) nounwind { +; RV32IFD-LABEL: test_round_ui64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: call round@plt +; RV32IFD-NEXT: fmv.d fs0, fa0 +; RV32IFD-NEXT: fcvt.d.w ft0, zero +; RV32IFD-NEXT: fle.d s0, ft0, fa0 +; RV32IFD-NEXT: call __fixunsdfdi@plt +; RV32IFD-NEXT: mv a3, a0 +; RV32IFD-NEXT: bnez s0, .LBB15_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: li a3, 0 +; RV32IFD-NEXT: .LBB15_2: +; RV32IFD-NEXT: lui a0, %hi(.LCPI15_0) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI15_0)(a0) +; RV32IFD-NEXT: flt.d a4, ft0, fs0 +; RV32IFD-NEXT: li a2, -1 +; RV32IFD-NEXT: li a0, -1 +; RV32IFD-NEXT: beqz a4, .LBB15_7 +; RV32IFD-NEXT: # %bb.3: +; RV32IFD-NEXT: beqz s0, .LBB15_8 +; RV32IFD-NEXT: .LBB15_4: +; RV32IFD-NEXT: bnez a4, .LBB15_6 +; RV32IFD-NEXT: .LBB15_5: +; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: .LBB15_6: +; RV32IFD-NEXT: mv a1, a2 +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB15_7: +; RV32IFD-NEXT: mv a0, a3 +; RV32IFD-NEXT: bnez s0, .LBB15_4 +; RV32IFD-NEXT: .LBB15_8: +; RV32IFD-NEXT: li a1, 0 +; RV32IFD-NEXT: beqz a4, .LBB15_5 +; RV32IFD-NEXT: j .LBB15_6 +; +; RV64IFD-LABEL: test_round_ui64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: bnez a0, .LBB15_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB15_2: +; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rmm +; RV64IFD-NEXT: ret + %a = call double @llvm.round.f64(double %x) + %b = call i64 @llvm.fptoui.sat.i64.f64(double %a) + ret i64 %b +} + +define signext i32 @test_roundeven_si32(double %x) { +; RV32IFD-LABEL: 
test_roundeven_si32: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: feq.d a0, fa0, fa0 +; RV32IFD-NEXT: bnez a0, .LBB16_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB16_2: +; RV32IFD-NEXT: fcvt.w.d a0, fa0, rne +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: test_roundeven_si32: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: bnez a0, .LBB16_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB16_2: +; RV64IFD-NEXT: fcvt.w.d a0, fa0, rne +; RV64IFD-NEXT: ret + %a = call double @llvm.roundeven.f64(double %x) + %b = call i32 @llvm.fptosi.sat.i32.f64(double %a) + ret i32 %b +} + +define i64 @test_roundeven_si64(double %x) nounwind { +; RV32IFD-LABEL: test_roundeven_si64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: call roundeven@plt +; RV32IFD-NEXT: lui a0, %hi(.LCPI17_0) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI17_0)(a0) +; RV32IFD-NEXT: fmv.d fs0, fa0 +; RV32IFD-NEXT: fle.d s0, ft0, fa0 +; RV32IFD-NEXT: call __fixdfdi@plt +; RV32IFD-NEXT: mv a2, a0 +; RV32IFD-NEXT: bnez s0, .LBB17_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: li a2, 0 +; RV32IFD-NEXT: .LBB17_2: +; RV32IFD-NEXT: lui a0, %hi(.LCPI17_1) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI17_1)(a0) +; RV32IFD-NEXT: flt.d a3, ft0, fs0 +; RV32IFD-NEXT: li a0, -1 +; RV32IFD-NEXT: beqz a3, .LBB17_9 +; RV32IFD-NEXT: # %bb.3: +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: beqz a2, .LBB17_10 +; RV32IFD-NEXT: .LBB17_4: +; RV32IFD-NEXT: lui a4, 524288 +; RV32IFD-NEXT: beqz s0, .LBB17_11 +; RV32IFD-NEXT: .LBB17_5: +; RV32IFD-NEXT: bnez a3, .LBB17_12 +; RV32IFD-NEXT: .LBB17_6: +; RV32IFD-NEXT: bnez a2, .LBB17_8 +; RV32IFD-NEXT: .LBB17_7: +; RV32IFD-NEXT: li a1, 0 +; RV32IFD-NEXT: .LBB17_8: +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB17_9: +; RV32IFD-NEXT: mv a0, a2 +; RV32IFD-NEXT: feq.d a2, fs0, fs0 +; RV32IFD-NEXT: bnez a2, .LBB17_4 +; RV32IFD-NEXT: .LBB17_10: +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: lui a4, 524288 +; RV32IFD-NEXT: bnez s0, .LBB17_5 +; RV32IFD-NEXT: .LBB17_11: +; RV32IFD-NEXT: lui a1, 524288 +; RV32IFD-NEXT: beqz a3, .LBB17_6 +; RV32IFD-NEXT: .LBB17_12: +; RV32IFD-NEXT: addi a1, a4, -1 +; RV32IFD-NEXT: beqz a2, .LBB17_7 +; RV32IFD-NEXT: j .LBB17_8 +; +; RV64IFD-LABEL: test_roundeven_si64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: bnez a0, .LBB17_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB17_2: +; RV64IFD-NEXT: fcvt.l.d a0, fa0, rne +; RV64IFD-NEXT: ret + %a = call double @llvm.roundeven.f64(double %x) + %b = call i64 @llvm.fptosi.sat.i64.f64(double %a) + ret i64 %b +} + +define signext i32 @test_roundeven_ui32(double %x) { +; RV32IFD-LABEL: test_roundeven_ui32: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: feq.d a0, fa0, fa0 +; RV32IFD-NEXT: bnez a0, .LBB18_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB18_2: +; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rne +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: test_roundeven_ui32: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: bnez a0, .LBB18_2 +; RV64IFD-NEXT: # %bb.1: +; 
RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB18_2: +; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rne +; RV64IFD-NEXT: ret + %a = call double @llvm.roundeven.f64(double %x) + %b = call i32 @llvm.fptoui.sat.i32.f64(double %a) + ret i32 %b +} + +define i64 @test_roundeven_ui64(double %x) nounwind { +; RV32IFD-LABEL: test_roundeven_ui64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: call roundeven@plt +; RV32IFD-NEXT: fmv.d fs0, fa0 +; RV32IFD-NEXT: fcvt.d.w ft0, zero +; RV32IFD-NEXT: fle.d s0, ft0, fa0 +; RV32IFD-NEXT: call __fixunsdfdi@plt +; RV32IFD-NEXT: mv a3, a0 +; RV32IFD-NEXT: bnez s0, .LBB19_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: li a3, 0 +; RV32IFD-NEXT: .LBB19_2: +; RV32IFD-NEXT: lui a0, %hi(.LCPI19_0) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI19_0)(a0) +; RV32IFD-NEXT: flt.d a4, ft0, fs0 +; RV32IFD-NEXT: li a2, -1 +; RV32IFD-NEXT: li a0, -1 +; RV32IFD-NEXT: beqz a4, .LBB19_7 +; RV32IFD-NEXT: # %bb.3: +; RV32IFD-NEXT: beqz s0, .LBB19_8 +; RV32IFD-NEXT: .LBB19_4: +; RV32IFD-NEXT: bnez a4, .LBB19_6 +; RV32IFD-NEXT: .LBB19_5: +; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: .LBB19_6: +; RV32IFD-NEXT: mv a1, a2 +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB19_7: +; RV32IFD-NEXT: mv a0, a3 +; RV32IFD-NEXT: bnez s0, .LBB19_4 +; RV32IFD-NEXT: .LBB19_8: +; RV32IFD-NEXT: li a1, 0 +; RV32IFD-NEXT: beqz a4, .LBB19_5 +; RV32IFD-NEXT: j .LBB19_6 +; +; RV64IFD-LABEL: test_roundeven_ui64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: bnez a0, .LBB19_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: li a0, 0 +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB19_2: +; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rne +; RV64IFD-NEXT: ret + %a = call double @llvm.roundeven.f64(double %x) + %b = call i64 @llvm.fptoui.sat.i64.f64(double %a) + ret i64 %b +} + +declare double @llvm.floor.f64(double) +declare double @llvm.ceil.f64(double) +declare double @llvm.trunc.f64(double) +declare double @llvm.round.f64(double) +declare double @llvm.roundeven.f64(double) +declare i32 @llvm.fptosi.sat.i32.f64(double) +declare i64 @llvm.fptosi.sat.i64.f64(double) +declare i32 @llvm.fptoui.sat.i32.f64(double) +declare i64 @llvm.fptoui.sat.i64.f64(double) diff --git a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll new file mode 100644 index 0000000000000..9893b697af294 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll @@ -0,0 +1,940 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \ +; RUN: -target-abi=ilp32f | FileCheck -check-prefix=RV32IF %s +; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \ +; RUN: -target-abi=lp64f | FileCheck -check-prefix=RV64IF %s + +define signext i32 @test_floor_si32(float %x) { +; RV32IF-LABEL: test_floor_si32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: feq.s a0, fa0, fa0 +; RV32IF-NEXT: bnez a0, .LBB0_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB0_2: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rdn +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: test_floor_si32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 
+; RV64IF-NEXT: bnez a0, .LBB0_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB0_2: +; RV64IF-NEXT: fcvt.w.s a0, fa0, rdn +; RV64IF-NEXT: ret + %a = call float @llvm.floor.f32(float %x) + %b = call i32 @llvm.fptosi.sat.i32.f32(float %a) + ret i32 %b +} + +define i64 @test_floor_si64(float %x) nounwind { +; RV32IF-LABEL: test_floor_si64: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call floorf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI1_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI1_0)(a0) +; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: fle.s s0, ft0, fa0 +; RV32IF-NEXT: call __fixsfdi@plt +; RV32IF-NEXT: mv a2, a0 +; RV32IF-NEXT: bnez s0, .LBB1_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a2, 0 +; RV32IF-NEXT: .LBB1_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI1_1) +; RV32IF-NEXT: flw ft0, %lo(.LCPI1_1)(a0) +; RV32IF-NEXT: flt.s a3, ft0, fs0 +; RV32IF-NEXT: li a0, -1 +; RV32IF-NEXT: beqz a3, .LBB1_9 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: beqz a2, .LBB1_10 +; RV32IF-NEXT: .LBB1_4: +; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: beqz s0, .LBB1_11 +; RV32IF-NEXT: .LBB1_5: +; RV32IF-NEXT: bnez a3, .LBB1_12 +; RV32IF-NEXT: .LBB1_6: +; RV32IF-NEXT: bnez a2, .LBB1_8 +; RV32IF-NEXT: .LBB1_7: +; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: .LBB1_8: +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB1_9: +; RV32IF-NEXT: mv a0, a2 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: bnez a2, .LBB1_4 +; RV32IF-NEXT: .LBB1_10: +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: bnez s0, .LBB1_5 +; RV32IF-NEXT: .LBB1_11: +; RV32IF-NEXT: lui a1, 524288 +; RV32IF-NEXT: beqz a3, .LBB1_6 +; RV32IF-NEXT: .LBB1_12: +; RV32IF-NEXT: addi a1, a4, -1 +; RV32IF-NEXT: beqz a2, .LBB1_7 +; RV32IF-NEXT: j .LBB1_8 +; +; RV64IF-LABEL: test_floor_si64: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: bnez a0, .LBB1_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB1_2: +; RV64IF-NEXT: fcvt.l.s a0, fa0, rdn +; RV64IF-NEXT: ret + %a = call float @llvm.floor.f32(float %x) + %b = call i64 @llvm.fptosi.sat.i64.f32(float %a) + ret i64 %b +} + +define signext i32 @test_floor_ui32(float %x) { +; RV32IF-LABEL: test_floor_ui32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: feq.s a0, fa0, fa0 +; RV32IF-NEXT: bnez a0, .LBB2_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB2_2: +; RV32IF-NEXT: fcvt.wu.s a0, fa0, rdn +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: test_floor_ui32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: bnez a0, .LBB2_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB2_2: +; RV64IF-NEXT: fcvt.wu.s a0, fa0, rdn +; RV64IF-NEXT: ret + %a = call float @llvm.floor.f32(float %x) + %b = call i32 @llvm.fptoui.sat.i32.f32(float %a) + ret i32 %b +} + +define i64 @test_floor_ui64(float %x) nounwind { +; RV32IF-LABEL: test_floor_ui64: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; 
RV32IF-NEXT: call floorf@plt +; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: fmv.w.x ft0, zero +; RV32IF-NEXT: fle.s s0, ft0, fa0 +; RV32IF-NEXT: call __fixunssfdi@plt +; RV32IF-NEXT: mv a3, a0 +; RV32IF-NEXT: bnez s0, .LBB3_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a3, 0 +; RV32IF-NEXT: .LBB3_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI3_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI3_0)(a0) +; RV32IF-NEXT: flt.s a4, ft0, fs0 +; RV32IF-NEXT: li a2, -1 +; RV32IF-NEXT: li a0, -1 +; RV32IF-NEXT: beqz a4, .LBB3_7 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: beqz s0, .LBB3_8 +; RV32IF-NEXT: .LBB3_4: +; RV32IF-NEXT: bnez a4, .LBB3_6 +; RV32IF-NEXT: .LBB3_5: +; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: .LBB3_6: +; RV32IF-NEXT: mv a1, a2 +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB3_7: +; RV32IF-NEXT: mv a0, a3 +; RV32IF-NEXT: bnez s0, .LBB3_4 +; RV32IF-NEXT: .LBB3_8: +; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: beqz a4, .LBB3_5 +; RV32IF-NEXT: j .LBB3_6 +; +; RV64IF-LABEL: test_floor_ui64: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: bnez a0, .LBB3_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB3_2: +; RV64IF-NEXT: fcvt.lu.s a0, fa0, rdn +; RV64IF-NEXT: ret + %a = call float @llvm.floor.f32(float %x) + %b = call i64 @llvm.fptoui.sat.i64.f32(float %a) + ret i64 %b +} + +define signext i32 @test_ceil_si32(float %x) { +; RV32IF-LABEL: test_ceil_si32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: feq.s a0, fa0, fa0 +; RV32IF-NEXT: bnez a0, .LBB4_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB4_2: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rup +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: test_ceil_si32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: bnez a0, .LBB4_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB4_2: +; RV64IF-NEXT: fcvt.w.s a0, fa0, rup +; RV64IF-NEXT: ret + %a = call float @llvm.ceil.f32(float %x) + %b = call i32 @llvm.fptosi.sat.i32.f32(float %a) + ret i32 %b +} + +define i64 @test_ceil_si64(float %x) nounwind { +; RV32IF-LABEL: test_ceil_si64: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call ceilf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI5_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI5_0)(a0) +; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: fle.s s0, ft0, fa0 +; RV32IF-NEXT: call __fixsfdi@plt +; RV32IF-NEXT: mv a2, a0 +; RV32IF-NEXT: bnez s0, .LBB5_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a2, 0 +; RV32IF-NEXT: .LBB5_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI5_1) +; RV32IF-NEXT: flw ft0, %lo(.LCPI5_1)(a0) +; RV32IF-NEXT: flt.s a3, ft0, fs0 +; RV32IF-NEXT: li a0, -1 +; RV32IF-NEXT: beqz a3, .LBB5_9 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: beqz a2, .LBB5_10 +; RV32IF-NEXT: .LBB5_4: +; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: beqz s0, .LBB5_11 +; RV32IF-NEXT: .LBB5_5: +; RV32IF-NEXT: bnez a3, .LBB5_12 +; RV32IF-NEXT: .LBB5_6: +; RV32IF-NEXT: bnez a2, .LBB5_8 +; RV32IF-NEXT: .LBB5_7: +; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: .LBB5_8: +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 
4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB5_9: +; RV32IF-NEXT: mv a0, a2 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: bnez a2, .LBB5_4 +; RV32IF-NEXT: .LBB5_10: +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: bnez s0, .LBB5_5 +; RV32IF-NEXT: .LBB5_11: +; RV32IF-NEXT: lui a1, 524288 +; RV32IF-NEXT: beqz a3, .LBB5_6 +; RV32IF-NEXT: .LBB5_12: +; RV32IF-NEXT: addi a1, a4, -1 +; RV32IF-NEXT: beqz a2, .LBB5_7 +; RV32IF-NEXT: j .LBB5_8 +; +; RV64IF-LABEL: test_ceil_si64: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: bnez a0, .LBB5_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB5_2: +; RV64IF-NEXT: fcvt.l.s a0, fa0, rup +; RV64IF-NEXT: ret + %a = call float @llvm.ceil.f32(float %x) + %b = call i64 @llvm.fptosi.sat.i64.f32(float %a) + ret i64 %b +} + +define signext i32 @test_ceil_ui32(float %x) { +; RV32IF-LABEL: test_ceil_ui32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: feq.s a0, fa0, fa0 +; RV32IF-NEXT: bnez a0, .LBB6_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB6_2: +; RV32IF-NEXT: fcvt.wu.s a0, fa0, rup +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: test_ceil_ui32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: bnez a0, .LBB6_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB6_2: +; RV64IF-NEXT: fcvt.wu.s a0, fa0, rup +; RV64IF-NEXT: ret + %a = call float @llvm.ceil.f32(float %x) + %b = call i32 @llvm.fptoui.sat.i32.f32(float %a) + ret i32 %b +} + +define i64 @test_ceil_ui64(float %x) nounwind { +; RV32IF-LABEL: test_ceil_ui64: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call ceilf@plt +; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: fmv.w.x ft0, zero +; RV32IF-NEXT: fle.s s0, ft0, fa0 +; RV32IF-NEXT: call __fixunssfdi@plt +; RV32IF-NEXT: mv a3, a0 +; RV32IF-NEXT: bnez s0, .LBB7_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a3, 0 +; RV32IF-NEXT: .LBB7_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI7_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI7_0)(a0) +; RV32IF-NEXT: flt.s a4, ft0, fs0 +; RV32IF-NEXT: li a2, -1 +; RV32IF-NEXT: li a0, -1 +; RV32IF-NEXT: beqz a4, .LBB7_7 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: beqz s0, .LBB7_8 +; RV32IF-NEXT: .LBB7_4: +; RV32IF-NEXT: bnez a4, .LBB7_6 +; RV32IF-NEXT: .LBB7_5: +; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: .LBB7_6: +; RV32IF-NEXT: mv a1, a2 +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB7_7: +; RV32IF-NEXT: mv a0, a3 +; RV32IF-NEXT: bnez s0, .LBB7_4 +; RV32IF-NEXT: .LBB7_8: +; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: beqz a4, .LBB7_5 +; RV32IF-NEXT: j .LBB7_6 +; +; RV64IF-LABEL: test_ceil_ui64: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: bnez a0, .LBB7_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB7_2: +; RV64IF-NEXT: fcvt.lu.s a0, fa0, rup +; RV64IF-NEXT: ret + %a = call float @llvm.ceil.f32(float %x) + %b = call i64 @llvm.fptoui.sat.i64.f32(float %a) + ret i64 %b +} + +define signext i32 @test_trunc_si32(float %x) { +; RV32IF-LABEL: test_trunc_si32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: feq.s a0, 
fa0, fa0 +; RV32IF-NEXT: bnez a0, .LBB8_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB8_2: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: test_trunc_si32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: bnez a0, .LBB8_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB8_2: +; RV64IF-NEXT: fcvt.w.s a0, fa0, rtz +; RV64IF-NEXT: ret + %a = call float @llvm.trunc.f32(float %x) + %b = call i32 @llvm.fptosi.sat.i32.f32(float %a) + ret i32 %b +} + +define i64 @test_trunc_si64(float %x) nounwind { +; RV32IF-LABEL: test_trunc_si64: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call truncf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI9_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI9_0)(a0) +; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: fle.s s0, ft0, fa0 +; RV32IF-NEXT: call __fixsfdi@plt +; RV32IF-NEXT: mv a2, a0 +; RV32IF-NEXT: bnez s0, .LBB9_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a2, 0 +; RV32IF-NEXT: .LBB9_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI9_1) +; RV32IF-NEXT: flw ft0, %lo(.LCPI9_1)(a0) +; RV32IF-NEXT: flt.s a3, ft0, fs0 +; RV32IF-NEXT: li a0, -1 +; RV32IF-NEXT: beqz a3, .LBB9_9 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: beqz a2, .LBB9_10 +; RV32IF-NEXT: .LBB9_4: +; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: beqz s0, .LBB9_11 +; RV32IF-NEXT: .LBB9_5: +; RV32IF-NEXT: bnez a3, .LBB9_12 +; RV32IF-NEXT: .LBB9_6: +; RV32IF-NEXT: bnez a2, .LBB9_8 +; RV32IF-NEXT: .LBB9_7: +; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: .LBB9_8: +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB9_9: +; RV32IF-NEXT: mv a0, a2 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: bnez a2, .LBB9_4 +; RV32IF-NEXT: .LBB9_10: +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: bnez s0, .LBB9_5 +; RV32IF-NEXT: .LBB9_11: +; RV32IF-NEXT: lui a1, 524288 +; RV32IF-NEXT: beqz a3, .LBB9_6 +; RV32IF-NEXT: .LBB9_12: +; RV32IF-NEXT: addi a1, a4, -1 +; RV32IF-NEXT: beqz a2, .LBB9_7 +; RV32IF-NEXT: j .LBB9_8 +; +; RV64IF-LABEL: test_trunc_si64: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: bnez a0, .LBB9_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB9_2: +; RV64IF-NEXT: fcvt.l.s a0, fa0, rtz +; RV64IF-NEXT: ret + %a = call float @llvm.trunc.f32(float %x) + %b = call i64 @llvm.fptosi.sat.i64.f32(float %a) + ret i64 %b +} + +define signext i32 @test_trunc_ui32(float %x) { +; RV32IF-LABEL: test_trunc_ui32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: feq.s a0, fa0, fa0 +; RV32IF-NEXT: bnez a0, .LBB10_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB10_2: +; RV32IF-NEXT: fcvt.wu.s a0, fa0, rtz +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: test_trunc_ui32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: bnez a0, .LBB10_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB10_2: +; RV64IF-NEXT: fcvt.wu.s a0, fa0, rtz +; RV64IF-NEXT: ret + %a = call float @llvm.trunc.f32(float %x) + %b = call i32 @llvm.fptoui.sat.i32.f32(float %a) + ret i32 %b +} + +define i64 
@test_trunc_ui64(float %x) nounwind { +; RV32IF-LABEL: test_trunc_ui64: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call truncf@plt +; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: fmv.w.x ft0, zero +; RV32IF-NEXT: fle.s s0, ft0, fa0 +; RV32IF-NEXT: call __fixunssfdi@plt +; RV32IF-NEXT: mv a3, a0 +; RV32IF-NEXT: bnez s0, .LBB11_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a3, 0 +; RV32IF-NEXT: .LBB11_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI11_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI11_0)(a0) +; RV32IF-NEXT: flt.s a4, ft0, fs0 +; RV32IF-NEXT: li a2, -1 +; RV32IF-NEXT: li a0, -1 +; RV32IF-NEXT: beqz a4, .LBB11_7 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: beqz s0, .LBB11_8 +; RV32IF-NEXT: .LBB11_4: +; RV32IF-NEXT: bnez a4, .LBB11_6 +; RV32IF-NEXT: .LBB11_5: +; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: .LBB11_6: +; RV32IF-NEXT: mv a1, a2 +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB11_7: +; RV32IF-NEXT: mv a0, a3 +; RV32IF-NEXT: bnez s0, .LBB11_4 +; RV32IF-NEXT: .LBB11_8: +; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: beqz a4, .LBB11_5 +; RV32IF-NEXT: j .LBB11_6 +; +; RV64IF-LABEL: test_trunc_ui64: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: bnez a0, .LBB11_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB11_2: +; RV64IF-NEXT: fcvt.lu.s a0, fa0, rtz +; RV64IF-NEXT: ret + %a = call float @llvm.trunc.f32(float %x) + %b = call i64 @llvm.fptoui.sat.i64.f32(float %a) + ret i64 %b +} + +define signext i32 @test_round_si32(float %x) { +; RV32IF-LABEL: test_round_si32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: feq.s a0, fa0, fa0 +; RV32IF-NEXT: bnez a0, .LBB12_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB12_2: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rmm +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: test_round_si32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: bnez a0, .LBB12_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB12_2: +; RV64IF-NEXT: fcvt.w.s a0, fa0, rmm +; RV64IF-NEXT: ret + %a = call float @llvm.round.f32(float %x) + %b = call i32 @llvm.fptosi.sat.i32.f32(float %a) + ret i32 %b +} + +define i64 @test_round_si64(float %x) nounwind { +; RV32IF-LABEL: test_round_si64: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call roundf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI13_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI13_0)(a0) +; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: fle.s s0, ft0, fa0 +; RV32IF-NEXT: call __fixsfdi@plt +; RV32IF-NEXT: mv a2, a0 +; RV32IF-NEXT: bnez s0, .LBB13_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a2, 0 +; RV32IF-NEXT: .LBB13_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI13_1) +; RV32IF-NEXT: flw ft0, %lo(.LCPI13_1)(a0) +; RV32IF-NEXT: flt.s a3, ft0, fs0 +; RV32IF-NEXT: li a0, -1 +; RV32IF-NEXT: beqz a3, .LBB13_9 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: beqz a2, .LBB13_10 +; RV32IF-NEXT: .LBB13_4: +; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: beqz s0, .LBB13_11 +; 
RV32IF-NEXT: .LBB13_5: +; RV32IF-NEXT: bnez a3, .LBB13_12 +; RV32IF-NEXT: .LBB13_6: +; RV32IF-NEXT: bnez a2, .LBB13_8 +; RV32IF-NEXT: .LBB13_7: +; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: .LBB13_8: +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB13_9: +; RV32IF-NEXT: mv a0, a2 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: bnez a2, .LBB13_4 +; RV32IF-NEXT: .LBB13_10: +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: bnez s0, .LBB13_5 +; RV32IF-NEXT: .LBB13_11: +; RV32IF-NEXT: lui a1, 524288 +; RV32IF-NEXT: beqz a3, .LBB13_6 +; RV32IF-NEXT: .LBB13_12: +; RV32IF-NEXT: addi a1, a4, -1 +; RV32IF-NEXT: beqz a2, .LBB13_7 +; RV32IF-NEXT: j .LBB13_8 +; +; RV64IF-LABEL: test_round_si64: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: bnez a0, .LBB13_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB13_2: +; RV64IF-NEXT: fcvt.l.s a0, fa0, rmm +; RV64IF-NEXT: ret + %a = call float @llvm.round.f32(float %x) + %b = call i64 @llvm.fptosi.sat.i64.f32(float %a) + ret i64 %b +} + +define signext i32 @test_round_ui32(float %x) { +; RV32IF-LABEL: test_round_ui32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: feq.s a0, fa0, fa0 +; RV32IF-NEXT: bnez a0, .LBB14_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB14_2: +; RV32IF-NEXT: fcvt.wu.s a0, fa0, rmm +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: test_round_ui32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: bnez a0, .LBB14_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB14_2: +; RV64IF-NEXT: fcvt.wu.s a0, fa0, rmm +; RV64IF-NEXT: ret + %a = call float @llvm.round.f32(float %x) + %b = call i32 @llvm.fptoui.sat.i32.f32(float %a) + ret i32 %b +} + +define i64 @test_round_ui64(float %x) nounwind { +; RV32IF-LABEL: test_round_ui64: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call roundf@plt +; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: fmv.w.x ft0, zero +; RV32IF-NEXT: fle.s s0, ft0, fa0 +; RV32IF-NEXT: call __fixunssfdi@plt +; RV32IF-NEXT: mv a3, a0 +; RV32IF-NEXT: bnez s0, .LBB15_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a3, 0 +; RV32IF-NEXT: .LBB15_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI15_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI15_0)(a0) +; RV32IF-NEXT: flt.s a4, ft0, fs0 +; RV32IF-NEXT: li a2, -1 +; RV32IF-NEXT: li a0, -1 +; RV32IF-NEXT: beqz a4, .LBB15_7 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: beqz s0, .LBB15_8 +; RV32IF-NEXT: .LBB15_4: +; RV32IF-NEXT: bnez a4, .LBB15_6 +; RV32IF-NEXT: .LBB15_5: +; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: .LBB15_6: +; RV32IF-NEXT: mv a1, a2 +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB15_7: +; RV32IF-NEXT: mv a0, a3 +; RV32IF-NEXT: bnez s0, .LBB15_4 +; RV32IF-NEXT: .LBB15_8: +; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: beqz a4, .LBB15_5 +; RV32IF-NEXT: j .LBB15_6 +; +; RV64IF-LABEL: test_round_ui64: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: bnez a0, .LBB15_2 +; RV64IF-NEXT: # 
%bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB15_2: +; RV64IF-NEXT: fcvt.lu.s a0, fa0, rmm +; RV64IF-NEXT: ret + %a = call float @llvm.round.f32(float %x) + %b = call i64 @llvm.fptoui.sat.i64.f32(float %a) + ret i64 %b +} + +define signext i32 @test_roundeven_si32(float %x) { +; RV32IF-LABEL: test_roundeven_si32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: feq.s a0, fa0, fa0 +; RV32IF-NEXT: bnez a0, .LBB16_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB16_2: +; RV32IF-NEXT: fcvt.w.s a0, fa0, rne +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: test_roundeven_si32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: bnez a0, .LBB16_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB16_2: +; RV64IF-NEXT: fcvt.w.s a0, fa0, rne +; RV64IF-NEXT: ret + %a = call float @llvm.roundeven.f32(float %x) + %b = call i32 @llvm.fptosi.sat.i32.f32(float %a) + ret i32 %b +} + +define i64 @test_roundeven_si64(float %x) nounwind { +; RV32IF-LABEL: test_roundeven_si64: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call roundevenf@plt +; RV32IF-NEXT: lui a0, %hi(.LCPI17_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI17_0)(a0) +; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: fle.s s0, ft0, fa0 +; RV32IF-NEXT: call __fixsfdi@plt +; RV32IF-NEXT: mv a2, a0 +; RV32IF-NEXT: bnez s0, .LBB17_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a2, 0 +; RV32IF-NEXT: .LBB17_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI17_1) +; RV32IF-NEXT: flw ft0, %lo(.LCPI17_1)(a0) +; RV32IF-NEXT: flt.s a3, ft0, fs0 +; RV32IF-NEXT: li a0, -1 +; RV32IF-NEXT: beqz a3, .LBB17_9 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: beqz a2, .LBB17_10 +; RV32IF-NEXT: .LBB17_4: +; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: beqz s0, .LBB17_11 +; RV32IF-NEXT: .LBB17_5: +; RV32IF-NEXT: bnez a3, .LBB17_12 +; RV32IF-NEXT: .LBB17_6: +; RV32IF-NEXT: bnez a2, .LBB17_8 +; RV32IF-NEXT: .LBB17_7: +; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: .LBB17_8: +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB17_9: +; RV32IF-NEXT: mv a0, a2 +; RV32IF-NEXT: feq.s a2, fs0, fs0 +; RV32IF-NEXT: bnez a2, .LBB17_4 +; RV32IF-NEXT: .LBB17_10: +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: bnez s0, .LBB17_5 +; RV32IF-NEXT: .LBB17_11: +; RV32IF-NEXT: lui a1, 524288 +; RV32IF-NEXT: beqz a3, .LBB17_6 +; RV32IF-NEXT: .LBB17_12: +; RV32IF-NEXT: addi a1, a4, -1 +; RV32IF-NEXT: beqz a2, .LBB17_7 +; RV32IF-NEXT: j .LBB17_8 +; +; RV64IF-LABEL: test_roundeven_si64: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: bnez a0, .LBB17_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB17_2: +; RV64IF-NEXT: fcvt.l.s a0, fa0, rne +; RV64IF-NEXT: ret + %a = call float @llvm.roundeven.f32(float %x) + %b = call i64 @llvm.fptosi.sat.i64.f32(float %a) + ret i64 %b +} + +define signext i32 @test_roundeven_ui32(float %x) { +; RV32IF-LABEL: test_roundeven_ui32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: feq.s a0, fa0, fa0 +; RV32IF-NEXT: bnez a0, .LBB18_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB18_2: +; 
RV32IF-NEXT: fcvt.wu.s a0, fa0, rne +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: test_roundeven_ui32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: bnez a0, .LBB18_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB18_2: +; RV64IF-NEXT: fcvt.wu.s a0, fa0, rne +; RV64IF-NEXT: ret + %a = call float @llvm.roundeven.f32(float %x) + %b = call i32 @llvm.fptoui.sat.i32.f32(float %a) + ret i32 %b +} + +define i64 @test_roundeven_ui64(float %x) nounwind { +; RV32IF-LABEL: test_roundeven_ui64: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call roundevenf@plt +; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: fmv.w.x ft0, zero +; RV32IF-NEXT: fle.s s0, ft0, fa0 +; RV32IF-NEXT: call __fixunssfdi@plt +; RV32IF-NEXT: mv a3, a0 +; RV32IF-NEXT: bnez s0, .LBB19_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: li a3, 0 +; RV32IF-NEXT: .LBB19_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI19_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI19_0)(a0) +; RV32IF-NEXT: flt.s a4, ft0, fs0 +; RV32IF-NEXT: li a2, -1 +; RV32IF-NEXT: li a0, -1 +; RV32IF-NEXT: beqz a4, .LBB19_7 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: beqz s0, .LBB19_8 +; RV32IF-NEXT: .LBB19_4: +; RV32IF-NEXT: bnez a4, .LBB19_6 +; RV32IF-NEXT: .LBB19_5: +; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: .LBB19_6: +; RV32IF-NEXT: mv a1, a2 +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB19_7: +; RV32IF-NEXT: mv a0, a3 +; RV32IF-NEXT: bnez s0, .LBB19_4 +; RV32IF-NEXT: .LBB19_8: +; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: beqz a4, .LBB19_5 +; RV32IF-NEXT: j .LBB19_6 +; +; RV64IF-LABEL: test_roundeven_ui64: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: bnez a0, .LBB19_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: li a0, 0 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB19_2: +; RV64IF-NEXT: fcvt.lu.s a0, fa0, rne +; RV64IF-NEXT: ret + %a = call float @llvm.roundeven.f32(float %x) + %b = call i64 @llvm.fptoui.sat.i64.f32(float %a) + ret i64 %b +} + +declare float @llvm.floor.f32(float) +declare float @llvm.ceil.f32(float) +declare float @llvm.trunc.f32(float) +declare float @llvm.round.f32(float) +declare float @llvm.roundeven.f32(float) +declare i32 @llvm.fptosi.sat.i32.f32(float) +declare i64 @llvm.fptosi.sat.i64.f32(float) +declare i32 @llvm.fptoui.sat.i32.f32(float) +declare i64 @llvm.fptoui.sat.i64.f32(float) diff --git a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll new file mode 100644 index 0000000000000..7b3104c69bef6 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll @@ -0,0 +1,970 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+zfh -verify-machineinstrs < %s \ +; RUN: -target-abi=ilp32f | FileCheck -check-prefix=RV32IZFH %s +; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs < %s \ +; RUN: -target-abi=lp64f | FileCheck -check-prefix=RV64IZFH %s + +define signext i32 @test_floor_si32(half %x) { +; RV32IZFH-LABEL: test_floor_si32: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa0, fa0 +; RV32IZFH-NEXT: bnez a0, .LBB0_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a0, 0 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: 
.LBB0_2: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rdn +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: test_floor_si32: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB0_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB0_2: +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rdn +; RV64IZFH-NEXT: ret + %a = call half @llvm.floor.f16(half %x) + %b = call i32 @llvm.fptosi.sat.i32.f16(half %a) + ret i32 %b +} + +define i64 @test_floor_si64(half %x) nounwind { +; RV32IZFH-LABEL: test_floor_si64: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: call floorf@plt +; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI1_0)(a0) +; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 +; RV32IZFH-NEXT: fcvt.s.h fs0, ft1 +; RV32IZFH-NEXT: fle.s s0, ft0, fs0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 +; RV32IZFH-NEXT: call __fixsfdi@plt +; RV32IZFH-NEXT: mv a2, a0 +; RV32IZFH-NEXT: bnez s0, .LBB1_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a2, 0 +; RV32IZFH-NEXT: .LBB1_2: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_1) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI1_1)(a0) +; RV32IZFH-NEXT: flt.s a3, ft0, fs0 +; RV32IZFH-NEXT: li a0, -1 +; RV32IZFH-NEXT: beqz a3, .LBB1_9 +; RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: beqz a2, .LBB1_10 +; RV32IZFH-NEXT: .LBB1_4: +; RV32IZFH-NEXT: lui a4, 524288 +; RV32IZFH-NEXT: beqz s0, .LBB1_11 +; RV32IZFH-NEXT: .LBB1_5: +; RV32IZFH-NEXT: bnez a3, .LBB1_12 +; RV32IZFH-NEXT: .LBB1_6: +; RV32IZFH-NEXT: bnez a2, .LBB1_8 +; RV32IZFH-NEXT: .LBB1_7: +; RV32IZFH-NEXT: li a1, 0 +; RV32IZFH-NEXT: .LBB1_8: +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB1_9: +; RV32IZFH-NEXT: mv a0, a2 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: bnez a2, .LBB1_4 +; RV32IZFH-NEXT: .LBB1_10: +; RV32IZFH-NEXT: li a0, 0 +; RV32IZFH-NEXT: lui a4, 524288 +; RV32IZFH-NEXT: bnez s0, .LBB1_5 +; RV32IZFH-NEXT: .LBB1_11: +; RV32IZFH-NEXT: lui a1, 524288 +; RV32IZFH-NEXT: beqz a3, .LBB1_6 +; RV32IZFH-NEXT: .LBB1_12: +; RV32IZFH-NEXT: addi a1, a4, -1 +; RV32IZFH-NEXT: beqz a2, .LBB1_7 +; RV32IZFH-NEXT: j .LBB1_8 +; +; RV64IZFH-LABEL: test_floor_si64: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB1_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB1_2: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rdn +; RV64IZFH-NEXT: ret + %a = call half @llvm.floor.f16(half %x) + %b = call i64 @llvm.fptosi.sat.i64.f16(half %a) + ret i64 %b +} + +define signext i32 @test_floor_ui32(half %x) { +; RV32IZFH-LABEL: test_floor_ui32: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa0, fa0 +; RV32IZFH-NEXT: bnez a0, .LBB2_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a0, 0 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB2_2: +; RV32IZFH-NEXT: fcvt.wu.h a0, fa0, rdn +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: test_floor_ui32: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB2_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB2_2: +; RV64IZFH-NEXT: 
fcvt.wu.h a0, fa0, rdn +; RV64IZFH-NEXT: ret + %a = call half @llvm.floor.f16(half %x) + %b = call i32 @llvm.fptoui.sat.i32.f16(half %a) + ret i32 %b +} + +define i64 @test_floor_ui64(half %x) nounwind { +; RV32IZFH-LABEL: test_floor_ui64: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: call floorf@plt +; RV32IZFH-NEXT: fcvt.h.s ft0, fa0 +; RV32IZFH-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFH-NEXT: fmv.w.x ft0, zero +; RV32IZFH-NEXT: fle.s s0, ft0, fs0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 +; RV32IZFH-NEXT: call __fixunssfdi@plt +; RV32IZFH-NEXT: mv a3, a0 +; RV32IZFH-NEXT: bnez s0, .LBB3_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a3, 0 +; RV32IZFH-NEXT: .LBB3_2: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI3_0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI3_0)(a0) +; RV32IZFH-NEXT: flt.s a4, ft0, fs0 +; RV32IZFH-NEXT: li a2, -1 +; RV32IZFH-NEXT: li a0, -1 +; RV32IZFH-NEXT: beqz a4, .LBB3_7 +; RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: beqz s0, .LBB3_8 +; RV32IZFH-NEXT: .LBB3_4: +; RV32IZFH-NEXT: bnez a4, .LBB3_6 +; RV32IZFH-NEXT: .LBB3_5: +; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: .LBB3_6: +; RV32IZFH-NEXT: mv a1, a2 +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB3_7: +; RV32IZFH-NEXT: mv a0, a3 +; RV32IZFH-NEXT: bnez s0, .LBB3_4 +; RV32IZFH-NEXT: .LBB3_8: +; RV32IZFH-NEXT: li a1, 0 +; RV32IZFH-NEXT: beqz a4, .LBB3_5 +; RV32IZFH-NEXT: j .LBB3_6 +; +; RV64IZFH-LABEL: test_floor_ui64: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB3_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB3_2: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rdn +; RV64IZFH-NEXT: ret + %a = call half @llvm.floor.f16(half %x) + %b = call i64 @llvm.fptoui.sat.i64.f16(half %a) + ret i64 %b +} + +define signext i32 @test_ceil_si32(half %x) { +; RV32IZFH-LABEL: test_ceil_si32: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa0, fa0 +; RV32IZFH-NEXT: bnez a0, .LBB4_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a0, 0 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB4_2: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rup +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: test_ceil_si32: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB4_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB4_2: +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rup +; RV64IZFH-NEXT: ret + %a = call half @llvm.ceil.f16(half %x) + %b = call i32 @llvm.fptosi.sat.i32.f16(half %a) + ret i32 %b +} + +define i64 @test_ceil_si64(half %x) nounwind { +; RV32IZFH-LABEL: test_ceil_si64: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: call ceilf@plt +; RV32IZFH-NEXT: lui a0, %hi(.LCPI5_0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI5_0)(a0) +; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 +; RV32IZFH-NEXT: fcvt.s.h fs0, ft1 +; RV32IZFH-NEXT: fle.s s0, ft0, fs0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 +; RV32IZFH-NEXT: call 
__fixsfdi@plt +; RV32IZFH-NEXT: mv a2, a0 +; RV32IZFH-NEXT: bnez s0, .LBB5_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a2, 0 +; RV32IZFH-NEXT: .LBB5_2: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI5_1) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI5_1)(a0) +; RV32IZFH-NEXT: flt.s a3, ft0, fs0 +; RV32IZFH-NEXT: li a0, -1 +; RV32IZFH-NEXT: beqz a3, .LBB5_9 +; RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: beqz a2, .LBB5_10 +; RV32IZFH-NEXT: .LBB5_4: +; RV32IZFH-NEXT: lui a4, 524288 +; RV32IZFH-NEXT: beqz s0, .LBB5_11 +; RV32IZFH-NEXT: .LBB5_5: +; RV32IZFH-NEXT: bnez a3, .LBB5_12 +; RV32IZFH-NEXT: .LBB5_6: +; RV32IZFH-NEXT: bnez a2, .LBB5_8 +; RV32IZFH-NEXT: .LBB5_7: +; RV32IZFH-NEXT: li a1, 0 +; RV32IZFH-NEXT: .LBB5_8: +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB5_9: +; RV32IZFH-NEXT: mv a0, a2 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: bnez a2, .LBB5_4 +; RV32IZFH-NEXT: .LBB5_10: +; RV32IZFH-NEXT: li a0, 0 +; RV32IZFH-NEXT: lui a4, 524288 +; RV32IZFH-NEXT: bnez s0, .LBB5_5 +; RV32IZFH-NEXT: .LBB5_11: +; RV32IZFH-NEXT: lui a1, 524288 +; RV32IZFH-NEXT: beqz a3, .LBB5_6 +; RV32IZFH-NEXT: .LBB5_12: +; RV32IZFH-NEXT: addi a1, a4, -1 +; RV32IZFH-NEXT: beqz a2, .LBB5_7 +; RV32IZFH-NEXT: j .LBB5_8 +; +; RV64IZFH-LABEL: test_ceil_si64: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB5_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB5_2: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rup +; RV64IZFH-NEXT: ret + %a = call half @llvm.ceil.f16(half %x) + %b = call i64 @llvm.fptosi.sat.i64.f16(half %a) + ret i64 %b +} + +define signext i32 @test_ceil_ui32(half %x) { +; RV32IZFH-LABEL: test_ceil_ui32: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa0, fa0 +; RV32IZFH-NEXT: bnez a0, .LBB6_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a0, 0 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB6_2: +; RV32IZFH-NEXT: fcvt.wu.h a0, fa0, rup +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: test_ceil_ui32: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB6_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB6_2: +; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rup +; RV64IZFH-NEXT: ret + %a = call half @llvm.ceil.f16(half %x) + %b = call i32 @llvm.fptoui.sat.i32.f16(half %a) + ret i32 %b +} + +define i64 @test_ceil_ui64(half %x) nounwind { +; RV32IZFH-LABEL: test_ceil_ui64: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: call ceilf@plt +; RV32IZFH-NEXT: fcvt.h.s ft0, fa0 +; RV32IZFH-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFH-NEXT: fmv.w.x ft0, zero +; RV32IZFH-NEXT: fle.s s0, ft0, fs0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 +; RV32IZFH-NEXT: call __fixunssfdi@plt +; RV32IZFH-NEXT: mv a3, a0 +; RV32IZFH-NEXT: bnez s0, .LBB7_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a3, 0 +; RV32IZFH-NEXT: .LBB7_2: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI7_0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI7_0)(a0) +; RV32IZFH-NEXT: flt.s a4, ft0, fs0 +; RV32IZFH-NEXT: li a2, -1 +; RV32IZFH-NEXT: li a0, -1 +; RV32IZFH-NEXT: beqz a4, .LBB7_7 +; 
RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: beqz s0, .LBB7_8 +; RV32IZFH-NEXT: .LBB7_4: +; RV32IZFH-NEXT: bnez a4, .LBB7_6 +; RV32IZFH-NEXT: .LBB7_5: +; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: .LBB7_6: +; RV32IZFH-NEXT: mv a1, a2 +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB7_7: +; RV32IZFH-NEXT: mv a0, a3 +; RV32IZFH-NEXT: bnez s0, .LBB7_4 +; RV32IZFH-NEXT: .LBB7_8: +; RV32IZFH-NEXT: li a1, 0 +; RV32IZFH-NEXT: beqz a4, .LBB7_5 +; RV32IZFH-NEXT: j .LBB7_6 +; +; RV64IZFH-LABEL: test_ceil_ui64: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB7_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB7_2: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rup +; RV64IZFH-NEXT: ret + %a = call half @llvm.ceil.f16(half %x) + %b = call i64 @llvm.fptoui.sat.i64.f16(half %a) + ret i64 %b +} + +define signext i32 @test_trunc_si32(half %x) { +; RV32IZFH-LABEL: test_trunc_si32: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa0, fa0 +; RV32IZFH-NEXT: bnez a0, .LBB8_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a0, 0 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB8_2: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: test_trunc_si32: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB8_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB8_2: +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %a = call half @llvm.trunc.f16(half %x) + %b = call i32 @llvm.fptosi.sat.i32.f16(half %a) + ret i32 %b +} + +define i64 @test_trunc_si64(half %x) nounwind { +; RV32IZFH-LABEL: test_trunc_si64: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: call truncf@plt +; RV32IZFH-NEXT: lui a0, %hi(.LCPI9_0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI9_0)(a0) +; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 +; RV32IZFH-NEXT: fcvt.s.h fs0, ft1 +; RV32IZFH-NEXT: fle.s s0, ft0, fs0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 +; RV32IZFH-NEXT: call __fixsfdi@plt +; RV32IZFH-NEXT: mv a2, a0 +; RV32IZFH-NEXT: bnez s0, .LBB9_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a2, 0 +; RV32IZFH-NEXT: .LBB9_2: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI9_1) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI9_1)(a0) +; RV32IZFH-NEXT: flt.s a3, ft0, fs0 +; RV32IZFH-NEXT: li a0, -1 +; RV32IZFH-NEXT: beqz a3, .LBB9_9 +; RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: beqz a2, .LBB9_10 +; RV32IZFH-NEXT: .LBB9_4: +; RV32IZFH-NEXT: lui a4, 524288 +; RV32IZFH-NEXT: beqz s0, .LBB9_11 +; RV32IZFH-NEXT: .LBB9_5: +; RV32IZFH-NEXT: bnez a3, .LBB9_12 +; RV32IZFH-NEXT: .LBB9_6: +; RV32IZFH-NEXT: bnez a2, .LBB9_8 +; RV32IZFH-NEXT: .LBB9_7: +; RV32IZFH-NEXT: li a1, 0 +; RV32IZFH-NEXT: .LBB9_8: +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB9_9: +; RV32IZFH-NEXT: mv a0, a2 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: bnez a2, .LBB9_4 +; RV32IZFH-NEXT: 
.LBB9_10: +; RV32IZFH-NEXT: li a0, 0 +; RV32IZFH-NEXT: lui a4, 524288 +; RV32IZFH-NEXT: bnez s0, .LBB9_5 +; RV32IZFH-NEXT: .LBB9_11: +; RV32IZFH-NEXT: lui a1, 524288 +; RV32IZFH-NEXT: beqz a3, .LBB9_6 +; RV32IZFH-NEXT: .LBB9_12: +; RV32IZFH-NEXT: addi a1, a4, -1 +; RV32IZFH-NEXT: beqz a2, .LBB9_7 +; RV32IZFH-NEXT: j .LBB9_8 +; +; RV64IZFH-LABEL: test_trunc_si64: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB9_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB9_2: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %a = call half @llvm.trunc.f16(half %x) + %b = call i64 @llvm.fptosi.sat.i64.f16(half %a) + ret i64 %b +} + +define signext i32 @test_trunc_ui32(half %x) { +; RV32IZFH-LABEL: test_trunc_ui32: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa0, fa0 +; RV32IZFH-NEXT: bnez a0, .LBB10_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a0, 0 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB10_2: +; RV32IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: test_trunc_ui32: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB10_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB10_2: +; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %a = call half @llvm.trunc.f16(half %x) + %b = call i32 @llvm.fptoui.sat.i32.f16(half %a) + ret i32 %b +} + +define i64 @test_trunc_ui64(half %x) nounwind { +; RV32IZFH-LABEL: test_trunc_ui64: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: call truncf@plt +; RV32IZFH-NEXT: fcvt.h.s ft0, fa0 +; RV32IZFH-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFH-NEXT: fmv.w.x ft0, zero +; RV32IZFH-NEXT: fle.s s0, ft0, fs0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 +; RV32IZFH-NEXT: call __fixunssfdi@plt +; RV32IZFH-NEXT: mv a3, a0 +; RV32IZFH-NEXT: bnez s0, .LBB11_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a3, 0 +; RV32IZFH-NEXT: .LBB11_2: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI11_0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI11_0)(a0) +; RV32IZFH-NEXT: flt.s a4, ft0, fs0 +; RV32IZFH-NEXT: li a2, -1 +; RV32IZFH-NEXT: li a0, -1 +; RV32IZFH-NEXT: beqz a4, .LBB11_7 +; RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: beqz s0, .LBB11_8 +; RV32IZFH-NEXT: .LBB11_4: +; RV32IZFH-NEXT: bnez a4, .LBB11_6 +; RV32IZFH-NEXT: .LBB11_5: +; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: .LBB11_6: +; RV32IZFH-NEXT: mv a1, a2 +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB11_7: +; RV32IZFH-NEXT: mv a0, a3 +; RV32IZFH-NEXT: bnez s0, .LBB11_4 +; RV32IZFH-NEXT: .LBB11_8: +; RV32IZFH-NEXT: li a1, 0 +; RV32IZFH-NEXT: beqz a4, .LBB11_5 +; RV32IZFH-NEXT: j .LBB11_6 +; +; RV64IZFH-LABEL: test_trunc_ui64: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB11_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB11_2: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %a = call half @llvm.trunc.f16(half %x) + %b = call i64 @llvm.fptoui.sat.i64.f16(half %a) + ret i64 %b +} + +define signext i32 
@test_round_si32(half %x) { +; RV32IZFH-LABEL: test_round_si32: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa0, fa0 +; RV32IZFH-NEXT: bnez a0, .LBB12_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a0, 0 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB12_2: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rmm +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: test_round_si32: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB12_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB12_2: +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rmm +; RV64IZFH-NEXT: ret + %a = call half @llvm.round.f16(half %x) + %b = call i32 @llvm.fptosi.sat.i32.f16(half %a) + ret i32 %b +} + +define i64 @test_round_si64(half %x) nounwind { +; RV32IZFH-LABEL: test_round_si64: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: call roundf@plt +; RV32IZFH-NEXT: lui a0, %hi(.LCPI13_0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI13_0)(a0) +; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 +; RV32IZFH-NEXT: fcvt.s.h fs0, ft1 +; RV32IZFH-NEXT: fle.s s0, ft0, fs0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 +; RV32IZFH-NEXT: call __fixsfdi@plt +; RV32IZFH-NEXT: mv a2, a0 +; RV32IZFH-NEXT: bnez s0, .LBB13_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a2, 0 +; RV32IZFH-NEXT: .LBB13_2: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI13_1) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI13_1)(a0) +; RV32IZFH-NEXT: flt.s a3, ft0, fs0 +; RV32IZFH-NEXT: li a0, -1 +; RV32IZFH-NEXT: beqz a3, .LBB13_9 +; RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: beqz a2, .LBB13_10 +; RV32IZFH-NEXT: .LBB13_4: +; RV32IZFH-NEXT: lui a4, 524288 +; RV32IZFH-NEXT: beqz s0, .LBB13_11 +; RV32IZFH-NEXT: .LBB13_5: +; RV32IZFH-NEXT: bnez a3, .LBB13_12 +; RV32IZFH-NEXT: .LBB13_6: +; RV32IZFH-NEXT: bnez a2, .LBB13_8 +; RV32IZFH-NEXT: .LBB13_7: +; RV32IZFH-NEXT: li a1, 0 +; RV32IZFH-NEXT: .LBB13_8: +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB13_9: +; RV32IZFH-NEXT: mv a0, a2 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: bnez a2, .LBB13_4 +; RV32IZFH-NEXT: .LBB13_10: +; RV32IZFH-NEXT: li a0, 0 +; RV32IZFH-NEXT: lui a4, 524288 +; RV32IZFH-NEXT: bnez s0, .LBB13_5 +; RV32IZFH-NEXT: .LBB13_11: +; RV32IZFH-NEXT: lui a1, 524288 +; RV32IZFH-NEXT: beqz a3, .LBB13_6 +; RV32IZFH-NEXT: .LBB13_12: +; RV32IZFH-NEXT: addi a1, a4, -1 +; RV32IZFH-NEXT: beqz a2, .LBB13_7 +; RV32IZFH-NEXT: j .LBB13_8 +; +; RV64IZFH-LABEL: test_round_si64: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB13_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB13_2: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rmm +; RV64IZFH-NEXT: ret + %a = call half @llvm.round.f16(half %x) + %b = call i64 @llvm.fptosi.sat.i64.f16(half %a) + ret i64 %b +} + +define signext i32 @test_round_ui32(half %x) { +; RV32IZFH-LABEL: test_round_ui32: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa0, fa0 +; RV32IZFH-NEXT: bnez a0, .LBB14_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a0, 0 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB14_2: +; RV32IZFH-NEXT: fcvt.wu.h a0, fa0, rmm 
+; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: test_round_ui32: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB14_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB14_2: +; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rmm +; RV64IZFH-NEXT: ret + %a = call half @llvm.round.f16(half %x) + %b = call i32 @llvm.fptoui.sat.i32.f16(half %a) + ret i32 %b +} + +define i64 @test_round_ui64(half %x) nounwind { +; RV32IZFH-LABEL: test_round_ui64: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: call roundf@plt +; RV32IZFH-NEXT: fcvt.h.s ft0, fa0 +; RV32IZFH-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFH-NEXT: fmv.w.x ft0, zero +; RV32IZFH-NEXT: fle.s s0, ft0, fs0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 +; RV32IZFH-NEXT: call __fixunssfdi@plt +; RV32IZFH-NEXT: mv a3, a0 +; RV32IZFH-NEXT: bnez s0, .LBB15_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a3, 0 +; RV32IZFH-NEXT: .LBB15_2: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI15_0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI15_0)(a0) +; RV32IZFH-NEXT: flt.s a4, ft0, fs0 +; RV32IZFH-NEXT: li a2, -1 +; RV32IZFH-NEXT: li a0, -1 +; RV32IZFH-NEXT: beqz a4, .LBB15_7 +; RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: beqz s0, .LBB15_8 +; RV32IZFH-NEXT: .LBB15_4: +; RV32IZFH-NEXT: bnez a4, .LBB15_6 +; RV32IZFH-NEXT: .LBB15_5: +; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: .LBB15_6: +; RV32IZFH-NEXT: mv a1, a2 +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB15_7: +; RV32IZFH-NEXT: mv a0, a3 +; RV32IZFH-NEXT: bnez s0, .LBB15_4 +; RV32IZFH-NEXT: .LBB15_8: +; RV32IZFH-NEXT: li a1, 0 +; RV32IZFH-NEXT: beqz a4, .LBB15_5 +; RV32IZFH-NEXT: j .LBB15_6 +; +; RV64IZFH-LABEL: test_round_ui64: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB15_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB15_2: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rmm +; RV64IZFH-NEXT: ret + %a = call half @llvm.round.f16(half %x) + %b = call i64 @llvm.fptoui.sat.i64.f16(half %a) + ret i64 %b +} + +define signext i32 @test_roundeven_si32(half %x) { +; RV32IZFH-LABEL: test_roundeven_si32: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa0, fa0 +; RV32IZFH-NEXT: bnez a0, .LBB16_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a0, 0 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB16_2: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rne +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: test_roundeven_si32: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB16_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB16_2: +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rne +; RV64IZFH-NEXT: ret + %a = call half @llvm.roundeven.f16(half %x) + %b = call i32 @llvm.fptosi.sat.i32.f16(half %a) + ret i32 %b +} + +define i64 @test_roundeven_si64(half %x) nounwind { +; RV32IZFH-LABEL: test_roundeven_si64: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill 
+; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: call roundevenf@plt +; RV32IZFH-NEXT: lui a0, %hi(.LCPI17_0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI17_0)(a0) +; RV32IZFH-NEXT: fcvt.h.s ft1, fa0 +; RV32IZFH-NEXT: fcvt.s.h fs0, ft1 +; RV32IZFH-NEXT: fle.s s0, ft0, fs0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 +; RV32IZFH-NEXT: call __fixsfdi@plt +; RV32IZFH-NEXT: mv a2, a0 +; RV32IZFH-NEXT: bnez s0, .LBB17_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a2, 0 +; RV32IZFH-NEXT: .LBB17_2: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI17_1) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI17_1)(a0) +; RV32IZFH-NEXT: flt.s a3, ft0, fs0 +; RV32IZFH-NEXT: li a0, -1 +; RV32IZFH-NEXT: beqz a3, .LBB17_9 +; RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: beqz a2, .LBB17_10 +; RV32IZFH-NEXT: .LBB17_4: +; RV32IZFH-NEXT: lui a4, 524288 +; RV32IZFH-NEXT: beqz s0, .LBB17_11 +; RV32IZFH-NEXT: .LBB17_5: +; RV32IZFH-NEXT: bnez a3, .LBB17_12 +; RV32IZFH-NEXT: .LBB17_6: +; RV32IZFH-NEXT: bnez a2, .LBB17_8 +; RV32IZFH-NEXT: .LBB17_7: +; RV32IZFH-NEXT: li a1, 0 +; RV32IZFH-NEXT: .LBB17_8: +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB17_9: +; RV32IZFH-NEXT: mv a0, a2 +; RV32IZFH-NEXT: feq.s a2, fs0, fs0 +; RV32IZFH-NEXT: bnez a2, .LBB17_4 +; RV32IZFH-NEXT: .LBB17_10: +; RV32IZFH-NEXT: li a0, 0 +; RV32IZFH-NEXT: lui a4, 524288 +; RV32IZFH-NEXT: bnez s0, .LBB17_5 +; RV32IZFH-NEXT: .LBB17_11: +; RV32IZFH-NEXT: lui a1, 524288 +; RV32IZFH-NEXT: beqz a3, .LBB17_6 +; RV32IZFH-NEXT: .LBB17_12: +; RV32IZFH-NEXT: addi a1, a4, -1 +; RV32IZFH-NEXT: beqz a2, .LBB17_7 +; RV32IZFH-NEXT: j .LBB17_8 +; +; RV64IZFH-LABEL: test_roundeven_si64: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB17_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB17_2: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rne +; RV64IZFH-NEXT: ret + %a = call half @llvm.roundeven.f16(half %x) + %b = call i64 @llvm.fptosi.sat.i64.f16(half %a) + ret i64 %b +} + +define signext i32 @test_roundeven_ui32(half %x) { +; RV32IZFH-LABEL: test_roundeven_ui32: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa0, fa0 +; RV32IZFH-NEXT: bnez a0, .LBB18_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a0, 0 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB18_2: +; RV32IZFH-NEXT: fcvt.wu.h a0, fa0, rne +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: test_roundeven_ui32: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB18_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB18_2: +; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rne +; RV64IZFH-NEXT: ret + %a = call half @llvm.roundeven.f16(half %x) + %b = call i32 @llvm.fptoui.sat.i32.f16(half %a) + ret i32 %b +} + +define i64 @test_roundeven_ui64(half %x) nounwind { +; RV32IZFH-LABEL: test_roundeven_ui64: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: call roundevenf@plt +; RV32IZFH-NEXT: fcvt.h.s ft0, fa0 +; RV32IZFH-NEXT: fcvt.s.h fs0, ft0 +; RV32IZFH-NEXT: fmv.w.x ft0, zero +; RV32IZFH-NEXT: fle.s s0, ft0, fs0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 +; 
RV32IZFH-NEXT: call __fixunssfdi@plt +; RV32IZFH-NEXT: mv a3, a0 +; RV32IZFH-NEXT: bnez s0, .LBB19_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: li a3, 0 +; RV32IZFH-NEXT: .LBB19_2: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI19_0) +; RV32IZFH-NEXT: flw ft0, %lo(.LCPI19_0)(a0) +; RV32IZFH-NEXT: flt.s a4, ft0, fs0 +; RV32IZFH-NEXT: li a2, -1 +; RV32IZFH-NEXT: li a0, -1 +; RV32IZFH-NEXT: beqz a4, .LBB19_7 +; RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: beqz s0, .LBB19_8 +; RV32IZFH-NEXT: .LBB19_4: +; RV32IZFH-NEXT: bnez a4, .LBB19_6 +; RV32IZFH-NEXT: .LBB19_5: +; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: .LBB19_6: +; RV32IZFH-NEXT: mv a1, a2 +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB19_7: +; RV32IZFH-NEXT: mv a0, a3 +; RV32IZFH-NEXT: bnez s0, .LBB19_4 +; RV32IZFH-NEXT: .LBB19_8: +; RV32IZFH-NEXT: li a1, 0 +; RV32IZFH-NEXT: beqz a4, .LBB19_5 +; RV32IZFH-NEXT: j .LBB19_6 +; +; RV64IZFH-LABEL: test_roundeven_ui64: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB19_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: li a0, 0 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB19_2: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rne +; RV64IZFH-NEXT: ret + %a = call half @llvm.roundeven.f16(half %x) + %b = call i64 @llvm.fptoui.sat.i64.f16(half %a) + ret i64 %b +} + +declare half @llvm.floor.f16(half) +declare half @llvm.ceil.f16(half) +declare half @llvm.trunc.f16(half) +declare half @llvm.round.f16(half) +declare half @llvm.roundeven.f16(half) +declare i32 @llvm.fptosi.sat.i32.f16(half) +declare i64 @llvm.fptosi.sat.i64.f16(half) +declare i32 @llvm.fptoui.sat.i32.f16(half) +declare i64 @llvm.fptoui.sat.i64.f16(half)