diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 22c61eb20885b..97d76ca494cbe 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -13594,6 +13594,7 @@ static SDValue performFP_TO_INTCombine(SDNode *N, // (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn)) // (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup)) // (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm)) +// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn)) static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget) { diff --git a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll index 1fd0d629e9a7a..5c5b4bb723b68 100644 --- a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll @@ -1338,11 +1338,278 @@ define i64 @test_roundeven_ui64(double %x) nounwind { ret i64 %b } +define signext i32 @test_rint_si32(double %x) { +; CHECKIFD-LABEL: test_rint_si32: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fcvt.w.d a0, fa0 +; CHECKIFD-NEXT: feq.d a1, fa0, fa0 +; CHECKIFD-NEXT: seqz a1, a1 +; CHECKIFD-NEXT: addi a1, a1, -1 +; CHECKIFD-NEXT: and a0, a1, a0 +; CHECKIFD-NEXT: ret +; +; RV32IZFINXZDINX-LABEL: test_rint_si32: +; RV32IZFINXZDINX: # %bb.0: +; RV32IZFINXZDINX-NEXT: addi sp, sp, -16 +; RV32IZFINXZDINX-NEXT: .cfi_def_cfa_offset 16 +; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) +; RV32IZFINXZDINX-NEXT: lw a0, 8(sp) +; RV32IZFINXZDINX-NEXT: lw a1, 12(sp) +; RV32IZFINXZDINX-NEXT: fcvt.w.d a2, a0 +; RV32IZFINXZDINX-NEXT: feq.d a0, a0, a0 +; RV32IZFINXZDINX-NEXT: seqz a0, a0 +; RV32IZFINXZDINX-NEXT: addi a0, a0, -1 +; RV32IZFINXZDINX-NEXT: and a0, a0, a2 +; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 +; RV32IZFINXZDINX-NEXT: ret +; +; RV64IZFINXZDINX-LABEL: test_rint_si32: +; RV64IZFINXZDINX: # %bb.0: +; RV64IZFINXZDINX-NEXT: fcvt.w.d a1, a0 +; RV64IZFINXZDINX-NEXT: feq.d a0, a0, a0 +; RV64IZFINXZDINX-NEXT: seqz a0, a0 +; RV64IZFINXZDINX-NEXT: addi a0, a0, -1 +; RV64IZFINXZDINX-NEXT: and a0, a0, a1 +; RV64IZFINXZDINX-NEXT: ret + %a = call double @llvm.rint.f64(double %x) + %b = call i32 @llvm.fptosi.sat.i32.f64(double %a) + ret i32 %b +} + +define i64 @test_rint_si64(double %x) nounwind { +; RV32IFD-LABEL: test_rint_si64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: call rint@plt +; RV32IFD-NEXT: lui a0, %hi(.LCPI21_0) +; RV32IFD-NEXT: fld fa5, %lo(.LCPI21_0)(a0) +; RV32IFD-NEXT: fmv.d fs0, fa0 +; RV32IFD-NEXT: fle.d s0, fa5, fa0 +; RV32IFD-NEXT: call __fixdfdi@plt +; RV32IFD-NEXT: lui a4, 524288 +; RV32IFD-NEXT: lui a2, 524288 +; RV32IFD-NEXT: beqz s0, .LBB21_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: .LBB21_2: +; RV32IFD-NEXT: lui a1, %hi(.LCPI21_1) +; RV32IFD-NEXT: fld fa5, %lo(.LCPI21_1)(a1) +; RV32IFD-NEXT: flt.d a3, fa5, fs0 +; RV32IFD-NEXT: beqz a3, .LBB21_4 +; RV32IFD-NEXT: # %bb.3: +; RV32IFD-NEXT: addi a2, a4, -1 +; RV32IFD-NEXT: .LBB21_4: +; RV32IFD-NEXT: feq.d a1, fs0, fs0 +; RV32IFD-NEXT: neg a4, a1 +; RV32IFD-NEXT: and a1, a4, a2 +; RV32IFD-NEXT: neg a2, a3 +; RV32IFD-NEXT: neg a3, s0 +; RV32IFD-NEXT: and a0, a3, a0 +; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: and a0, a4, a0 +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: test_rint_si64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.l.d a0, fa0 +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a1, a0 +; RV64IFD-NEXT: ret +; +; RV32IZFINXZDINX-LABEL: test_rint_si64: +; RV32IZFINXZDINX: # %bb.0: +; RV32IZFINXZDINX-NEXT: addi sp, sp, -32 +; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: call rint@plt +; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) +; RV32IZFINXZDINX-NEXT: lw s2, 8(sp) +; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) +; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI21_0) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI21_0+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI21_0)(a2) +; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2 +; RV32IZFINXZDINX-NEXT: call __fixdfdi@plt +; RV32IZFINXZDINX-NEXT: lui a4, 524288 +; RV32IZFINXZDINX-NEXT: lui a2, 524288 +; RV32IZFINXZDINX-NEXT: beqz s0, .LBB21_2 +; RV32IZFINXZDINX-NEXT: # %bb.1: +; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: .LBB21_2: +; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI21_1) +; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI21_1)(a1) +; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI21_1+4)(a1) +; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2 +; RV32IZFINXZDINX-NEXT: beqz a3, .LBB21_4 +; RV32IZFINXZDINX-NEXT: # %bb.3: +; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: .LBB21_4: +; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2 +; RV32IZFINXZDINX-NEXT: neg a4, a1 +; RV32IZFINXZDINX-NEXT: and a1, a4, a2 +; RV32IZFINXZDINX-NEXT: neg a2, s0 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: neg a2, a3 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 +; RV32IZFINXZDINX-NEXT: ret +; +; RV64IZFINXZDINX-LABEL: test_rint_si64: +; RV64IZFINXZDINX: # %bb.0: +; RV64IZFINXZDINX-NEXT: fcvt.l.d a1, a0 +; RV64IZFINXZDINX-NEXT: feq.d a0, a0, a0 +; RV64IZFINXZDINX-NEXT: seqz a0, a0 +; RV64IZFINXZDINX-NEXT: addi a0, a0, -1 +; RV64IZFINXZDINX-NEXT: and a0, a0, a1 +; RV64IZFINXZDINX-NEXT: ret + %a = call double @llvm.rint.f64(double %x) + %b = call i64 @llvm.fptosi.sat.i64.f64(double %a) + ret i64 %b +} + +define signext i32 @test_rint_ui32(double %x) { +; CHECKIFD-LABEL: test_rint_ui32: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fcvt.wu.d a0, fa0 +; CHECKIFD-NEXT: feq.d a1, fa0, fa0 +; CHECKIFD-NEXT: seqz a1, a1 +; CHECKIFD-NEXT: addi a1, a1, -1 +; CHECKIFD-NEXT: and a0, a1, a0 +; CHECKIFD-NEXT: ret +; +; RV32IZFINXZDINX-LABEL: test_rint_ui32: +; RV32IZFINXZDINX: # %bb.0: +; RV32IZFINXZDINX-NEXT: addi sp, sp, -16 +; RV32IZFINXZDINX-NEXT: .cfi_def_cfa_offset 16 +; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) +; RV32IZFINXZDINX-NEXT: lw a0, 8(sp) +; RV32IZFINXZDINX-NEXT: lw a1, 12(sp) +; RV32IZFINXZDINX-NEXT: fcvt.wu.d a2, a0 +; RV32IZFINXZDINX-NEXT: feq.d a0, a0, a0 +; RV32IZFINXZDINX-NEXT: seqz a0, a0 +; RV32IZFINXZDINX-NEXT: addi a0, a0, -1 +; RV32IZFINXZDINX-NEXT: and a0, a0, a2 +; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 +; RV32IZFINXZDINX-NEXT: ret +; +; RV64IZFINXZDINX-LABEL: test_rint_ui32: +; RV64IZFINXZDINX: # %bb.0: +; RV64IZFINXZDINX-NEXT: fcvt.wu.d a1, a0 +; RV64IZFINXZDINX-NEXT: feq.d a0, a0, a0 +; RV64IZFINXZDINX-NEXT: seqz a0, a0 +; RV64IZFINXZDINX-NEXT: addi a0, a0, -1 +; RV64IZFINXZDINX-NEXT: and a0, a0, a1 +; RV64IZFINXZDINX-NEXT: ret + %a = call double @llvm.rint.f64(double %x) + %b = call i32 @llvm.fptoui.sat.i32.f64(double %a) + ret i32 %b +} + +define i64 @test_rint_ui64(double %x) nounwind { +; RV32IFD-LABEL: test_rint_ui64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call rint@plt +; RV32IFD-NEXT: lui a0, %hi(.LCPI23_0) +; RV32IFD-NEXT: fld fa5, %lo(.LCPI23_0)(a0) +; RV32IFD-NEXT: flt.d a0, fa5, fa0 +; RV32IFD-NEXT: neg s0, a0 +; RV32IFD-NEXT: fcvt.d.w fa5, zero +; RV32IFD-NEXT: fle.d a0, fa5, fa0 +; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: call __fixunsdfdi@plt +; RV32IFD-NEXT: and a0, s1, a0 +; RV32IFD-NEXT: or a0, s0, a0 +; RV32IFD-NEXT: and a1, s1, a1 +; RV32IFD-NEXT: or a1, s0, a1 +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: test_rint_ui64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.lu.d a0, fa0 +; RV64IFD-NEXT: feq.d a1, fa0, fa0 +; RV64IFD-NEXT: seqz a1, a1 +; RV64IFD-NEXT: addi a1, a1, -1 +; RV64IFD-NEXT: and a0, a1, a0 +; RV64IFD-NEXT: ret +; +; RV32IZFINXZDINX-LABEL: test_rint_ui64: +; RV32IZFINXZDINX: # %bb.0: +; RV32IZFINXZDINX-NEXT: addi sp, sp, -32 +; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: call rint@plt +; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) +; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) +; RV32IZFINXZDINX-NEXT: lw s1, 12(sp) +; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero +; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 +; RV32IZFINXZDINX-NEXT: neg s2, a2 +; RV32IZFINXZDINX-NEXT: call __fixunsdfdi@plt +; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI23_0) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI23_0+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI23_0)(a2) +; RV32IZFINXZDINX-NEXT: and a0, s2, a0 +; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: and a1, s2, a1 +; RV32IZFINXZDINX-NEXT: or a1, a2, a1 +; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 +; RV32IZFINXZDINX-NEXT: ret +; +; RV64IZFINXZDINX-LABEL: test_rint_ui64: +; RV64IZFINXZDINX: # %bb.0: +; RV64IZFINXZDINX-NEXT: fcvt.lu.d a1, a0 +; RV64IZFINXZDINX-NEXT: feq.d a0, a0, a0 +; RV64IZFINXZDINX-NEXT: seqz a0, a0 +; RV64IZFINXZDINX-NEXT: addi a0, a0, -1 +; RV64IZFINXZDINX-NEXT: and a0, a0, a1 +; RV64IZFINXZDINX-NEXT: ret + %a = call double @llvm.rint.f64(double %x) + %b = call i64 @llvm.fptoui.sat.i64.f64(double %a) + ret i64 %b +} + declare double @llvm.floor.f64(double) declare double @llvm.ceil.f64(double) declare double @llvm.trunc.f64(double) declare double @llvm.round.f64(double) declare double @llvm.roundeven.f64(double) +declare double @llvm.rint.f64(double) declare i32 @llvm.fptosi.sat.i32.f64(double) declare i64 @llvm.fptosi.sat.i64.f64(double) declare i32 @llvm.fptoui.sat.i32.f64(double) diff --git a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll index 61337216c7fb5..d947d0f25cdd8 100644 --- a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll @@ -1308,11 +1308,272 @@ define i64 @test_roundeven_ui64(float %x) nounwind { ret i64 %b } +define signext i32 @test_rint_si32(float %x) { +; CHECKIF-LABEL: test_rint_si32: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fcvt.w.s a0, fa0 +; CHECKIF-NEXT: feq.s a1, fa0, fa0 +; CHECKIF-NEXT: seqz a1, a1 +; CHECKIF-NEXT: addi a1, a1, -1 +; CHECKIF-NEXT: and a0, a1, a0 +; CHECKIF-NEXT: ret +; +; CHECKIZFINX-LABEL: test_rint_si32: +; CHECKIZFINX: # %bb.0: +; CHECKIZFINX-NEXT: fcvt.w.s a1, a0 +; CHECKIZFINX-NEXT: feq.s a0, a0, a0 +; CHECKIZFINX-NEXT: seqz a0, a0 +; CHECKIZFINX-NEXT: addi a0, a0, -1 +; CHECKIZFINX-NEXT: and a0, a0, a1 +; CHECKIZFINX-NEXT: ret + %a = call float @llvm.rint.f32(float %x) + %b = call i32 @llvm.fptosi.sat.i32.f32(float %a) + ret i32 %b +} + +define i64 @test_rint_si64(float %x) nounwind { +; RV32IF-LABEL: test_rint_si64: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: lui a0, 307200 +; RV32IF-NEXT: fmv.w.x fa5, a0 +; RV32IF-NEXT: fabs.s fa4, fa0 +; RV32IF-NEXT: flt.s a0, fa4, fa5 +; RV32IF-NEXT: beqz a0, .LBB21_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fs0 +; RV32IF-NEXT: fcvt.s.w fa5, a0 +; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0 +; RV32IF-NEXT: .LBB21_2: +; RV32IF-NEXT: lui a0, 913408 +; RV32IF-NEXT: fmv.w.x fa5, a0 +; RV32IF-NEXT: fle.s s0, fa5, fs0 +; RV32IF-NEXT: fmv.s fa0, fs0 +; RV32IF-NEXT: call __fixsfdi@plt +; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: lui a2, 524288 +; RV32IF-NEXT: beqz s0, .LBB21_4 +; RV32IF-NEXT: # %bb.3: +; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: .LBB21_4: +; RV32IF-NEXT: lui a1, %hi(.LCPI21_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI21_0)(a1) +; RV32IF-NEXT: flt.s a3, fa5, fs0 +; RV32IF-NEXT: beqz a3, .LBB21_6 +; RV32IF-NEXT: # %bb.5: +; RV32IF-NEXT: addi a2, a4, -1 +; RV32IF-NEXT: .LBB21_6: +; RV32IF-NEXT: feq.s a1, fs0, fs0 +; RV32IF-NEXT: neg a4, a1 +; RV32IF-NEXT: and a1, a4, a2 +; RV32IF-NEXT: neg a2, s0 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a0, a4, a0 +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: test_rint_si64: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.l.s a0, fa0 +; RV64IF-NEXT: feq.s a1, fa0, fa0 +; RV64IF-NEXT: seqz a1, a1 +; RV64IF-NEXT: addi a1, a1, -1 +; RV64IF-NEXT: and a0, a1, a0 +; RV64IF-NEXT: ret +; +; RV32IZFINX-LABEL: test_rint_si64: +; RV32IZFINX: # %bb.0: +; RV32IZFINX-NEXT: addi sp, sp, -16 +; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill +; RV32IZFINX-NEXT: mv s0, a0 +; RV32IZFINX-NEXT: lui a0, 307200 +; RV32IZFINX-NEXT: fabs.s a1, s0 +; RV32IZFINX-NEXT: flt.s a0, a1, a0 +; RV32IZFINX-NEXT: beqz a0, .LBB21_2 +; RV32IZFINX-NEXT: # %bb.1: +; RV32IZFINX-NEXT: fcvt.w.s a0, s0 +; RV32IZFINX-NEXT: fcvt.s.w a0, a0 +; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0 +; RV32IZFINX-NEXT: .LBB21_2: +; RV32IZFINX-NEXT: lui a0, 913408 +; RV32IZFINX-NEXT: fle.s s1, a0, s0 +; RV32IZFINX-NEXT: neg s2, s1 +; RV32IZFINX-NEXT: mv a0, s0 +; RV32IZFINX-NEXT: call __fixsfdi@plt +; RV32IZFINX-NEXT: lui a2, %hi(.LCPI21_0) +; RV32IZFINX-NEXT: lw a2, %lo(.LCPI21_0)(a2) +; RV32IZFINX-NEXT: and a0, s2, a0 +; RV32IZFINX-NEXT: flt.s a4, a2, s0 +; RV32IZFINX-NEXT: neg a2, a4 +; RV32IZFINX-NEXT: or a0, a2, a0 +; RV32IZFINX-NEXT: feq.s a2, s0, s0 +; RV32IZFINX-NEXT: neg a2, a2 +; RV32IZFINX-NEXT: lui a5, 524288 +; RV32IZFINX-NEXT: lui a3, 524288 +; RV32IZFINX-NEXT: beqz s1, .LBB21_4 +; RV32IZFINX-NEXT: # %bb.3: +; RV32IZFINX-NEXT: mv a3, a1 +; RV32IZFINX-NEXT: .LBB21_4: +; RV32IZFINX-NEXT: and a0, a2, a0 +; RV32IZFINX-NEXT: beqz a4, .LBB21_6 +; RV32IZFINX-NEXT: # %bb.5: +; RV32IZFINX-NEXT: addi a3, a5, -1 +; RV32IZFINX-NEXT: .LBB21_6: +; RV32IZFINX-NEXT: and a1, a2, a3 +; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32IZFINX-NEXT: addi sp, sp, 16 +; RV32IZFINX-NEXT: ret +; +; RV64IZFINX-LABEL: test_rint_si64: +; RV64IZFINX: # %bb.0: +; RV64IZFINX-NEXT: fcvt.l.s a1, a0 +; RV64IZFINX-NEXT: feq.s a0, a0, a0 +; RV64IZFINX-NEXT: seqz a0, a0 +; RV64IZFINX-NEXT: addi a0, a0, -1 +; RV64IZFINX-NEXT: and a0, a0, a1 +; RV64IZFINX-NEXT: ret + %a = call float @llvm.rint.f32(float %x) + %b = call i64 @llvm.fptosi.sat.i64.f32(float %a) + ret i64 %b +} + +define signext i32 @test_rint_ui32(float %x) { +; CHECKIF-LABEL: test_rint_ui32: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fcvt.wu.s a0, fa0 +; CHECKIF-NEXT: feq.s a1, fa0, fa0 +; CHECKIF-NEXT: seqz a1, a1 +; CHECKIF-NEXT: addi a1, a1, -1 +; CHECKIF-NEXT: and a0, a1, a0 +; CHECKIF-NEXT: ret +; +; CHECKIZFINX-LABEL: test_rint_ui32: +; CHECKIZFINX: # %bb.0: +; CHECKIZFINX-NEXT: fcvt.wu.s a1, a0 +; CHECKIZFINX-NEXT: feq.s a0, a0, a0 +; CHECKIZFINX-NEXT: seqz a0, a0 +; CHECKIZFINX-NEXT: addi a0, a0, -1 +; CHECKIZFINX-NEXT: and a0, a0, a1 +; CHECKIZFINX-NEXT: ret + %a = call float @llvm.rint.f32(float %x) + %b = call i32 @llvm.fptoui.sat.i32.f32(float %a) + ret i32 %b +} + +define i64 @test_rint_ui64(float %x) nounwind { +; RV32IF-LABEL: test_rint_ui64: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: lui a0, 307200 +; RV32IF-NEXT: fmv.w.x fa5, a0 +; RV32IF-NEXT: fabs.s fa4, fa0 +; RV32IF-NEXT: flt.s a0, fa4, fa5 +; RV32IF-NEXT: beqz a0, .LBB23_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: fcvt.w.s a0, fs0 +; RV32IF-NEXT: fcvt.s.w fa5, a0 +; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0 +; RV32IF-NEXT: .LBB23_2: +; RV32IF-NEXT: fmv.w.x fa5, zero +; RV32IF-NEXT: fle.s a0, fa5, fs0 +; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: fmv.s fa0, fs0 +; RV32IF-NEXT: call __fixunssfdi@plt +; RV32IF-NEXT: lui a2, %hi(.LCPI23_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI23_0)(a2) +; RV32IF-NEXT: and a0, s0, a0 +; RV32IF-NEXT: flt.s a2, fa5, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a1, s0, a1 +; RV32IF-NEXT: or a1, a2, a1 +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: test_rint_ui64: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.lu.s a0, fa0 +; RV64IF-NEXT: feq.s a1, fa0, fa0 +; RV64IF-NEXT: seqz a1, a1 +; RV64IF-NEXT: addi a1, a1, -1 +; RV64IF-NEXT: and a0, a1, a0 +; RV64IF-NEXT: ret +; +; RV32IZFINX-LABEL: test_rint_ui64: +; RV32IZFINX: # %bb.0: +; RV32IZFINX-NEXT: addi sp, sp, -16 +; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFINX-NEXT: mv s0, a0 +; RV32IZFINX-NEXT: lui a0, 307200 +; RV32IZFINX-NEXT: fabs.s a1, s0 +; RV32IZFINX-NEXT: flt.s a0, a1, a0 +; RV32IZFINX-NEXT: beqz a0, .LBB23_2 +; RV32IZFINX-NEXT: # %bb.1: +; RV32IZFINX-NEXT: fcvt.w.s a0, s0 +; RV32IZFINX-NEXT: fcvt.s.w a0, a0 +; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0 +; RV32IZFINX-NEXT: .LBB23_2: +; RV32IZFINX-NEXT: fle.s a0, zero, s0 +; RV32IZFINX-NEXT: neg s1, a0 +; RV32IZFINX-NEXT: mv a0, s0 +; RV32IZFINX-NEXT: call __fixunssfdi@plt +; RV32IZFINX-NEXT: lui a2, %hi(.LCPI23_0) +; RV32IZFINX-NEXT: lw a2, %lo(.LCPI23_0)(a2) +; RV32IZFINX-NEXT: and a0, s1, a0 +; RV32IZFINX-NEXT: flt.s a2, a2, s0 +; RV32IZFINX-NEXT: neg a2, a2 +; RV32IZFINX-NEXT: or a0, a2, a0 +; RV32IZFINX-NEXT: and a1, s1, a1 +; RV32IZFINX-NEXT: or a1, a2, a1 +; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZFINX-NEXT: addi sp, sp, 16 +; RV32IZFINX-NEXT: ret +; +; RV64IZFINX-LABEL: test_rint_ui64: +; RV64IZFINX: # %bb.0: +; RV64IZFINX-NEXT: fcvt.lu.s a1, a0 +; RV64IZFINX-NEXT: feq.s a0, a0, a0 +; RV64IZFINX-NEXT: seqz a0, a0 +; RV64IZFINX-NEXT: addi a0, a0, -1 +; RV64IZFINX-NEXT: and a0, a0, a1 +; RV64IZFINX-NEXT: ret + %a = call float @llvm.rint.f32(float %x) + %b = call i64 @llvm.fptoui.sat.i64.f32(float %a) + ret i64 %b +} + declare float @llvm.floor.f32(float) declare float @llvm.ceil.f32(float) declare float @llvm.trunc.f32(float) declare float @llvm.round.f32(float) declare float @llvm.roundeven.f32(float) +declare float @llvm.rint.f32(float) declare i32 @llvm.fptosi.sat.i32.f32(float) declare i64 @llvm.fptosi.sat.i64.f32(float) declare i32 @llvm.fptoui.sat.i32.f32(float) diff --git a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll index e7215f07c2204..962ed8393b726 100644 --- a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll @@ -3576,11 +3576,724 @@ define i64 @test_roundeven_ui64(half %x) nounwind { ret i64 %b } +define signext i32 @test_rint_si32(half %x) { +; CHECKIZFH-LABEL: test_rint_si32: +; CHECKIZFH: # %bb.0: +; CHECKIZFH-NEXT: fcvt.w.h a0, fa0 +; CHECKIZFH-NEXT: feq.h a1, fa0, fa0 +; CHECKIZFH-NEXT: seqz a1, a1 +; CHECKIZFH-NEXT: addi a1, a1, -1 +; CHECKIZFH-NEXT: and a0, a1, a0 +; CHECKIZFH-NEXT: ret +; +; CHECKIZHINX-LABEL: test_rint_si32: +; CHECKIZHINX: # %bb.0: +; CHECKIZHINX-NEXT: lui a1, %hi(.LCPI20_0) +; CHECKIZHINX-NEXT: lh a1, %lo(.LCPI20_0)(a1) +; CHECKIZHINX-NEXT: fabs.h a2, a0 +; CHECKIZHINX-NEXT: flt.h a1, a2, a1 +; CHECKIZHINX-NEXT: beqz a1, .LBB20_2 +; CHECKIZHINX-NEXT: # %bb.1: +; CHECKIZHINX-NEXT: fcvt.w.h a1, a0 +; CHECKIZHINX-NEXT: fcvt.h.w a1, a1 +; CHECKIZHINX-NEXT: fsgnj.h a0, a1, a0 +; CHECKIZHINX-NEXT: .LBB20_2: +; CHECKIZHINX-NEXT: fcvt.w.h a1, a0, rtz +; CHECKIZHINX-NEXT: feq.h a0, a0, a0 +; CHECKIZHINX-NEXT: seqz a0, a0 +; CHECKIZHINX-NEXT: addi a0, a0, -1 +; CHECKIZHINX-NEXT: and a0, a0, a1 +; CHECKIZHINX-NEXT: ret +; +; CHECKIZFHMIN-LABEL: test_rint_si32: +; CHECKIZFHMIN: # %bb.0: +; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa0 +; CHECKIZFHMIN-NEXT: lui a0, 307200 +; CHECKIZFHMIN-NEXT: fmv.w.x fa4, a0 +; CHECKIZFHMIN-NEXT: fabs.s fa3, fa5 +; CHECKIZFHMIN-NEXT: flt.s a0, fa3, fa4 +; CHECKIZFHMIN-NEXT: beqz a0, .LBB20_2 +; CHECKIZFHMIN-NEXT: # %bb.1: +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, fa5 +; CHECKIZFHMIN-NEXT: fcvt.s.w fa4, a0 +; CHECKIZFHMIN-NEXT: fsgnj.s fa5, fa4, fa5 +; CHECKIZFHMIN-NEXT: .LBB20_2: +; CHECKIZFHMIN-NEXT: fcvt.h.s fa5, fa5 +; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa5 +; CHECKIZFHMIN-NEXT: fcvt.w.s a0, fa5, rtz +; CHECKIZFHMIN-NEXT: feq.s a1, fa5, fa5 +; CHECKIZFHMIN-NEXT: seqz a1, a1 +; CHECKIZFHMIN-NEXT: addi a1, a1, -1 +; CHECKIZFHMIN-NEXT: and a0, a1, a0 +; CHECKIZFHMIN-NEXT: ret +; +; CHECKIZHINXMIN-LABEL: test_rint_si32: +; CHECKIZHINXMIN: # %bb.0: +; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0 +; CHECKIZHINXMIN-NEXT: lui a1, 307200 +; CHECKIZHINXMIN-NEXT: fabs.s a2, a0 +; CHECKIZHINXMIN-NEXT: flt.s a1, a2, a1 +; CHECKIZHINXMIN-NEXT: beqz a1, .LBB20_2 +; CHECKIZHINXMIN-NEXT: # %bb.1: +; CHECKIZHINXMIN-NEXT: fcvt.w.s a1, a0 +; CHECKIZHINXMIN-NEXT: fcvt.s.w a1, a1 +; CHECKIZHINXMIN-NEXT: fsgnj.s a0, a1, a0 +; CHECKIZHINXMIN-NEXT: .LBB20_2: +; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0 +; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0 +; CHECKIZHINXMIN-NEXT: fcvt.w.s a1, a0, rtz +; CHECKIZHINXMIN-NEXT: feq.s a0, a0, a0 +; CHECKIZHINXMIN-NEXT: seqz a0, a0 +; CHECKIZHINXMIN-NEXT: addi a0, a0, -1 +; CHECKIZHINXMIN-NEXT: and a0, a0, a1 +; CHECKIZHINXMIN-NEXT: ret + %a = call half @llvm.rint.f16(half %x) + %b = call i32 @llvm.fptosi.sat.i32.f16(half %a) + ret i32 %b +} + +define i64 @test_rint_si64(half %x) nounwind { +; RV32IZFH-LABEL: test_rint_si64: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI21_0) +; RV32IZFH-NEXT: flh fa5, %lo(.LCPI21_0)(a0) +; RV32IZFH-NEXT: fabs.h fa4, fa0 +; RV32IZFH-NEXT: flt.h a0, fa4, fa5 +; RV32IZFH-NEXT: beqz a0, .LBB21_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0 +; RV32IZFH-NEXT: fcvt.h.w fa5, a0 +; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0 +; RV32IZFH-NEXT: .LBB21_2: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 +; RV32IZFH-NEXT: lui a0, 913408 +; RV32IZFH-NEXT: fmv.w.x fa5, a0 +; RV32IZFH-NEXT: fle.s s0, fa5, fs0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 +; RV32IZFH-NEXT: call __fixsfdi@plt +; RV32IZFH-NEXT: lui a4, 524288 +; RV32IZFH-NEXT: lui a2, 524288 +; RV32IZFH-NEXT: beqz s0, .LBB21_4 +; RV32IZFH-NEXT: # %bb.3: +; RV32IZFH-NEXT: mv a2, a1 +; RV32IZFH-NEXT: .LBB21_4: +; RV32IZFH-NEXT: lui a1, %hi(.LCPI21_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI21_1)(a1) +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: beqz a3, .LBB21_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a2, a4, -1 +; RV32IZFH-NEXT: .LBB21_6: +; RV32IZFH-NEXT: feq.s a1, fs0, fs0 +; RV32IZFH-NEXT: neg a4, a1 +; RV32IZFH-NEXT: and a1, a4, a2 +; RV32IZFH-NEXT: neg a2, s0 +; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a0, a4, a0 +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: test_rint_si64: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0 +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: addi a1, a1, -1 +; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: ret +; +; RV32IZHINX-LABEL: test_rint_si64: +; RV32IZHINX: # %bb.0: +; RV32IZHINX-NEXT: lui a1, %hi(.LCPI21_0) +; RV32IZHINX-NEXT: lh a1, %lo(.LCPI21_0)(a1) +; RV32IZHINX-NEXT: fabs.h a2, a0 +; RV32IZHINX-NEXT: flt.h a1, a2, a1 +; RV32IZHINX-NEXT: beqz a1, .LBB21_2 +; RV32IZHINX-NEXT: # %bb.1: +; RV32IZHINX-NEXT: fcvt.w.h a1, a0 +; RV32IZHINX-NEXT: fcvt.h.w a1, a1 +; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0 +; RV32IZHINX-NEXT: .LBB21_2: +; RV32IZHINX-NEXT: addi sp, sp, -16 +; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZHINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill +; RV32IZHINX-NEXT: fcvt.s.h s0, a0 +; RV32IZHINX-NEXT: lui a0, 913408 +; RV32IZHINX-NEXT: fle.s s1, a0, s0 +; RV32IZHINX-NEXT: neg s2, s1 +; RV32IZHINX-NEXT: mv a0, s0 +; RV32IZHINX-NEXT: call __fixsfdi@plt +; RV32IZHINX-NEXT: lui a2, %hi(.LCPI21_1) +; RV32IZHINX-NEXT: lw a2, %lo(.LCPI21_1)(a2) +; RV32IZHINX-NEXT: and a0, s2, a0 +; RV32IZHINX-NEXT: flt.s a4, a2, s0 +; RV32IZHINX-NEXT: neg a2, a4 +; RV32IZHINX-NEXT: or a0, a2, a0 +; RV32IZHINX-NEXT: feq.s a2, s0, s0 +; RV32IZHINX-NEXT: neg a2, a2 +; RV32IZHINX-NEXT: lui a5, 524288 +; RV32IZHINX-NEXT: lui a3, 524288 +; RV32IZHINX-NEXT: beqz s1, .LBB21_4 +; RV32IZHINX-NEXT: # %bb.3: +; RV32IZHINX-NEXT: mv a3, a1 +; RV32IZHINX-NEXT: .LBB21_4: +; RV32IZHINX-NEXT: and a0, a2, a0 +; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32IZHINX-NEXT: addi sp, sp, 16 +; RV32IZHINX-NEXT: beqz a4, .LBB21_6 +; RV32IZHINX-NEXT: # %bb.5: +; RV32IZHINX-NEXT: addi a3, a5, -1 +; RV32IZHINX-NEXT: .LBB21_6: +; RV32IZHINX-NEXT: and a1, a2, a3 +; RV32IZHINX-NEXT: ret +; +; RV64IZHINX-LABEL: test_rint_si64: +; RV64IZHINX: # %bb.0: +; RV64IZHINX-NEXT: lui a1, %hi(.LCPI21_0) +; RV64IZHINX-NEXT: lh a1, %lo(.LCPI21_0)(a1) +; RV64IZHINX-NEXT: fabs.h a2, a0 +; RV64IZHINX-NEXT: flt.h a1, a2, a1 +; RV64IZHINX-NEXT: beqz a1, .LBB21_2 +; RV64IZHINX-NEXT: # %bb.1: +; RV64IZHINX-NEXT: fcvt.w.h a1, a0 +; RV64IZHINX-NEXT: fcvt.h.w a1, a1 +; RV64IZHINX-NEXT: fsgnj.h a0, a1, a0 +; RV64IZHINX-NEXT: .LBB21_2: +; RV64IZHINX-NEXT: fcvt.l.h a1, a0, rtz +; RV64IZHINX-NEXT: feq.h a0, a0, a0 +; RV64IZHINX-NEXT: seqz a0, a0 +; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: and a0, a0, a1 +; RV64IZHINX-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_rint_si64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h fa5, fa0 +; RV32IZFHMIN-NEXT: lui a0, 307200 +; RV32IZFHMIN-NEXT: fmv.w.x fa4, a0 +; RV32IZFHMIN-NEXT: fabs.s fa3, fa5 +; RV32IZFHMIN-NEXT: flt.s a0, fa3, fa4 +; RV32IZFHMIN-NEXT: beqz a0, .LBB21_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, fa5 +; RV32IZFHMIN-NEXT: fcvt.s.w fa4, a0 +; RV32IZFHMIN-NEXT: fsgnj.s fa5, fa4, fa5 +; RV32IZFHMIN-NEXT: .LBB21_2: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 +; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 +; RV32IZFHMIN-NEXT: lui a0, 913408 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 +; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 +; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IZFHMIN-NEXT: call __fixsfdi@plt +; RV32IZFHMIN-NEXT: lui a4, 524288 +; RV32IZFHMIN-NEXT: lui a2, 524288 +; RV32IZFHMIN-NEXT: beqz s0, .LBB21_4 +; RV32IZFHMIN-NEXT: # %bb.3: +; RV32IZFHMIN-NEXT: mv a2, a1 +; RV32IZFHMIN-NEXT: .LBB21_4: +; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI21_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI21_0)(a1) +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: beqz a3, .LBB21_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a2, a4, -1 +; RV32IZFHMIN-NEXT: .LBB21_6: +; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a4, a1 +; RV32IZFHMIN-NEXT: and a1, a4, a2 +; RV32IZFHMIN-NEXT: neg a2, s0 +; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: and a0, a4, a0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_rint_si64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h fa5, fa0 +; RV64IZFHMIN-NEXT: lui a0, 307200 +; RV64IZFHMIN-NEXT: fmv.w.x fa4, a0 +; RV64IZFHMIN-NEXT: fabs.s fa3, fa5 +; RV64IZFHMIN-NEXT: flt.s a0, fa3, fa4 +; RV64IZFHMIN-NEXT: beqz a0, .LBB21_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, fa5 +; RV64IZFHMIN-NEXT: fcvt.s.w fa4, a0 +; RV64IZFHMIN-NEXT: fsgnj.s fa5, fa4, fa5 +; RV64IZFHMIN-NEXT: .LBB21_2: +; RV64IZFHMIN-NEXT: fcvt.h.s fa5, fa5 +; RV64IZFHMIN-NEXT: fcvt.s.h fa5, fa5 +; RV64IZFHMIN-NEXT: fcvt.l.s a0, fa5, rtz +; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 +; RV64IZFHMIN-NEXT: seqz a1, a1 +; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: and a0, a1, a0 +; RV64IZFHMIN-NEXT: ret +; +; RV32IZHINXMIN-LABEL: test_rint_si64: +; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV32IZHINXMIN-NEXT: lui a1, 307200 +; RV32IZHINXMIN-NEXT: fabs.s a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a1, a2, a1 +; RV32IZHINXMIN-NEXT: beqz a1, .LBB21_2 +; RV32IZHINXMIN-NEXT: # %bb.1: +; RV32IZHINXMIN-NEXT: fcvt.w.s a1, a0 +; RV32IZHINXMIN-NEXT: fcvt.s.w a1, a1 +; RV32IZHINXMIN-NEXT: fsgnj.s a0, a1, a0 +; RV32IZHINXMIN-NEXT: .LBB21_2: +; RV32IZHINXMIN-NEXT: addi sp, sp, -16 +; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZHINXMIN-NEXT: sw s2, 0(sp) # 4-byte Folded Spill +; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 +; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0 +; RV32IZHINXMIN-NEXT: lui a0, 913408 +; RV32IZHINXMIN-NEXT: fle.s s1, a0, s0 +; RV32IZHINXMIN-NEXT: neg s2, s1 +; RV32IZHINXMIN-NEXT: mv a0, s0 +; RV32IZHINXMIN-NEXT: call __fixsfdi@plt +; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI21_0) +; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI21_0)(a2) +; RV32IZHINXMIN-NEXT: and a0, s2, a0 +; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a4 +; RV32IZHINXMIN-NEXT: or a0, a2, a0 +; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 +; RV32IZHINXMIN-NEXT: neg a2, a2 +; RV32IZHINXMIN-NEXT: lui a5, 524288 +; RV32IZHINXMIN-NEXT: lui a3, 524288 +; RV32IZHINXMIN-NEXT: beqz s1, .LBB21_4 +; RV32IZHINXMIN-NEXT: # %bb.3: +; RV32IZHINXMIN-NEXT: mv a3, a1 +; RV32IZHINXMIN-NEXT: .LBB21_4: +; RV32IZHINXMIN-NEXT: and a0, a2, a0 +; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32IZHINXMIN-NEXT: addi sp, sp, 16 +; RV32IZHINXMIN-NEXT: beqz a4, .LBB21_6 +; RV32IZHINXMIN-NEXT: # %bb.5: +; RV32IZHINXMIN-NEXT: addi a3, a5, -1 +; RV32IZHINXMIN-NEXT: .LBB21_6: +; RV32IZHINXMIN-NEXT: and a1, a2, a3 +; RV32IZHINXMIN-NEXT: ret +; +; RV64IZHINXMIN-LABEL: test_rint_si64: +; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV64IZHINXMIN-NEXT: lui a1, 307200 +; RV64IZHINXMIN-NEXT: fabs.s a2, a0 +; RV64IZHINXMIN-NEXT: flt.s a1, a2, a1 +; RV64IZHINXMIN-NEXT: beqz a1, .LBB21_2 +; RV64IZHINXMIN-NEXT: # %bb.1: +; RV64IZHINXMIN-NEXT: fcvt.w.s a1, a0 +; RV64IZHINXMIN-NEXT: fcvt.s.w a1, a1 +; RV64IZHINXMIN-NEXT: fsgnj.s a0, a1, a0 +; RV64IZHINXMIN-NEXT: .LBB21_2: +; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0 +; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV64IZHINXMIN-NEXT: fcvt.l.s a1, a0, rtz +; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 +; RV64IZHINXMIN-NEXT: seqz a0, a0 +; RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: and a0, a0, a1 +; RV64IZHINXMIN-NEXT: ret + %a = call half @llvm.rint.f16(half %x) + %b = call i64 @llvm.fptosi.sat.i64.f16(half %a) + ret i64 %b +} + +define signext i32 @test_rint_ui32(half %x) { +; CHECKIZFH-LABEL: test_rint_ui32: +; CHECKIZFH: # %bb.0: +; CHECKIZFH-NEXT: fcvt.wu.h a0, fa0 +; CHECKIZFH-NEXT: feq.h a1, fa0, fa0 +; CHECKIZFH-NEXT: seqz a1, a1 +; CHECKIZFH-NEXT: addi a1, a1, -1 +; CHECKIZFH-NEXT: and a0, a1, a0 +; CHECKIZFH-NEXT: ret +; +; RV32IZHINX-LABEL: test_rint_ui32: +; RV32IZHINX: # %bb.0: +; RV32IZHINX-NEXT: lui a1, %hi(.LCPI22_0) +; RV32IZHINX-NEXT: lh a1, %lo(.LCPI22_0)(a1) +; RV32IZHINX-NEXT: fabs.h a2, a0 +; RV32IZHINX-NEXT: flt.h a1, a2, a1 +; RV32IZHINX-NEXT: beqz a1, .LBB22_2 +; RV32IZHINX-NEXT: # %bb.1: +; RV32IZHINX-NEXT: fcvt.w.h a1, a0 +; RV32IZHINX-NEXT: fcvt.h.w a1, a1 +; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0 +; RV32IZHINX-NEXT: .LBB22_2: +; RV32IZHINX-NEXT: fcvt.wu.h a1, a0, rtz +; RV32IZHINX-NEXT: feq.h a0, a0, a0 +; RV32IZHINX-NEXT: seqz a0, a0 +; RV32IZHINX-NEXT: addi a0, a0, -1 +; RV32IZHINX-NEXT: and a0, a0, a1 +; RV32IZHINX-NEXT: ret +; +; RV64IZHINX-LABEL: test_rint_ui32: +; RV64IZHINX: # %bb.0: +; RV64IZHINX-NEXT: lui a1, %hi(.LCPI22_0) +; RV64IZHINX-NEXT: lh a1, %lo(.LCPI22_0)(a1) +; RV64IZHINX-NEXT: fabs.h a2, a0 +; RV64IZHINX-NEXT: flt.h a1, a2, a1 +; RV64IZHINX-NEXT: beqz a1, .LBB22_2 +; RV64IZHINX-NEXT: # %bb.1: +; RV64IZHINX-NEXT: fcvt.w.h a1, a0 +; RV64IZHINX-NEXT: fcvt.h.w a1, a1 +; RV64IZHINX-NEXT: fsgnj.h a0, a1, a0 +; RV64IZHINX-NEXT: .LBB22_2: +; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz +; RV64IZHINX-NEXT: feq.h a0, a0, a0 +; RV64IZHINX-NEXT: seqz a0, a0 +; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: and a0, a1, a0 +; RV64IZHINX-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_rint_ui32: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h fa5, fa0 +; RV32IZFHMIN-NEXT: lui a0, 307200 +; RV32IZFHMIN-NEXT: fmv.w.x fa4, a0 +; RV32IZFHMIN-NEXT: fabs.s fa3, fa5 +; RV32IZFHMIN-NEXT: flt.s a0, fa3, fa4 +; RV32IZFHMIN-NEXT: beqz a0, .LBB22_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, fa5 +; RV32IZFHMIN-NEXT: fcvt.s.w fa4, a0 +; RV32IZFHMIN-NEXT: fsgnj.s fa5, fa4, fa5 +; RV32IZFHMIN-NEXT: .LBB22_2: +; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 +; RV32IZFHMIN-NEXT: fcvt.s.h fa5, fa5 +; RV32IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz +; RV32IZFHMIN-NEXT: feq.s a1, fa5, fa5 +; RV32IZFHMIN-NEXT: seqz a1, a1 +; RV32IZFHMIN-NEXT: addi a1, a1, -1 +; RV32IZFHMIN-NEXT: and a0, a1, a0 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_rint_ui32: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h fa5, fa0 +; RV64IZFHMIN-NEXT: lui a0, 307200 +; RV64IZFHMIN-NEXT: fmv.w.x fa4, a0 +; RV64IZFHMIN-NEXT: fabs.s fa3, fa5 +; RV64IZFHMIN-NEXT: flt.s a0, fa3, fa4 +; RV64IZFHMIN-NEXT: beqz a0, .LBB22_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, fa5 +; RV64IZFHMIN-NEXT: fcvt.s.w fa4, a0 +; RV64IZFHMIN-NEXT: fsgnj.s fa5, fa4, fa5 +; RV64IZFHMIN-NEXT: .LBB22_2: +; RV64IZFHMIN-NEXT: fcvt.h.s fa5, fa5 +; RV64IZFHMIN-NEXT: fcvt.s.h fa5, fa5 +; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz +; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 +; RV64IZFHMIN-NEXT: seqz a1, a1 +; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: and a0, a0, a1 +; RV64IZFHMIN-NEXT: ret +; +; RV32IZHINXMIN-LABEL: test_rint_ui32: +; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV32IZHINXMIN-NEXT: lui a1, 307200 +; RV32IZHINXMIN-NEXT: fabs.s a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a1, a2, a1 +; RV32IZHINXMIN-NEXT: beqz a1, .LBB22_2 +; RV32IZHINXMIN-NEXT: # %bb.1: +; RV32IZHINXMIN-NEXT: fcvt.w.s a1, a0 +; RV32IZHINXMIN-NEXT: fcvt.s.w a1, a1 +; RV32IZHINXMIN-NEXT: fsgnj.s a0, a1, a0 +; RV32IZHINXMIN-NEXT: .LBB22_2: +; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 +; RV32IZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV32IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz +; RV32IZHINXMIN-NEXT: feq.s a0, a0, a0 +; RV32IZHINXMIN-NEXT: seqz a0, a0 +; RV32IZHINXMIN-NEXT: addi a0, a0, -1 +; RV32IZHINXMIN-NEXT: and a0, a0, a1 +; RV32IZHINXMIN-NEXT: ret +; +; RV64IZHINXMIN-LABEL: test_rint_ui32: +; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV64IZHINXMIN-NEXT: lui a1, 307200 +; RV64IZHINXMIN-NEXT: fabs.s a2, a0 +; RV64IZHINXMIN-NEXT: flt.s a1, a2, a1 +; RV64IZHINXMIN-NEXT: beqz a1, .LBB22_2 +; RV64IZHINXMIN-NEXT: # %bb.1: +; RV64IZHINXMIN-NEXT: fcvt.w.s a1, a0 +; RV64IZHINXMIN-NEXT: fcvt.s.w a1, a1 +; RV64IZHINXMIN-NEXT: fsgnj.s a0, a1, a0 +; RV64IZHINXMIN-NEXT: .LBB22_2: +; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0 +; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz +; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 +; RV64IZHINXMIN-NEXT: seqz a0, a0 +; RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: and a0, a1, a0 +; RV64IZHINXMIN-NEXT: ret + %a = call half @llvm.rint.f16(half %x) + %b = call i32 @llvm.fptoui.sat.i32.f16(half %a) + ret i32 %b +} + +define i64 @test_rint_ui64(half %x) nounwind { +; RV32IZFH-LABEL: test_rint_ui64: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI23_0) +; RV32IZFH-NEXT: flh fa5, %lo(.LCPI23_0)(a0) +; RV32IZFH-NEXT: fabs.h fa4, fa0 +; RV32IZFH-NEXT: flt.h a0, fa4, fa5 +; RV32IZFH-NEXT: beqz a0, .LBB23_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0 +; RV32IZFH-NEXT: fcvt.h.w fa5, a0 +; RV32IZFH-NEXT: fsgnj.h fa0, fa5, fa0 +; RV32IZFH-NEXT: .LBB23_2: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: fle.s a0, fa5, fs0 +; RV32IZFH-NEXT: neg s0, a0 +; RV32IZFH-NEXT: fmv.s fa0, fs0 +; RV32IZFH-NEXT: call __fixunssfdi@plt +; RV32IZFH-NEXT: lui a2, %hi(.LCPI23_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI23_1)(a2) +; RV32IZFH-NEXT: and a0, s0, a0 +; RV32IZFH-NEXT: flt.s a2, fa5, fs0 +; RV32IZFH-NEXT: neg a2, a2 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a1, s0, a1 +; RV32IZFH-NEXT: or a1, a2, a1 +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: test_rint_ui64: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0 +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: addi a1, a1, -1 +; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: ret +; +; RV32IZHINX-LABEL: test_rint_ui64: +; RV32IZHINX: # %bb.0: +; RV32IZHINX-NEXT: lui a1, %hi(.LCPI23_0) +; RV32IZHINX-NEXT: lh a1, %lo(.LCPI23_0)(a1) +; RV32IZHINX-NEXT: fabs.h a2, a0 +; RV32IZHINX-NEXT: flt.h a1, a2, a1 +; RV32IZHINX-NEXT: beqz a1, .LBB23_2 +; RV32IZHINX-NEXT: # %bb.1: +; RV32IZHINX-NEXT: fcvt.w.h a1, a0 +; RV32IZHINX-NEXT: fcvt.h.w a1, a1 +; RV32IZHINX-NEXT: fsgnj.h a0, a1, a0 +; RV32IZHINX-NEXT: .LBB23_2: +; RV32IZHINX-NEXT: addi sp, sp, -16 +; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZHINX-NEXT: fcvt.s.h s0, a0 +; RV32IZHINX-NEXT: fle.s a0, zero, s0 +; RV32IZHINX-NEXT: neg s1, a0 +; RV32IZHINX-NEXT: mv a0, s0 +; RV32IZHINX-NEXT: call __fixunssfdi@plt +; RV32IZHINX-NEXT: lui a2, %hi(.LCPI23_1) +; RV32IZHINX-NEXT: lw a2, %lo(.LCPI23_1)(a2) +; RV32IZHINX-NEXT: and a0, s1, a0 +; RV32IZHINX-NEXT: flt.s a2, a2, s0 +; RV32IZHINX-NEXT: neg a2, a2 +; RV32IZHINX-NEXT: or a0, a2, a0 +; RV32IZHINX-NEXT: and a1, s1, a1 +; RV32IZHINX-NEXT: or a1, a2, a1 +; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZHINX-NEXT: addi sp, sp, 16 +; RV32IZHINX-NEXT: ret +; +; RV64IZHINX-LABEL: test_rint_ui64: +; RV64IZHINX: # %bb.0: +; RV64IZHINX-NEXT: lui a1, %hi(.LCPI23_0) +; RV64IZHINX-NEXT: lh a1, %lo(.LCPI23_0)(a1) +; RV64IZHINX-NEXT: fabs.h a2, a0 +; RV64IZHINX-NEXT: flt.h a1, a2, a1 +; RV64IZHINX-NEXT: beqz a1, .LBB23_2 +; RV64IZHINX-NEXT: # %bb.1: +; RV64IZHINX-NEXT: fcvt.w.h a1, a0 +; RV64IZHINX-NEXT: fcvt.h.w a1, a1 +; RV64IZHINX-NEXT: fsgnj.h a0, a1, a0 +; RV64IZHINX-NEXT: .LBB23_2: +; RV64IZHINX-NEXT: fcvt.lu.h a1, a0, rtz +; RV64IZHINX-NEXT: feq.h a0, a0, a0 +; RV64IZHINX-NEXT: seqz a0, a0 +; RV64IZHINX-NEXT: addi a0, a0, -1 +; RV64IZHINX-NEXT: and a0, a0, a1 +; RV64IZHINX-NEXT: ret +; +; RV32IZFHMIN-LABEL: test_rint_ui64: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: fcvt.s.h fa5, fa0 +; RV32IZFHMIN-NEXT: lui a0, 307200 +; RV32IZFHMIN-NEXT: fmv.w.x fa4, a0 +; RV32IZFHMIN-NEXT: fabs.s fa3, fa5 +; RV32IZFHMIN-NEXT: flt.s a0, fa3, fa4 +; RV32IZFHMIN-NEXT: beqz a0, .LBB23_2 +; RV32IZFHMIN-NEXT: # %bb.1: +; RV32IZFHMIN-NEXT: fcvt.w.s a0, fa5 +; RV32IZFHMIN-NEXT: fcvt.s.w fa4, a0 +; RV32IZFHMIN-NEXT: fsgnj.s fa5, fa4, fa5 +; RV32IZFHMIN-NEXT: .LBB23_2: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 +; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 +; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero +; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0 +; RV32IZFHMIN-NEXT: neg s0, a0 +; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 +; RV32IZFHMIN-NEXT: call __fixunssfdi@plt +; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI23_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI23_0)(a2) +; RV32IZFHMIN-NEXT: and a0, s0, a0 +; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0 +; RV32IZFHMIN-NEXT: neg a2, a2 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: and a1, s0, a1 +; RV32IZFHMIN-NEXT: or a1, a2, a1 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: test_rint_ui64: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: fcvt.s.h fa5, fa0 +; RV64IZFHMIN-NEXT: lui a0, 307200 +; RV64IZFHMIN-NEXT: fmv.w.x fa4, a0 +; RV64IZFHMIN-NEXT: fabs.s fa3, fa5 +; RV64IZFHMIN-NEXT: flt.s a0, fa3, fa4 +; RV64IZFHMIN-NEXT: beqz a0, .LBB23_2 +; RV64IZFHMIN-NEXT: # %bb.1: +; RV64IZFHMIN-NEXT: fcvt.w.s a0, fa5 +; RV64IZFHMIN-NEXT: fcvt.s.w fa4, a0 +; RV64IZFHMIN-NEXT: fsgnj.s fa5, fa4, fa5 +; RV64IZFHMIN-NEXT: .LBB23_2: +; RV64IZFHMIN-NEXT: fcvt.h.s fa5, fa5 +; RV64IZFHMIN-NEXT: fcvt.s.h fa5, fa5 +; RV64IZFHMIN-NEXT: fcvt.lu.s a0, fa5, rtz +; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5 +; RV64IZFHMIN-NEXT: seqz a1, a1 +; RV64IZFHMIN-NEXT: addi a1, a1, -1 +; RV64IZFHMIN-NEXT: and a0, a1, a0 +; RV64IZFHMIN-NEXT: ret +; +; RV32IZHINXMIN-LABEL: test_rint_ui64: +; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV32IZHINXMIN-NEXT: lui a1, 307200 +; RV32IZHINXMIN-NEXT: fabs.s a2, a0 +; RV32IZHINXMIN-NEXT: flt.s a1, a2, a1 +; RV32IZHINXMIN-NEXT: beqz a1, .LBB23_2 +; RV32IZHINXMIN-NEXT: # %bb.1: +; RV32IZHINXMIN-NEXT: fcvt.w.s a1, a0 +; RV32IZHINXMIN-NEXT: fcvt.s.w a1, a1 +; RV32IZHINXMIN-NEXT: fsgnj.s a0, a1, a0 +; RV32IZHINXMIN-NEXT: .LBB23_2: +; RV32IZHINXMIN-NEXT: addi sp, sp, -16 +; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 +; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0 +; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0 +; RV32IZHINXMIN-NEXT: neg s1, a0 +; RV32IZHINXMIN-NEXT: mv a0, s0 +; RV32IZHINXMIN-NEXT: call __fixunssfdi@plt +; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI23_0) +; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI23_0)(a2) +; RV32IZHINXMIN-NEXT: and a0, s1, a0 +; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a2 +; RV32IZHINXMIN-NEXT: or a0, a2, a0 +; RV32IZHINXMIN-NEXT: and a1, s1, a1 +; RV32IZHINXMIN-NEXT: or a1, a2, a1 +; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IZHINXMIN-NEXT: addi sp, sp, 16 +; RV32IZHINXMIN-NEXT: ret +; +; RV64IZHINXMIN-LABEL: test_rint_ui64: +; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV64IZHINXMIN-NEXT: lui a1, 307200 +; RV64IZHINXMIN-NEXT: fabs.s a2, a0 +; RV64IZHINXMIN-NEXT: flt.s a1, a2, a1 +; RV64IZHINXMIN-NEXT: beqz a1, .LBB23_2 +; RV64IZHINXMIN-NEXT: # %bb.1: +; RV64IZHINXMIN-NEXT: fcvt.w.s a1, a0 +; RV64IZHINXMIN-NEXT: fcvt.s.w a1, a1 +; RV64IZHINXMIN-NEXT: fsgnj.s a0, a1, a0 +; RV64IZHINXMIN-NEXT: .LBB23_2: +; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0 +; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV64IZHINXMIN-NEXT: fcvt.lu.s a1, a0, rtz +; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0 +; RV64IZHINXMIN-NEXT: seqz a0, a0 +; RV64IZHINXMIN-NEXT: addi a0, a0, -1 +; RV64IZHINXMIN-NEXT: and a0, a0, a1 +; RV64IZHINXMIN-NEXT: ret + %a = call half @llvm.rint.f16(half %x) + %b = call i64 @llvm.fptoui.sat.i64.f16(half %a) + ret i64 %b +} + declare half @llvm.floor.f16(half) declare half @llvm.ceil.f16(half) declare half @llvm.trunc.f16(half) declare half @llvm.round.f16(half) declare half @llvm.roundeven.f16(half) +declare half @llvm.rint.f16(half) declare i32 @llvm.fptosi.sat.i32.f16(half) declare i64 @llvm.fptosi.sat.i64.f16(half) declare i32 @llvm.fptoui.sat.i32.f16(half)