[RISCV][NFC] Add comments and tests for frint case of performFP_TO_INT_SATCombine. #76014
Conversation
performFP_TO_INT_SATCombine can also serve the pattern (fp_to_int_sat (frint X)).

@llvm/pr-subscribers-backend-risc-v
Author: Yeting Kuo (yetingk)
Patch is 49.09 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/76014.diff

4 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 22c61eb20885b8..97d76ca494cbee 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13594,6 +13594,7 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
+// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
static SDValue performFP_TO_INT_SATCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
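For reference, this combine picks the fcvt rounding-mode operand from the opcode of the rounded operand; frint rounds according to the dynamic rounding mode in the frm CSR, which is why the new comment line maps it to dyn. Below is a minimal sketch of that opcode-to-mode mapping, assuming the RISCVFPRndMode definitions from MCTargetDesc/RISCVBaseInfo.h; the helper name here is hypothetical, and the in-tree matcher may differ in shape and coverage:

```cpp
// Hypothetical sketch (in-tree build context assumed): map the opcode of
// the rounded operand to the fcvt rounding-mode operand documented in the
// comment block above. Uses RISCVFPRndMode from MCTargetDesc/RISCVBaseInfo.h.
#include "MCTargetDesc/RISCVBaseInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
using namespace llvm;

static RISCVFPRndMode::RoundingMode roundingModeForOpcode(unsigned Opc) {
  switch (Opc) {
  case ISD::FFLOOR:
    return RISCVFPRndMode::RDN; // round down
  case ISD::FCEIL:
    return RISCVFPRndMode::RUP; // round up
  case ISD::FTRUNC:
    return RISCVFPRndMode::RTZ; // round toward zero
  case ISD::FROUND:
    return RISCVFPRndMode::RMM; // round to nearest, ties to max magnitude
  case ISD::FRINT:
    return RISCVFPRndMode::DYN; // defer to the dynamic frm CSR
  default:
    return RISCVFPRndMode::Invalid; // not a recognized rounding node
  }
}
```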
diff --git a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
index 1fd0d629e9a7a9..5c5b4bb723b686 100644
--- a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
+++ b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
@@ -1338,11 +1338,278 @@ define i64 @test_roundeven_ui64(double %x) nounwind {
ret i64 %b
}
+define signext i32 @test_rint_si32(double %x) {
+; CHECKIFD-LABEL: test_rint_si32:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fcvt.w.d a0, fa0
+; CHECKIFD-NEXT: feq.d a1, fa0, fa0
+; CHECKIFD-NEXT: seqz a1, a1
+; CHECKIFD-NEXT: addi a1, a1, -1
+; CHECKIFD-NEXT: and a0, a1, a0
+; CHECKIFD-NEXT: ret
+;
+; RV32IZFINXZDINX-LABEL: test_rint_si32:
+; RV32IZFINXZDINX: # %bb.0:
+; RV32IZFINXZDINX-NEXT: addi sp, sp, -16
+; RV32IZFINXZDINX-NEXT: .cfi_def_cfa_offset 16
+; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
+; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
+; RV32IZFINXZDINX-NEXT: lw a0, 8(sp)
+; RV32IZFINXZDINX-NEXT: lw a1, 12(sp)
+; RV32IZFINXZDINX-NEXT: fcvt.w.d a2, a0
+; RV32IZFINXZDINX-NEXT: feq.d a0, a0, a0
+; RV32IZFINXZDINX-NEXT: seqz a0, a0
+; RV32IZFINXZDINX-NEXT: addi a0, a0, -1
+; RV32IZFINXZDINX-NEXT: and a0, a0, a2
+; RV32IZFINXZDINX-NEXT: addi sp, sp, 16
+; RV32IZFINXZDINX-NEXT: ret
+;
+; RV64IZFINXZDINX-LABEL: test_rint_si32:
+; RV64IZFINXZDINX: # %bb.0:
+; RV64IZFINXZDINX-NEXT: fcvt.w.d a1, a0
+; RV64IZFINXZDINX-NEXT: feq.d a0, a0, a0
+; RV64IZFINXZDINX-NEXT: seqz a0, a0
+; RV64IZFINXZDINX-NEXT: addi a0, a0, -1
+; RV64IZFINXZDINX-NEXT: and a0, a0, a1
+; RV64IZFINXZDINX-NEXT: ret
+ %a = call double @llvm.rint.f64(double %x)
+ %b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+ ret i32 %b
+}
+
+define i64 @test_rint_si64(double %x) nounwind {
+; RV32IFD-LABEL: test_rint_si64:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT: call rint@plt
+; RV32IFD-NEXT: lui a0, %hi(.LCPI21_0)
+; RV32IFD-NEXT: fld fa5, %lo(.LCPI21_0)(a0)
+; RV32IFD-NEXT: fmv.d fs0, fa0
+; RV32IFD-NEXT: fle.d s0, fa5, fa0
+; RV32IFD-NEXT: call __fixdfdi@plt
+; RV32IFD-NEXT: lui a4, 524288
+; RV32IFD-NEXT: lui a2, 524288
+; RV32IFD-NEXT: beqz s0, .LBB21_2
+; RV32IFD-NEXT: # %bb.1:
+; RV32IFD-NEXT: mv a2, a1
+; RV32IFD-NEXT: .LBB21_2:
+; RV32IFD-NEXT: lui a1, %hi(.LCPI21_1)
+; RV32IFD-NEXT: fld fa5, %lo(.LCPI21_1)(a1)
+; RV32IFD-NEXT: flt.d a3, fa5, fs0
+; RV32IFD-NEXT: beqz a3, .LBB21_4
+; RV32IFD-NEXT: # %bb.3:
+; RV32IFD-NEXT: addi a2, a4, -1
+; RV32IFD-NEXT: .LBB21_4:
+; RV32IFD-NEXT: feq.d a1, fs0, fs0
+; RV32IFD-NEXT: neg a4, a1
+; RV32IFD-NEXT: and a1, a4, a2
+; RV32IFD-NEXT: neg a2, a3
+; RV32IFD-NEXT: neg a3, s0
+; RV32IFD-NEXT: and a0, a3, a0
+; RV32IFD-NEXT: or a0, a2, a0
+; RV32IFD-NEXT: and a0, a4, a0
+; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: test_rint_si64:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: fcvt.l.d a0, fa0
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a1, a0
+; RV64IFD-NEXT: ret
+;
+; RV32IZFINXZDINX-LABEL: test_rint_si64:
+; RV32IZFINXZDINX: # %bb.0:
+; RV32IZFINXZDINX-NEXT: addi sp, sp, -32
+; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: call rint@plt
+; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
+; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
+; RV32IZFINXZDINX-NEXT: lw s2, 8(sp)
+; RV32IZFINXZDINX-NEXT: lw s3, 12(sp)
+; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI21_0)
+; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI21_0+4)(a2)
+; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI21_0)(a2)
+; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2
+; RV32IZFINXZDINX-NEXT: call __fixdfdi@plt
+; RV32IZFINXZDINX-NEXT: lui a4, 524288
+; RV32IZFINXZDINX-NEXT: lui a2, 524288
+; RV32IZFINXZDINX-NEXT: beqz s0, .LBB21_2
+; RV32IZFINXZDINX-NEXT: # %bb.1:
+; RV32IZFINXZDINX-NEXT: mv a2, a1
+; RV32IZFINXZDINX-NEXT: .LBB21_2:
+; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI21_1)
+; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI21_1)(a1)
+; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI21_1+4)(a1)
+; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2
+; RV32IZFINXZDINX-NEXT: beqz a3, .LBB21_4
+; RV32IZFINXZDINX-NEXT: # %bb.3:
+; RV32IZFINXZDINX-NEXT: addi a2, a4, -1
+; RV32IZFINXZDINX-NEXT: .LBB21_4:
+; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2
+; RV32IZFINXZDINX-NEXT: neg a4, a1
+; RV32IZFINXZDINX-NEXT: and a1, a4, a2
+; RV32IZFINXZDINX-NEXT: neg a2, s0
+; RV32IZFINXZDINX-NEXT: and a0, a2, a0
+; RV32IZFINXZDINX-NEXT: neg a2, a3
+; RV32IZFINXZDINX-NEXT: or a0, a2, a0
+; RV32IZFINXZDINX-NEXT: and a0, a4, a0
+; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
+; RV32IZFINXZDINX-NEXT: ret
+;
+; RV64IZFINXZDINX-LABEL: test_rint_si64:
+; RV64IZFINXZDINX: # %bb.0:
+; RV64IZFINXZDINX-NEXT: fcvt.l.d a1, a0
+; RV64IZFINXZDINX-NEXT: feq.d a0, a0, a0
+; RV64IZFINXZDINX-NEXT: seqz a0, a0
+; RV64IZFINXZDINX-NEXT: addi a0, a0, -1
+; RV64IZFINXZDINX-NEXT: and a0, a0, a1
+; RV64IZFINXZDINX-NEXT: ret
+ %a = call double @llvm.rint.f64(double %x)
+ %b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+ ret i64 %b
+}
+
+define signext i32 @test_rint_ui32(double %x) {
+; CHECKIFD-LABEL: test_rint_ui32:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fcvt.wu.d a0, fa0
+; CHECKIFD-NEXT: feq.d a1, fa0, fa0
+; CHECKIFD-NEXT: seqz a1, a1
+; CHECKIFD-NEXT: addi a1, a1, -1
+; CHECKIFD-NEXT: and a0, a1, a0
+; CHECKIFD-NEXT: ret
+;
+; RV32IZFINXZDINX-LABEL: test_rint_ui32:
+; RV32IZFINXZDINX: # %bb.0:
+; RV32IZFINXZDINX-NEXT: addi sp, sp, -16
+; RV32IZFINXZDINX-NEXT: .cfi_def_cfa_offset 16
+; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
+; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
+; RV32IZFINXZDINX-NEXT: lw a0, 8(sp)
+; RV32IZFINXZDINX-NEXT: lw a1, 12(sp)
+; RV32IZFINXZDINX-NEXT: fcvt.wu.d a2, a0
+; RV32IZFINXZDINX-NEXT: feq.d a0, a0, a0
+; RV32IZFINXZDINX-NEXT: seqz a0, a0
+; RV32IZFINXZDINX-NEXT: addi a0, a0, -1
+; RV32IZFINXZDINX-NEXT: and a0, a0, a2
+; RV32IZFINXZDINX-NEXT: addi sp, sp, 16
+; RV32IZFINXZDINX-NEXT: ret
+;
+; RV64IZFINXZDINX-LABEL: test_rint_ui32:
+; RV64IZFINXZDINX: # %bb.0:
+; RV64IZFINXZDINX-NEXT: fcvt.wu.d a1, a0
+; RV64IZFINXZDINX-NEXT: feq.d a0, a0, a0
+; RV64IZFINXZDINX-NEXT: seqz a0, a0
+; RV64IZFINXZDINX-NEXT: addi a0, a0, -1
+; RV64IZFINXZDINX-NEXT: and a0, a0, a1
+; RV64IZFINXZDINX-NEXT: ret
+ %a = call double @llvm.rint.f64(double %x)
+ %b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
+ ret i32 %b
+}
+
+define i64 @test_rint_ui64(double %x) nounwind {
+; RV32IFD-LABEL: test_rint_ui64:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: call rint@plt
+; RV32IFD-NEXT: lui a0, %hi(.LCPI23_0)
+; RV32IFD-NEXT: fld fa5, %lo(.LCPI23_0)(a0)
+; RV32IFD-NEXT: flt.d a0, fa5, fa0
+; RV32IFD-NEXT: neg s0, a0
+; RV32IFD-NEXT: fcvt.d.w fa5, zero
+; RV32IFD-NEXT: fle.d a0, fa5, fa0
+; RV32IFD-NEXT: neg s1, a0
+; RV32IFD-NEXT: call __fixunsdfdi@plt
+; RV32IFD-NEXT: and a0, s1, a0
+; RV32IFD-NEXT: or a0, s0, a0
+; RV32IFD-NEXT: and a1, s1, a1
+; RV32IFD-NEXT: or a1, s0, a1
+; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: test_rint_ui64:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: fcvt.lu.d a0, fa0
+; RV64IFD-NEXT: feq.d a1, fa0, fa0
+; RV64IFD-NEXT: seqz a1, a1
+; RV64IFD-NEXT: addi a1, a1, -1
+; RV64IFD-NEXT: and a0, a1, a0
+; RV64IFD-NEXT: ret
+;
+; RV32IZFINXZDINX-LABEL: test_rint_ui64:
+; RV32IZFINXZDINX: # %bb.0:
+; RV32IZFINXZDINX-NEXT: addi sp, sp, -32
+; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: call rint@plt
+; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
+; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
+; RV32IZFINXZDINX-NEXT: lw s0, 8(sp)
+; RV32IZFINXZDINX-NEXT: lw s1, 12(sp)
+; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero
+; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0
+; RV32IZFINXZDINX-NEXT: neg s2, a2
+; RV32IZFINXZDINX-NEXT: call __fixunsdfdi@plt
+; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI23_0)
+; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI23_0+4)(a2)
+; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI23_0)(a2)
+; RV32IZFINXZDINX-NEXT: and a0, s2, a0
+; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0
+; RV32IZFINXZDINX-NEXT: neg a2, a2
+; RV32IZFINXZDINX-NEXT: or a0, a2, a0
+; RV32IZFINXZDINX-NEXT: and a1, s2, a1
+; RV32IZFINXZDINX-NEXT: or a1, a2, a1
+; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
+; RV32IZFINXZDINX-NEXT: ret
+;
+; RV64IZFINXZDINX-LABEL: test_rint_ui64:
+; RV64IZFINXZDINX: # %bb.0:
+; RV64IZFINXZDINX-NEXT: fcvt.lu.d a1, a0
+; RV64IZFINXZDINX-NEXT: feq.d a0, a0, a0
+; RV64IZFINXZDINX-NEXT: seqz a0, a0
+; RV64IZFINXZDINX-NEXT: addi a0, a0, -1
+; RV64IZFINXZDINX-NEXT: and a0, a0, a1
+; RV64IZFINXZDINX-NEXT: ret
+ %a = call double @llvm.rint.f64(double %x)
+ %b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
+ ret i64 %b
+}
+
declare double @llvm.floor.f64(double)
declare double @llvm.ceil.f64(double)
declare double @llvm.trunc.f64(double)
declare double @llvm.round.f64(double)
declare double @llvm.roundeven.f64(double)
+declare double @llvm.rint.f64(double)
declare i32 @llvm.fptosi.sat.i32.f64(double)
declare i64 @llvm.fptosi.sat.i64.f64(double)
declare i32 @llvm.fptoui.sat.i32.f64(double)
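The non-libcall CHECK sequences above all share one branchless idiom for the saturating conversion's NaN case: feq.d yields 0 only when the input is NaN, seqz/addi expands that into an all-zero or all-ones mask, and the final and forces the result to 0 for NaN, as fptosi.sat/fptoui.sat require (fcvt itself already saturates out-of-range inputs). Here is a small illustrative C++ mirror of that idiom; the helper names are made up for the sketch, and the NaN behavior of fcvt.w.d (maximum positive result) is emulated per the RISC-V spec:

```cpp
#include <cstdint>
#include <limits>

// Stand-in for the saturating fcvt.w.d instruction: out-of-range inputs
// clamp, and NaN produces the maximum positive value per the RISC-V spec.
static int32_t fcvt_w_d_sat(double x) {
  if (x != x) return std::numeric_limits<int32_t>::max(); // NaN
  if (x >= 2147483647.0) return std::numeric_limits<int32_t>::max();
  if (x <= -2147483648.0) return std::numeric_limits<int32_t>::min();
  return static_cast<int32_t>(x); // in range: truncate toward zero
}

// Branchless mirror of the feq.d / seqz / addi / and sequence.
int32_t fptosi_sat_i32_sketch(double x) {
  int32_t conv = fcvt_w_d_sat(x);     // fcvt.w.d: saturating conversion
  int32_t ordered = (x == x) ? 1 : 0; // feq.d: 0 only for NaN
  int32_t mask = (ordered == 0) - 1;  // seqz + addi: NaN -> 0, else all-ones
  return mask & conv;                 // and: force 0 for NaN
}
```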
diff --git a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
index 61337216c7fb5b..d947d0f25cdd8f 100644
--- a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
+++ b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
@@ -1308,11 +1308,272 @@ define i64 @test_roundeven_ui64(float %x) nounwind {
ret i64 %b
}
+define signext i32 @test_rint_si32(float %x) {
+; CHECKIF-LABEL: test_rint_si32:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fcvt.w.s a0, fa0
+; CHECKIF-NEXT: feq.s a1, fa0, fa0
+; CHECKIF-NEXT: seqz a1, a1
+; CHECKIF-NEXT: addi a1, a1, -1
+; CHECKIF-NEXT: and a0, a1, a0
+; CHECKIF-NEXT: ret
+;
+; CHECKIZFINX-LABEL: test_rint_si32:
+; CHECKIZFINX: # %bb.0:
+; CHECKIZFINX-NEXT: fcvt.w.s a1, a0
+; CHECKIZFINX-NEXT: feq.s a0, a0, a0
+; CHECKIZFINX-NEXT: seqz a0, a0
+; CHECKIZFINX-NEXT: addi a0, a0, -1
+; CHECKIZFINX-NEXT: and a0, a0, a1
+; CHECKIZFINX-NEXT: ret
+ %a = call float @llvm.rint.f32(float %x)
+ %b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+ ret i32 %b
+}
+
+define i64 @test_rint_si64(float %x) nounwind {
+; RV32IF-LABEL: test_rint_si64:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: addi sp, sp, -16
+; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fmv.s fs0, fa0
+; RV32IF-NEXT: lui a0, 307200
+; RV32IF-NEXT: fmv.w.x fa5, a0
+; RV32IF-NEXT: fabs.s fa4, fa0
+; RV32IF-NEXT: flt.s a0, fa4, fa5
+; RV32IF-NEXT: beqz a0, .LBB21_2
+; RV32IF-NEXT: # %bb.1:
+; RV32IF-NEXT: fcvt.w.s a0, fs0
+; RV32IF-NEXT: fcvt.s.w fa5, a0
+; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0
+; RV32IF-NEXT: .LBB21_2:
+; RV32IF-NEXT: lui a0, 913408
+; RV32IF-NEXT: fmv.w.x fa5, a0
+; RV32IF-NEXT: fle.s s0, fa5, fs0
+; RV32IF-NEXT: fmv.s fa0, fs0
+; RV32IF-NEXT: call __fixsfdi@plt
+; RV32IF-NEXT: lui a4, 524288
+; RV32IF-NEXT: lui a2, 524288
+; RV32IF-NEXT: beqz s0, .LBB21_4
+; RV32IF-NEXT: # %bb.3:
+; RV32IF-NEXT: mv a2, a1
+; RV32IF-NEXT: .LBB21_4:
+; RV32IF-NEXT: lui a1, %hi(.LCPI21_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI21_0)(a1)
+; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: beqz a3, .LBB21_6
+; RV32IF-NEXT: # %bb.5:
+; RV32IF-NEXT: addi a2, a4, -1
+; RV32IF-NEXT: .LBB21_6:
+; RV32IF-NEXT: feq.s a1, fs0, fs0
+; RV32IF-NEXT: neg a4, a1
+; RV32IF-NEXT: and a1, a4, a2
+; RV32IF-NEXT: neg a2, s0
+; RV32IF-NEXT: and a0, a2, a0
+; RV32IF-NEXT: neg a2, a3
+; RV32IF-NEXT: or a0, a2, a0
+; RV32IF-NEXT: and a0, a4, a0
+; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: addi sp, sp, 16
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: test_rint_si64:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fcvt.l.s a0, fa0
+; RV64IF-NEXT: feq.s a1, fa0, fa0
+; RV64IF-NEXT: seqz a1, a1
+; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: and a0, a1, a0
+; RV64IF-NEXT: ret
+;
+; RV32IZFINX-LABEL: test_rint_si64:
+; RV32IZFINX: # %bb.0:
+; RV32IZFINX-NEXT: addi sp, sp, -16
+; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZFINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
+; RV32IZFINX-NEXT: mv s0, a0
+; RV32IZFINX-NEXT: lui a0, 307200
+; RV32IZFINX-NEXT: fabs.s a1, s0
+; RV32IZFINX-NEXT: flt.s a0, a1, a0
+; RV32IZFINX-NEXT: beqz a0, .LBB21_2
+; RV32IZFINX-NEXT: # %bb.1:
+; RV32IZFINX-NEXT: fcvt.w.s a0, s0
+; RV32IZFINX-NEXT: fcvt.s.w a0, a0
+; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0
+; RV32IZFINX-NEXT: .LBB21_2:
+; RV32IZFINX-NEXT: lui a0, 913408
+; RV32IZFINX-NEXT: fle.s s1, a0, s0
+; RV32IZFINX-NEXT: neg s2, s1
+; RV32IZFINX-NEXT: mv a0, s0
+; RV32IZFINX-NEXT: call __fixsfdi@plt
+; RV32IZFINX-NEXT: lui a2, %hi(.LCPI21_0)
+; RV32IZFINX-NEXT: lw a2, %lo(.LCPI21_0)(a2)
+; RV32IZFINX-NEXT: and a0, s2, a0
+; RV32IZFINX-NEXT: flt.s a4, a2, s0
+; RV32IZFINX-NEXT: neg a2, a4
+; RV32IZFINX-NEXT: or a0, a2, a0
+; RV32IZFINX-NEXT: feq.s a2, s0, s0
+; RV32IZFINX-NEXT: neg a2, a2
+; RV32IZFINX-NEXT: lui a5, 524288
+; RV32IZFINX-NEXT: lui a3, 524288
+; RV32IZFINX-NEXT: beqz s1, .LBB21_4
+; RV32IZFINX-NEXT: # %bb.3:
+; RV32IZFINX-NEXT: mv a3, a1
+; RV32IZFINX-NEXT: .LBB21_4:
+; RV32IZFINX-NEXT: and a0, a2, a0
+; RV32IZFINX-NEXT: beqz a4, .LBB21_6
+; RV32IZFINX-NEXT: # %bb.5:
+; RV32IZFINX-NEXT: addi a3, a5, -1
+; RV32IZFINX-NEXT: .LBB21_6:
+; RV32IZFINX-NEXT: and a1, a2, a3
+; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZFINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32IZFINX-NEXT: addi sp, sp, 16
+; RV32IZFINX-NEXT: ret
+;
+; RV64IZFINX-LABEL: test_rint_si64:
+; RV64IZFINX: # %bb.0:
+; RV64IZFINX-NEXT: fcvt.l.s a1, a0
+; RV64IZFINX-NEXT: feq.s a0, a0, a0
+; RV64IZFINX-NEXT: seqz a0, a0
+; RV64IZFINX-NEXT: addi a0, a0, -1
+; RV64IZFINX-NEXT: and a0, a0, a1
+; RV64IZFINX-NEXT: ret
+ %a = call float @llvm.rint.f32(float %x)
+ %b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
+ ret i64 %b
+}
+
+define signext i32 @test_rint_ui32(float %x) {
+; CHECKIF-LABEL: test_rint_ui32:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fcvt.wu.s a0, fa0
+; CHECKIF-NEXT: feq.s a1, fa0, fa0
+; CHECKIF-NEXT: seqz a1, a1
+; CHECKIF-NEXT: addi a1, a1, -1
+; CHECKIF-NEXT: and a0, a1, a0
+; CHECKIF-NEXT: ret
+;
+; CHECKIZFINX-LABEL: test_rint_ui32:
+; CHECKIZFINX: # %bb.0:
+; CHECKIZFINX-NEXT: fcvt.wu.s a1, a0
+; CHECKIZFINX-NEXT: feq.s a0, a0, a0
+; CHECKIZFINX-NEXT: seqz a0, a0
+; CHECKIZFINX-NEXT: addi a0, a0, -1
+; CHECKIZFINX-NEXT: and a0, a0, a1
+; CHECKIZFINX-NEXT: ret
+ %a = call float @llvm.rint.f32(float %x)
+ %b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
+ ret i32 %b
+}
+
+define i64 @test_rint_ui64(float %x) nounwind {
+; RV32IF-LABEL: test_rint_ui64:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: addi sp, sp, -16
+; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fmv.s fs0, fa0
+; RV32IF-NEXT: lui a0, 307200
+; RV32IF-NEXT: fmv.w.x fa5, a0
+; RV32IF-NEXT: fabs.s fa4, fa0
+; RV32IF-NEXT: flt.s a0, fa4, fa5
+; RV32IF-NEXT: beqz a0, .LBB23_2
+; RV32IF-NEXT: # %bb.1:
+; RV32IF-NEXT: fcvt.w.s a0, fs0
+; RV32IF-NEXT: fcvt.s.w fa5, a0
+; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0
+; RV32IF-NEXT: .LBB23_2:
+; RV32IF-NEXT: fmv.w.x fa5, zero
+; RV32IF-NEXT: fle.s a0, fa5, fs0
+; RV32IF-NEXT: neg s0, a0
+; RV32IF-NEXT: fmv.s fa0, fs0
+; RV32IF-NEXT: call __fixunssfdi@plt
+; RV32IF-NEXT: lui a2, %hi(.LCPI23_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI23_0)(a2)
+; RV32IF-NEXT: and a0, s0, a0
+; RV32IF-NEXT: flt.s a2, fa5, fs0
+; RV32IF-NEXT: neg a2, a2
+; RV32IF-NEXT: or a0, a2, a0
+; RV32IF-NEXT: and a1, s0, a1
+; RV32IF-NEXT: or a1, a2, a1
+; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: addi sp, sp, 16
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: test_rint_ui64:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fcvt.lu.s a0, fa0
+; RV64IF-NEXT: feq.s a1, fa0, fa0
+; RV64IF-NEXT: seqz a1, a1
+; RV64IF-NEXT: addi a1, a1, -1
+; RV64IF-NEXT: and a0, a1, a0
+; RV64IF-NEXT: ret
+;
+; RV32IZFINX-LABEL: test_rint_ui64:
+; RV32IZFINX: # %bb.0:
+; RV32IZFINX-NEXT: addi sp, sp, -16
+; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZFINX-NEXT: mv s0, a0
+; RV32IZFINX-NEXT: lui a0, 307200
+; RV32IZFINX-NEXT: fabs.s a1, s0
+; RV32IZFINX-NEXT: flt.s a0, a1, a0
+; RV32IZFINX-NEXT: beqz a0, .LBB23_2
+; RV32IZFINX-NEXT: # %bb.1:
+; RV32IZFINX-NEXT: fcvt.w.s a0, s0
+; RV32IZFINX-NEXT: fcvt.s.w a0, a0
+; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0
+; RV32IZFIN...
[truncated]
LGTM