Skip to content

Commit 19147f3

Browse files
Olga Mikhaltsova and Vladimir Kempik committed
8318158: RISC-V: implement roundD/roundF intrinsics
Co-authored-by: Vladimir Kempik <vkempik@openjdk.org> Reviewed-by: luhenry, fyang, mli
1 parent 2a59243 commit 19147f3

File tree

3 files changed

+82
-0
lines changed

3 files changed

+82
-0
lines changed

src/hotspot/cpu/riscv/macroAssembler_riscv.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4481,6 +4481,57 @@ void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tm
44814481
bge(cnt, tmp1, loop);
44824482
}
44834483

4484+
// java.lang.Math.round(float a)
// Emits the instruction sequence that computes Math.round for the float in `src`
// and leaves the result in `dst`.
//   dst  - integer register that receives the result (0 when src is NaN)
//   src  - float register holding the argument
//   ftmp - float scratch register; overwritten with 0.5f and then with src + 0.5f
// t0 is used as an integer scratch register and is overwritten.
4485+
// Returns the closest int to the argument, with ties rounding to positive infinity.
4486+
void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) {
4487+
// This exact instruction order gave a performance improvement on all tested devices;
4488+
// do not change it without re-verifying the performance.
4489+
Label done;
4490+
// Load the constant 0.5f into ftmp by way of t0
// (jint_cast presumably yields the raw bit pattern of the float - confirm).
mv(t0, jint_cast(0.5f));
4491+
fmv_w_x(ftmp, t0);
4492+
4493+
// dst = 0 if NaN
// src compares equal to itself unless it is NaN, so t0 == 0 identifies NaN.
// dst is zeroed before the branch so the NaN path can jump straight to `done`.
4494+
feq_s(t0, src, src); // feq is used instead of fclass as a performance optimization
4495+
mv(dst, zr);
4496+
beqz(t0, done);
4497+
4498+
// dst = (src + 0.5f) rounded down towards negative infinity
4499+
// Adding 0.5f to some floats exceeds the precision of a float, so the sum itself gets rounded.
4500+
// RDN is required for fadd_s; RNE gives incorrect results, for example:
4501+
// --------------------------------------------------------------------
4502+
// fadd.s rne (src + 0.5f): src = 8388609.000000 ftmp = 8388610.000000
4503+
// fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610   (wrong)
4504+
// --------------------------------------------------------------------
4505+
// fadd.s rdn (src + 0.5f): src = 8388609.000000 ftmp = 8388609.000000
4506+
// fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609   (correct)
4507+
// --------------------------------------------------------------------
// NOTE(review): infinite and out-of-int-range inputs are not special-cased here;
// presumably the conversion instruction saturates as Math.round requires - confirm.
4508+
fadd_s(ftmp, src, ftmp, RoundingMode::rdn);
4509+
fcvt_w_s(dst, ftmp, RoundingMode::rdn);
4510+
4511+
bind(done);
4512+
}
4513+
4514+
// java.lang.Math.round(double a)
// Emits the instruction sequence that computes Math.round for the double in `src`
// and leaves the result in `dst`.
//   dst  - integer register that receives the result (0 when src is NaN)
//   src  - float register holding the argument
//   ftmp - float scratch register; overwritten with 0.5 and then with src + 0.5
// t0 is used as an integer scratch register and is overwritten.
4515+
// Returns the closest long to the argument, with ties rounding to positive infinity.
4516+
void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) {
4517+
// This exact instruction order gave a performance improvement on all tested devices;
4518+
// do not change it without re-verifying the performance.
4519+
Label done;
4520+
// Load the constant 0.5 into ftmp by way of t0
// (julong_cast presumably yields the raw bit pattern of the double - confirm).
mv(t0, julong_cast(0.5));
4521+
fmv_d_x(ftmp, t0);
4522+
4523+
// dst = 0 if NaN
// src compares equal to itself unless it is NaN, so t0 == 0 identifies NaN.
// dst is zeroed before the branch so the NaN path can jump straight to `done`.
4524+
feq_d(t0, src, src); // feq is used instead of fclass as a performance optimization
4525+
mv(dst, zr);
4526+
beqz(t0, done);
4527+
4528+
// dst = (src + 0.5) rounded down towards negative infinity
// NOTE(review): infinite and out-of-long-range inputs are not special-cased here;
// presumably the conversion instruction saturates as Math.round requires - confirm.
4529+
fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here; with other rounding modes some inputs produce incorrect results
4530+
fcvt_l_d(dst, ftmp, RoundingMode::rdn);
4531+
4532+
bind(done);
4533+
}
4534+
44844535
#define FCVT_SAFE(FLOATCVT, FLOATSIG) \
44854536
void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
44864537
Label done; \

src/hotspot/cpu/riscv/macroAssembler_riscv.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1265,6 +1265,9 @@ class MacroAssembler: public Assembler {
12651265
void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
12661266
void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
12671267

1268+
// java.lang.Math.round(float): result goes to dst, src is the argument,
// ftmp is a float scratch register that gets overwritten.
void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
1269+
// java.lang.Math.round(double): result goes to dst, src is the argument,
// ftmp is a float scratch register that gets overwritten.
void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
1270+
12681271
// vector load/store unit-stride instructions
12691272
void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
12701273
switch (sew) {

src/hotspot/cpu/riscv/riscv.ad

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8417,6 +8417,34 @@ instruct convN2I(iRegINoSp dst, iRegN src)
84178417
ins_pipe(ialu_reg);
84188418
%}
84198419

8420+
// Matches the RoundD ideal node and expands it to MacroAssembler::java_round_double.
// dst is a long integer register operand, src a double register operand.
instruct round_double_reg(iRegLNoSp dst, fRegD src, fRegD ftmp) %{
8421+
match(Set dst (RoundD src));
8422+
8423+
ins_cost(XFER_COST + BRANCH_COST);
8424+
// ftmp is only a scratch register for the encoding, so it is declared as TEMP.
effect(TEMP ftmp);
8425+
format %{ "java_round_double $dst, $src\t#@round_double_reg" %}
8426+
8427+
ins_encode %{
8428+
__ java_round_double($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg));
8429+
%}
8430+
8431+
ins_pipe(pipe_slow);
8432+
%}
8433+
8434+
// Matches the RoundF ideal node and expands it to MacroAssembler::java_round_float.
// dst is an int register operand, src a float register operand.
instruct round_float_reg(iRegINoSp dst, fRegF src, fRegF ftmp) %{
8435+
match(Set dst (RoundF src));
8436+
8437+
ins_cost(XFER_COST + BRANCH_COST);
8438+
// ftmp is only a scratch register for the encoding, so it is declared as TEMP.
effect(TEMP ftmp);
8439+
format %{ "java_round_float $dst, $src\t#@round_float_reg" %}
8440+
8441+
ins_encode %{
8442+
__ java_round_float($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg));
8443+
%}
8444+
8445+
ins_pipe(pipe_slow);
8446+
%}
8447+
84208448
// Convert oop pointer into compressed form
84218449
instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{
84228450
match(Set dst (EncodeP src));

0 commit comments

Comments
 (0)