Skip to content
This repository was archived by the owner on Oct 9, 2025. It is now read-only.

Commit 8ff288f

Browse files
author
Olga Mikhaltsova
committed
8318158: RISC-V: implement roundD/roundF intrinsics
Backport-of: 19147f326c6b0e78fe72f9a7e7100047f16a0921
1 parent e27e599 commit 8ff288f

File tree

3 files changed

+82
-0
lines changed

3 files changed

+82
-0
lines changed

src/hotspot/cpu/riscv/macroAssembler_riscv.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4342,6 +4342,57 @@ void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tm
43424342
bge(cnt, tmp1, loop);
43434343
}
43444344

4345+
// java.lang.Math.round(float a)
4346+
// Returns the closest int to the argument, with ties rounding to positive infinity.
// Emits code that computes (src + 0.5f) rounded towards negative infinity as an int
// in dst; a NaN argument produces 0.
//   dst  - integer register receiving the result
//   src  - float register holding the argument; only read by this sequence
//   ftmp - float scratch register; overwritten with 0.5f and then with src + 0.5f
// t0 is used as an integer scratch register and is overwritten as well.
4347+
void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) {
4348+
// This instruction sequence provides a performance improvement on all tested devices;
4349+
// do not change it without re-verification.
4350+
Label done;
4351+
mv(t0, jint_cast(0.5f)); // t0 = 0.5f passed through jint_cast (presumably its raw bit pattern as an int)
4352+
fmv_w_x(ftmp, t0); // ftmp = 0.5f
4353+
4354+
// dst = 0 if NaN: a NaN compares unequal to itself, so t0 ends up 0 only for a NaN src
4355+
feq_s(t0, src, src); // replacing fclass with feq as performance optimization
4356+
mv(dst, zr); // preset the NaN result before the branch
4357+
beqz(t0, done); // NaN: skip the add/convert, dst stays 0
4358+
4359+
// dst = (src + 0.5f) rounded down towards negative infinity
4360+
// Adding 0.5f to some floats exceeds the precision limits of a float, so the sum itself gets rounded.
4361+
// RDN is required for fadd_s; RNE gives incorrect results, for example:
4362+
// --------------------------------------------------------------------
4363+
// fadd.s rne (src + 0.5f): src = 8388609.000000 ftmp = 8388610.000000   (sum rounded up)
4364+
// fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610                     (incorrect)
4365+
// --------------------------------------------------------------------
4366+
// fadd.s rdn (src + 0.5f): src = 8388609.000000 ftmp = 8388609.000000   (sum rounded down)
4367+
// fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609                     (expected)
4368+
// --------------------------------------------------------------------
4369+
fadd_s(ftmp, src, ftmp, RoundingMode::rdn);
4370+
fcvt_w_s(dst, ftmp, RoundingMode::rdn);
4371+
4372+
bind(done);
4373+
}
4374+
4375+
// java.lang.Math.round(double a)
4376+
// Returns the closest long to the argument, with ties rounding to positive infinity.
// Emits code that computes (src + 0.5) rounded towards negative infinity as a long
// in dst; a NaN argument produces 0.
//   dst  - integer register receiving the result
//   src  - double register holding the argument; only read by this sequence
//   ftmp - double scratch register; overwritten with 0.5 and then with src + 0.5
// t0 is used as an integer scratch register and is overwritten as well.
4377+
void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) {
4378+
// This instruction sequence provides a performance improvement on all tested devices;
4379+
// do not change it without re-verification.
4380+
Label done;
4381+
mv(t0, julong_cast(0.5)); // t0 = 0.5 passed through julong_cast (presumably its raw bit pattern as an integer)
4382+
fmv_d_x(ftmp, t0); // ftmp = 0.5
4383+
4384+
// dst = 0 if NaN: a NaN compares unequal to itself, so t0 ends up 0 only for a NaN src
4385+
feq_d(t0, src, src); // replacing fclass with feq as performance optimization
4386+
mv(dst, zr); // preset the NaN result before the branch
4387+
beqz(t0, done); // NaN: skip the add/convert, dst stays 0
4388+
4389+
// dst = (src + 0.5) rounded down towards negative infinity
4390+
fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here; otherwise some inputs produce incorrect results
4391+
fcvt_l_d(dst, ftmp, RoundingMode::rdn);
4392+
4393+
bind(done);
4394+
}
4395+
43454396
#define FCVT_SAFE(FLOATCVT, FLOATSIG) \
43464397
void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
43474398
Label done; \

src/hotspot/cpu/riscv/macroAssembler_riscv.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,6 +1252,9 @@ class MacroAssembler: public Assembler {
12521252
void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
12531253
void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
12541254

1255+
// java.lang.Math.round(float) / java.lang.Math.round(double):
// dst receives the rounded int/long (0 for NaN); ftmp is a floating-point scratch
// register that the emitted sequence overwrites. t0 is overwritten as well.
void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
1256+
void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
1257+
12551258
// vector load/store unit-stride instructions
12561259
void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
12571260
switch (sew) {

src/hotspot/cpu/riscv/riscv.ad

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8417,6 +8417,34 @@ instruct convN2I(iRegINoSp dst, iRegN src)
84178417
ins_pipe(ialu_reg);
84188418
%}
84198419

8420+
// Math.round(double): matches the RoundD node and expands to
// MacroAssembler::java_round_double, leaving the result in dst.
instruct round_double_reg(iRegLNoSp dst, fRegD src, fRegD ftmp) %{
8421+
match(Set dst (RoundD src));
8422+
8423+
ins_cost(XFER_COST + BRANCH_COST);
8424+
// ftmp is scratch only: the emitted sequence overwrites it, hence TEMP.
effect(TEMP ftmp);
8425+
format %{ "java_round_double $dst, $src\t#@round_double_reg" %}
8426+
8427+
ins_encode %{
8428+
__ java_round_double($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg));
8429+
%}
8430+
8431+
ins_pipe(pipe_slow);
8432+
%}
8433+
8434+
// Math.round(float): matches the RoundF node and expands to
// MacroAssembler::java_round_float, leaving the result in dst.
instruct round_float_reg(iRegINoSp dst, fRegF src, fRegF ftmp) %{
8435+
match(Set dst (RoundF src));
8436+
8437+
ins_cost(XFER_COST + BRANCH_COST);
8438+
// ftmp is scratch only: the emitted sequence overwrites it, hence TEMP.
effect(TEMP ftmp);
8439+
format %{ "java_round_float $dst, $src\t#@round_float_reg" %}
8440+
8441+
ins_encode %{
8442+
__ java_round_float($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg));
8443+
%}
8444+
8445+
ins_pipe(pipe_slow);
8446+
%}
8447+
84208448
// Convert oop pointer into compressed form
84218449
instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{
84228450
match(Set dst (EncodeP src));

0 commit comments

Comments
 (0)