Skip to content

Commit

Permalink
8318158: RISC-V: implement roundD/roundF intrinsics
Browse files Browse the repository at this point in the history
Backport-of: 19147f326c6b0e78fe72f9a7e7100047f16a0921
  • Loading branch information
Olga Mikhaltsova committed Jan 22, 2024
1 parent 1dd2f2a commit ada400c
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 0 deletions.
51 changes: 51 additions & 0 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
Expand Up @@ -4220,6 +4220,57 @@ void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tm
bge(cnt, tmp1, loop);
}

// java.lang.Math.round(float a)
// Returns the closest int to the argument, with ties rounding to positive infinity.
void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) {
// this instructions calling sequence provides performance improvement on all tested devices;
// don't change it without re-verification
Label done;
mv(t0, jint_cast(0.5f));
fmv_w_x(ftmp, t0);

// dst = 0 if NaN
feq_s(t0, src, src); // replacing fclass with feq as performance optimization
mv(dst, zr);
beqz(t0, done);

// dst = (src + 0.5f) rounded down towards negative infinity
// Adding 0.5f to some floats exceeds the precision limits for a float and rounding takes place.
// RDN is required for fadd_s, RNE gives incorrect results:
// --------------------------------------------------------------------
// fadd.s rne (src + 0.5f): src = 8388609.000000 ftmp = 8388610.000000
// fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610
// --------------------------------------------------------------------
// fadd.s rdn (src + 0.5f): src = 8388609.000000 ftmp = 8388609.000000
// fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609
// --------------------------------------------------------------------
fadd_s(ftmp, src, ftmp, RoundingMode::rdn);
fcvt_w_s(dst, ftmp, RoundingMode::rdn);

bind(done);
}

// java.lang.Math.round(double a)
// Returns the closest long to the argument, with ties rounding to positive infinity.
void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) {
// this instructions calling sequence provides performance improvement on all tested devices;
// don't change it without re-verification
Label done;
mv(t0, julong_cast(0.5));
fmv_d_x(ftmp, t0);

// dst = 0 if NaN
feq_d(t0, src, src); // replacing fclass with feq as performance optimization
mv(dst, zr);
beqz(t0, done);

// dst = (src + 0.5) rounded down towards negative infinity
fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here otherwise some inputs produce incorrect results
fcvt_l_d(dst, ftmp, RoundingMode::rdn);

bind(done);
}

#define FCVT_SAFE(FLOATCVT, FLOATSIG) \
void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
Label done; \
Expand Down
3 changes: 3 additions & 0 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
Expand Up @@ -1235,6 +1235,9 @@ class MacroAssembler: public Assembler {
void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);

void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);

// vector load/store unit-stride instructions
void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
switch (sew) {
Expand Down
28 changes: 28 additions & 0 deletions src/hotspot/cpu/riscv/riscv.ad
Expand Up @@ -8221,6 +8221,34 @@ instruct convN2I(iRegINoSp dst, iRegN src)
ins_pipe(ialu_reg);
%}

instruct round_double_reg(iRegLNoSp dst, fRegD src, fRegD ftmp) %{
match(Set dst (RoundD src));

ins_cost(XFER_COST + BRANCH_COST);
effect(TEMP ftmp);
format %{ "java_round_double $dst, $src\t#@round_double_reg" %}

ins_encode %{
__ java_round_double($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg));
%}

ins_pipe(pipe_slow);
%}

instruct round_float_reg(iRegINoSp dst, fRegF src, fRegF ftmp) %{
match(Set dst (RoundF src));

ins_cost(XFER_COST + BRANCH_COST);
effect(TEMP ftmp);
format %{ "java_round_float $dst, $src\t#@round_float_reg" %}

ins_encode %{
__ java_round_float($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg));
%}

ins_pipe(pipe_slow);
%}

// Convert oop pointer into compressed form
instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{
match(Set dst (EncodeP src));
Expand Down

1 comment on commit ada400c

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.