diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
index 02e0a195138..0444c64c2b1 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
@@ -4342,6 +4342,57 @@ void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tm
   bge(cnt, tmp1, loop);
 }
 
+// java.lang.Math.round(float a): sets dst to the closest int to src, with ties rounding to positive infinity.
+// dst is 0 when src is NaN. ftmp is overwritten (0.5f, then src + 0.5f) and t0 is used as integer scratch.
+void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) {
+  // This instruction sequence gave a performance improvement on all tested devices;
+  // do not change it without re-verification.
+  Label done;
+  mv(t0, jint_cast(0.5f));
+  fmv_w_x(ftmp, t0); // ftmp = 0.5f, materialized through t0
+
+  // dst = 0 if src is NaN (feq.s of src with itself leaves 0 in t0 only for NaN)
+  feq_s(t0, src, src); // feq is used instead of fclass as a performance optimization
+  mv(dst, zr); // preset the NaN result before branching
+  beqz(t0, done); // src is NaN: skip the conversion and keep dst == 0
+
+  // dst = (src + 0.5f) rounded down towards negative infinity
+  // For some floats src + 0.5f is not exactly representable as a float, so the addition itself rounds.
+  // RDN is required for fadd_s; RNE gives incorrect results, as this example shows:
+  // --------------------------------------------------------------------
+  // fadd.s rne (src + 0.5f): src = 8388609.000000 ftmp = 8388610.000000
+  // fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610
+  // --------------------------------------------------------------------
+  // fadd.s rdn (src + 0.5f): src = 8388609.000000 ftmp = 8388609.000000
+  // fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609
+  // --------------------------------------------------------------------
+  fadd_s(ftmp, src, ftmp, RoundingMode::rdn); // ftmp = src + 0.5f, rounded down
+  fcvt_w_s(dst, ftmp, RoundingMode::rdn); // dst = ftmp converted to int, rounding down
+
+  bind(done);
+}
+
+// java.lang.Math.round(double a): sets dst to the closest long to src, with ties rounding to positive infinity.
+// dst is 0 when src is NaN. ftmp is overwritten (0.5, then src + 0.5) and t0 is used as integer scratch.
+void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) {
+  // This instruction sequence gave a performance improvement on all tested devices;
+  // do not change it without re-verification. t0 is used as integer scratch and ftmp is overwritten.
+  Label done;
+  mv(t0, julong_cast(0.5));
+  fmv_d_x(ftmp, t0); // ftmp = 0.5, materialized through t0
+
+  // dst = 0 if src is NaN (feq.d of src with itself leaves 0 in t0 only for NaN)
+  feq_d(t0, src, src); // feq is used instead of fclass as a performance optimization
+  mv(dst, zr); // preset the NaN result before branching
+  beqz(t0, done); // src is NaN: skip the conversion and keep dst == 0
+
+  // dst = (src + 0.5) rounded down towards negative infinity
+  fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here; otherwise some inputs produce incorrect results
+  fcvt_l_d(dst, ftmp, RoundingMode::rdn); // dst = ftmp converted to long, rounding down
+
+  bind(done);
+}
+
 #define FCVT_SAFE(FLOATCVT, FLOATSIG) \
 void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
   Label done; \
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
index 3b110cd3e28..4b4c7a59aab 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
@@ -1252,6 +1252,9 @@ class MacroAssembler: public Assembler {
   void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
   void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
 
+  void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp); // Math.round(float) -> int in dst; overwrites ftmp and t0
+  void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp); // Math.round(double) -> long in dst; overwrites ftmp and t0
+
   // vector load/store unit-stride instructions
   void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
     switch (sew) {
diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
index 1096ca80043..a0520efcf02 100644
--- a/src/hotspot/cpu/riscv/riscv.ad
+++ b/src/hotspot/cpu/riscv/riscv.ad
@@ -8417,6 +8417,34 @@ instruct convN2I(iRegINoSp dst, iRegN src)
   ins_pipe(ialu_reg);
 %}
 
+instruct round_double_reg(iRegLNoSp dst, fRegD src, fRegD ftmp) %{
+  match(Set dst (RoundD src)); // selected for RoundD: double source, long result
+
+  ins_cost(XFER_COST + BRANCH_COST); // cost modeled as one transfer plus one branch
+  effect(TEMP ftmp); // ftmp is a scratch FP register overwritten by java_round_double
+  format %{ "java_round_double $dst, $src\t#@round_double_reg" %}
+
+  ins_encode %{
+    __ java_round_double($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg));
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct round_float_reg(iRegINoSp dst, fRegF src, fRegF ftmp) %{
+  match(Set dst (RoundF src)); // selected for RoundF: float source, int result
+
+  ins_cost(XFER_COST + BRANCH_COST); // cost modeled as one transfer plus one branch
+  effect(TEMP ftmp); // ftmp is a scratch FP register overwritten by java_round_float
+  format %{ "java_round_float $dst, $src\t#@round_float_reg" %}
+
+  ins_encode %{
+    __ java_round_float($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg));
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
 // Convert oop pointer into compressed form
 instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{
   match(Set dst (EncodeP src));