Skip to content

Commit ada400c

Browse files
author
Olga Mikhaltsova
committed
8318158: RISC-V: implement roundD/roundF intrinsics
Backport-of: 19147f326c6b0e78fe72f9a7e7100047f16a0921
1 parent 1dd2f2a commit ada400c

File tree

3 files changed

+82
-0
lines changed

3 files changed

+82
-0
lines changed

src/hotspot/cpu/riscv/macroAssembler_riscv.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4220,6 +4220,57 @@ void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tm
42204220
bge(cnt, tmp1, loop);
42214221
}
42224222

4223+
// java.lang.Math.round(float a)
4224+
// Returns the closest int to the argument, with ties rounding to positive infinity.
//
// Register usage:
//   dst  - receives the rounded value; set to 0 when src is NaN
//   src  - value to round; only used as a source operand
//   ftmp - scratch float register, overwritten (holds 0.5f, then src + 0.5f)
//   t0   - scratch integer register, overwritten
4225+
void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) {
4226+
// This particular instruction sequence gave a performance improvement on all tested devices;
4227+
// do not change or reorder it without re-verification.
4228+
Label done;
// Materialize the constant 0.5f in ftmp by way of the integer register t0.
4229+
mv(t0, jint_cast(0.5f));
4230+
fmv_w_x(ftmp, t0);
4231+
4232+
// dst = 0 if NaN
// dst is zeroed before the branch so the NaN path reaches 'done' with dst == 0.
4233+
feq_s(t0, src, src); // t0 = (src == src), i.e. 0 only for NaN; feq replaces fclass as a performance optimization
4234+
mv(dst, zr);
4235+
beqz(t0, done);
4236+
4237+
// dst = (src + 0.5f) rounded down towards negative infinity
4238+
// Adding 0.5f to some floats exceeds the precision limits for a float and rounding takes place.
4239+
// RDN is required for fadd_s, RNE gives incorrect results:
// (8388609 = 2^23 + 1: neighbouring floats are 1.0 apart here, so src + 0.5f is an exact tie
//  that RNE resolves upwards to the even neighbour 8388610.)
4240+
// --------------------------------------------------------------------
4241+
// fadd.s rne (src + 0.5f): src = 8388609.000000 ftmp = 8388610.000000
4242+
// fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610
4243+
// --------------------------------------------------------------------
4244+
// fadd.s rdn (src + 0.5f): src = 8388609.000000 ftmp = 8388609.000000
4245+
// fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609
4246+
// --------------------------------------------------------------------
4247+
fadd_s(ftmp, src, ftmp, RoundingMode::rdn);
4248+
fcvt_w_s(dst, ftmp, RoundingMode::rdn);
4249+
4250+
bind(done);
4251+
}
4252+
4253+
// java.lang.Math.round(double a)
4254+
// Returns the closest long to the argument, with ties rounding to positive infinity.
//
// Register usage:
//   dst  - receives the rounded value; set to 0 when src is NaN
//   src  - value to round; only used as a source operand
//   ftmp - scratch float register, overwritten (holds 0.5, then src + 0.5)
//   t0   - scratch integer register, overwritten
4255+
void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) {
4256+
// This particular instruction sequence gave a performance improvement on all tested devices;
4257+
// do not change or reorder it without re-verification.
4258+
Label done;
// Materialize the constant 0.5 in ftmp by way of the integer register t0.
4259+
mv(t0, julong_cast(0.5));
4260+
fmv_d_x(ftmp, t0);
4261+
4262+
// dst = 0 if NaN
// dst is zeroed before the branch so the NaN path reaches 'done' with dst == 0.
4263+
feq_d(t0, src, src); // t0 = (src == src), i.e. 0 only for NaN; feq replaces fclass as a performance optimization
4264+
mv(dst, zr);
4265+
beqz(t0, done);
4266+
4267+
// dst = (src + 0.5) rounded down towards negative infinity
// Both the add and the conversion use RDN, giving floor(src + 0.5).
4268+
fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here; otherwise some inputs produce incorrect results
4269+
fcvt_l_d(dst, ftmp, RoundingMode::rdn);
4270+
4271+
bind(done);
4272+
}
4273+
42234274
#define FCVT_SAFE(FLOATCVT, FLOATSIG) \
42244275
void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
42254276
Label done; \

src/hotspot/cpu/riscv/macroAssembler_riscv.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1235,6 +1235,9 @@ class MacroAssembler: public Assembler {
12351235
void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
12361236
void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
12371237

1238+
// java.lang.Math.round(float): dst = closest int to src, ties towards positive infinity,
// 0 for NaN. ftmp is a scratch float register; the implementation also overwrites t0.
void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
1239+
// java.lang.Math.round(double): dst = closest long to src, ties towards positive infinity,
// 0 for NaN. ftmp is a scratch float register; the implementation also overwrites t0.
void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
1240+
12381241
// vector load/store unit-stride instructions
12391242
void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
12401243
switch (sew) {

src/hotspot/cpu/riscv/riscv.ad

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8221,6 +8221,34 @@ instruct convN2I(iRegINoSp dst, iRegN src)
82218221
ins_pipe(ialu_reg);
82228222
%}
82238223

8224+
// Matches the RoundD ideal node: rounds the double in src and places the result in the
// long register dst. Code generation is delegated to MacroAssembler::java_round_double.
instruct round_double_reg(iRegLNoSp dst, fRegD src, fRegD ftmp) %{
8225+
match(Set dst (RoundD src));
8226+
8227+
ins_cost(XFER_COST + BRANCH_COST);
// ftmp is only a scratch register for the macro-assembler routine; it is not shown in the format string.
8228+
effect(TEMP ftmp);
8229+
format %{ "java_round_double $dst, $src\t#@round_double_reg" %}
8230+
8231+
ins_encode %{
8232+
__ java_round_double($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg));
8233+
%}
8234+
8235+
ins_pipe(pipe_slow);
8236+
%}
8237+
8238+
// Matches the RoundF ideal node: rounds the float in src and places the result in the
// int register dst. Code generation is delegated to MacroAssembler::java_round_float.
instruct round_float_reg(iRegINoSp dst, fRegF src, fRegF ftmp) %{
8239+
match(Set dst (RoundF src));
8240+
8241+
ins_cost(XFER_COST + BRANCH_COST);
// ftmp is only a scratch register for the macro-assembler routine; it is not shown in the format string.
8242+
effect(TEMP ftmp);
8243+
format %{ "java_round_float $dst, $src\t#@round_float_reg" %}
8244+
8245+
ins_encode %{
8246+
__ java_round_float($dst$$Register, as_FloatRegister($src$$reg), as_FloatRegister($ftmp$$reg));
8247+
%}
8248+
8249+
ins_pipe(pipe_slow);
8250+
%}
8251+
82248252
// Convert oop pointer into compressed form
82258253
instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{
82268254
match(Set dst (EncodeP src));

0 commit comments

Comments
 (0)