@@ -4342,6 +4342,57 @@ void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tm
43424342 bge (cnt, tmp1, loop);
43434343}
43444344
4345+ // java.lang.Math.round(float a)
4346+ // Returns the closest int to the argument, with ties rounding to positive infinity.
4347+ void MacroAssembler::java_round_float (Register dst, FloatRegister src, FloatRegister ftmp) {
4348+ // this instructions calling sequence provides performance improvement on all tested devices;
4349+ // don't change it without re-verification
4350+ Label done;
4351+ mv (t0, jint_cast (0 .5f ));
4352+ fmv_w_x (ftmp, t0);
4353+
4354+ // dst = 0 if NaN
4355+ feq_s (t0, src, src); // replacing fclass with feq as performance optimization
4356+ mv (dst, zr);
4357+ beqz (t0, done);
4358+
4359+ // dst = (src + 0.5f) rounded down towards negative infinity
4360+ // Adding 0.5f to some floats exceeds the precision limits for a float and rounding takes place.
4361+ // RDN is required for fadd_s, RNE gives incorrect results:
4362+ // --------------------------------------------------------------------
4363+ // fadd.s rne (src + 0.5f): src = 8388609.000000 ftmp = 8388610.000000
4364+ // fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610
4365+ // --------------------------------------------------------------------
4366+ // fadd.s rdn (src + 0.5f): src = 8388609.000000 ftmp = 8388609.000000
4367+ // fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609
4368+ // --------------------------------------------------------------------
4369+ fadd_s (ftmp, src, ftmp, RoundingMode::rdn);
4370+ fcvt_w_s (dst, ftmp, RoundingMode::rdn);
4371+
4372+ bind (done);
4373+ }
4374+
4375+ // java.lang.Math.round(double a)
4376+ // Returns the closest long to the argument, with ties rounding to positive infinity.
4377+ void MacroAssembler::java_round_double (Register dst, FloatRegister src, FloatRegister ftmp) {
4378+ // this instructions calling sequence provides performance improvement on all tested devices;
4379+ // don't change it without re-verification
4380+ Label done;
4381+ mv (t0, julong_cast (0.5 ));
4382+ fmv_d_x (ftmp, t0);
4383+
4384+ // dst = 0 if NaN
4385+ feq_d (t0, src, src); // replacing fclass with feq as performance optimization
4386+ mv (dst, zr);
4387+ beqz (t0, done);
4388+
4389+ // dst = (src + 0.5) rounded down towards negative infinity
4390+ fadd_d (ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here otherwise some inputs produce incorrect results
4391+ fcvt_l_d (dst, ftmp, RoundingMode::rdn);
4392+
4393+ bind (done);
4394+ }
4395+
43454396#define FCVT_SAFE (FLOATCVT, FLOATSIG ) \
43464397void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
43474398 Label done; \
0 commit comments