@@ -4220,6 +4220,57 @@ void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tm
42204220 bge (cnt, tmp1, loop);
42214221}
42224222
4223+ // java.lang.Math.round(float a)
4224+ // Returns the closest int to the argument, with ties rounding to positive infinity.
4225+ void MacroAssembler::java_round_float (Register dst, FloatRegister src, FloatRegister ftmp) {
4226+ // this instructions calling sequence provides performance improvement on all tested devices;
4227+ // don't change it without re-verification
4228+ Label done;
4229+ mv (t0, jint_cast (0 .5f ));
4230+ fmv_w_x (ftmp, t0);
4231+
4232+ // dst = 0 if NaN
4233+ feq_s (t0, src, src); // replacing fclass with feq as performance optimization
4234+ mv (dst, zr);
4235+ beqz (t0, done);
4236+
4237+ // dst = (src + 0.5f) rounded down towards negative infinity
4238+ // Adding 0.5f to some floats exceeds the precision limits for a float and rounding takes place.
4239+ // RDN is required for fadd_s, RNE gives incorrect results:
4240+ // --------------------------------------------------------------------
4241+ // fadd.s rne (src + 0.5f): src = 8388609.000000 ftmp = 8388610.000000
4242+ // fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610
4243+ // --------------------------------------------------------------------
4244+ // fadd.s rdn (src + 0.5f): src = 8388609.000000 ftmp = 8388609.000000
4245+ // fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609
4246+ // --------------------------------------------------------------------
4247+ fadd_s (ftmp, src, ftmp, RoundingMode::rdn);
4248+ fcvt_w_s (dst, ftmp, RoundingMode::rdn);
4249+
4250+ bind (done);
4251+ }
4252+
4253+ // java.lang.Math.round(double a)
4254+ // Returns the closest long to the argument, with ties rounding to positive infinity.
4255+ void MacroAssembler::java_round_double (Register dst, FloatRegister src, FloatRegister ftmp) {
4256+ // this instructions calling sequence provides performance improvement on all tested devices;
4257+ // don't change it without re-verification
4258+ Label done;
4259+ mv (t0, julong_cast (0.5 ));
4260+ fmv_d_x (ftmp, t0);
4261+
4262+ // dst = 0 if NaN
4263+ feq_d (t0, src, src); // replacing fclass with feq as performance optimization
4264+ mv (dst, zr);
4265+ beqz (t0, done);
4266+
4267+ // dst = (src + 0.5) rounded down towards negative infinity
4268+ fadd_d (ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here otherwise some inputs produce incorrect results
4269+ fcvt_l_d (dst, ftmp, RoundingMode::rdn);
4270+
4271+ bind (done);
4272+ }
4273+
42234274#define FCVT_SAFE (FLOATCVT, FLOATSIG ) \
42244275void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
42254276 Label done; \
0 commit comments