@@ -4481,6 +4481,57 @@ void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tm
44814481 bge (cnt, tmp1, loop);
44824482}
44834483
4484+ // java.lang.Math.round(float a)
4485+ // Returns the closest int to the argument, with ties rounding to positive infinity.
4486+ void MacroAssembler::java_round_float (Register dst, FloatRegister src, FloatRegister ftmp) {
4487+ // this instructions calling sequence provides performance improvement on all tested devices;
4488+ // don't change it without re-verification
4489+ Label done;
4490+ mv (t0, jint_cast (0 .5f ));
4491+ fmv_w_x (ftmp, t0);
4492+
4493+ // dst = 0 if NaN
4494+ feq_s (t0, src, src); // replacing fclass with feq as performance optimization
4495+ mv (dst, zr);
4496+ beqz (t0, done);
4497+
4498+ // dst = (src + 0.5f) rounded down towards negative infinity
4499+ // Adding 0.5f to some floats exceeds the precision limits for a float and rounding takes place.
4500+ // RDN is required for fadd_s, RNE gives incorrect results:
4501+ // --------------------------------------------------------------------
4502+ // fadd.s rne (src + 0.5f): src = 8388609.000000 ftmp = 8388610.000000
4503+ // fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610
4504+ // --------------------------------------------------------------------
4505+ // fadd.s rdn (src + 0.5f): src = 8388609.000000 ftmp = 8388609.000000
4506+ // fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609
4507+ // --------------------------------------------------------------------
4508+ fadd_s (ftmp, src, ftmp, RoundingMode::rdn);
4509+ fcvt_w_s (dst, ftmp, RoundingMode::rdn);
4510+
4511+ bind (done);
4512+ }
4513+
4514+ // java.lang.Math.round(double a)
4515+ // Returns the closest long to the argument, with ties rounding to positive infinity.
4516+ void MacroAssembler::java_round_double (Register dst, FloatRegister src, FloatRegister ftmp) {
4517+ // this instructions calling sequence provides performance improvement on all tested devices;
4518+ // don't change it without re-verification
4519+ Label done;
4520+ mv (t0, julong_cast (0.5 ));
4521+ fmv_d_x (ftmp, t0);
4522+
4523+ // dst = 0 if NaN
4524+ feq_d (t0, src, src); // replacing fclass with feq as performance optimization
4525+ mv (dst, zr);
4526+ beqz (t0, done);
4527+
4528+ // dst = (src + 0.5) rounded down towards negative infinity
4529+ fadd_d (ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here otherwise some inputs produce incorrect results
4530+ fcvt_l_d (dst, ftmp, RoundingMode::rdn);
4531+
4532+ bind (done);
4533+ }
4534+
44844535#define FCVT_SAFE (FLOATCVT, FLOATSIG ) \
44854536void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
44864537 Label done; \
0 commit comments