Skip to content

Commit b64a3fb

Browse files
committed
8265325: Optimize StubRoutines::dpow() for Math.pow(x, 0.5)
Reviewed-by: kvn, neliasso
1 parent d1c8c9e commit b64a3fb

File tree

3 files changed

+146
-1
lines changed

3 files changed

+146
-1
lines changed

src/hotspot/cpu/x86/macroAssembler_x86_pow.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*
22
* Copyright (c) 2016, Intel Corporation.
3+
* Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
34
* Intel Math Library (LIBM) Source Code
45
*
56
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -765,6 +766,16 @@ ATTRIBUTE_ALIGNED(8) juint _DOUBLE2[] =
765766
0x00000000UL, 0x40000000UL
766767
};
767768

769+
ATTRIBUTE_ALIGNED(8) juint _DOUBLE0[] =
770+
{
771+
0x00000000UL, 0x00000000UL
772+
};
773+
774+
ATTRIBUTE_ALIGNED(8) juint _DOUBLE0DOT5[] =
775+
{
776+
0x00000000UL, 0x3fe00000UL
777+
};
778+
768779
//registers,
769780
// input: xmm0, xmm1
770781
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
@@ -789,6 +800,7 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
789800
Label L_2TAG_PACKET_52_0_2, L_2TAG_PACKET_53_0_2, L_2TAG_PACKET_54_0_2, L_2TAG_PACKET_55_0_2;
790801
Label L_2TAG_PACKET_56_0_2;
791802
Label B1_2, B1_3, B1_5, start;
803+
Label L_POW;
792804

793805
assert_different_registers(tmp1, tmp2, eax, ecx, edx);
794806
jmp(start);
@@ -804,6 +816,8 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
804816
address HALFMASK = (address)_HALFMASK;
805817
address log2 = (address)_log2_pow;
806818
address DOUBLE2 = (address)_DOUBLE2;
819+
address DOUBLE0 = (address)_DOUBLE0;
820+
address DOUBLE0DOT5 = (address)_DOUBLE0DOT5;
807821

808822

809823
bind(start);
@@ -818,7 +832,17 @@ void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xm
818832
mulsd(xmm0, xmm0);
819833
jmp(B1_5);
820834

835+
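// Special case: pow(x, 0.5) => sqrt(x). Taken only when y == 0.5 and the raw bits of x, compared as a signed 64-bit integer, are not below zero (i.e. x's sign bit is clear); negative x and -0.0 fall through to the general path at L_POW.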
821836
bind(B1_2);
837+
cmp64(tmp1, ExternalAddress(DOUBLE0DOT5));
838+
jccb(Assembler::notEqual, L_POW); // For pow(x, y), check whether y == 0.5
839+
movdq(tmp2, xmm0);
840+
cmp64(tmp2, ExternalAddress(DOUBLE0));
841+
jccb(Assembler::less, L_POW); // pow(x, 0.5) => sqrt(x) only for x >= 0.0 or x is +inf/NaN
842+
sqrtsd(xmm0, xmm0);
843+
jmp(B1_5);
844+
845+
bind(L_POW);
822846
pextrw(eax, xmm0, 3);
823847
xorpd(xmm2, xmm2);
824848
mov64(tmp2, 0x3ff0000000000000);
TestPow0Dot5Opt.java (new test file; its directory path is not shown in this capture)

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/*
2+
* Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
/*
25+
* @test
26+
* @bug 8265325
27+
* @summary test the optimization of pow(x, 0.5)
28+
* @requires os.arch=="amd64" | os.arch=="x86_64"
29+
* @run main/othervm TestPow0Dot5Opt
30+
* @run main/othervm -Xint TestPow0Dot5Opt
31+
* @run main/othervm -Xbatch -XX:TieredStopAtLevel=1 TestPow0Dot5Opt
32+
* @run main/othervm -Xbatch -XX:-TieredCompilation TestPow0Dot5Opt
33+
*/
34+
35+
public class TestPow0Dot5Opt {
36+
37+
static void test(double a) throws Exception {
38+
// pow(x, 0.5) isn't replaced with sqrt(x) for x < 0.0
39+
if (a < 0.0) return;
40+
41+
double r1 = Math.sqrt(a);
42+
double r2 = Math.pow(a, 0.5);
43+
if (r1 != r2) {
44+
throw new RuntimeException("pow(" + a + ", 0.5), expected: " + r1 + ", actual: " + r2);
45+
}
46+
}
47+
48+
public static void main(String[] args) throws Exception {
49+
for (int i = 0; i < 10; i++) {
50+
for (int j = 1; j < 100000; j++) {
51+
test(j * 1.0);
52+
test(1.0 / j);
53+
}
54+
}
55+
56+
test(0.0);
57+
58+
// Special case: pow(+0.0, 0.5) = 0.0
59+
double r = Math.pow(+0.0, 0.5);
60+
if (Double.doubleToRawLongBits(r) != Double.doubleToRawLongBits(0.0)) {
61+
throw new RuntimeException("pow(+0.0, 0.5), expected: 0.0, actual: " + r);
62+
}
63+
64+
// Special case: pow(-0.0, 0.5) = +0.0, whereas sqrt(-0.0) = -0.0, so -0.0 must not take the sqrt shortcut (checked bit-for-bit below)
65+
r = Math.pow(-0.0, 0.5);
66+
if (Double.doubleToRawLongBits(r) != Double.doubleToRawLongBits(0.0)) {
67+
throw new RuntimeException("pow(-0.0, 0.5), expected: 0.0, actual: " + r);
68+
}
69+
70+
// Special case: pow(Double.POSITIVE_INFINITY, 0.5) = Infinity
71+
r = Math.pow(Double.POSITIVE_INFINITY, 0.5);
72+
if (!(r > 0 && Double.isInfinite(r))) {
73+
throw new RuntimeException("pow(+Infinity, 0.5), expected: Infinity, actual: " + r);
74+
}
75+
76+
// Special case: pow(Double.NEGATIVE_INFINITY, 0.5) = +Infinity, whereas sqrt(-Infinity) = NaN, so -Infinity must not take the sqrt shortcut
77+
r = Math.pow(Double.NEGATIVE_INFINITY, 0.5);
78+
if (!(r > 0 && Double.isInfinite(r))) {
79+
throw new RuntimeException("pow(-Infinity, 0.5), expected: Infinity, actual: " + r);
80+
}
81+
82+
// Special case: pow(Double.NaN, 0.5) = NaN
83+
r = Math.pow(Double.NaN, 0.5);
84+
if (!Double.isNaN(r)) {
85+
throw new RuntimeException("pow(NaN, 0.5), expected: NaN, actual: " + r);
86+
}
87+
}
88+
89+
}

test/micro/org/openjdk/bench/java/lang/MathBench.java

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ public class MathBench {
6060
public int int1 = 1, int2 = 2, int42 = 42, int5 = 5;
6161
public long long1 = 1L, long2 = 2L, long747 = 747L, long13 = 13L;
6262
public float float1 = 1.0f, float2 = 2.0f, floatNegative99 = -99.0f, float7 = 7.0f, eFloat = 2.718f;
63-
public double double1 = 1.0d, double2 = 2.0d, double81 = 81.0d, doubleNegative12 = -12.0d, double4Dot1 = 4.1d;
63+
public double double1 = 1.0d, double2 = 2.0d, double81 = 81.0d, doubleNegative12 = -12.0d, double4Dot1 = 4.1d, double0Dot5 = 0.5d;
6464

6565
@Setup
6666
public void setupValues() {
@@ -408,6 +408,38 @@ public double powDouble() {
408408
return Math.pow(double4Dot1, double2);
409409
}
410410

411+
@Benchmark
412+
public double powDoubleLoop() {
413+
double sum = 0.0;
414+
for (int i = 0; i < 1000; i++) {
415+
for (int j = 0; j < 1000; j++) {
416+
sum += i + Math.pow(j * 1.0, i * 1.0);
417+
}
418+
}
419+
return sum;
420+
}
421+
422+
@Benchmark
423+
public double powDouble0Dot5() {
424+
return Math.pow(double4Dot1, double0Dot5);
425+
}
426+
427+
@Benchmark
428+
public double powDouble0Dot5Const() {
429+
return Math.pow(double4Dot1, 0.5);
430+
}
431+
432+
@Benchmark
433+
public double powDouble0Dot5Loop() {
434+
double sum = 0.0;
435+
for (int i = 0; i < 1000; i++) {
436+
for (int j = 0; j < 1000; j++) {
437+
sum += i + Math.pow(j * 1.0, 0.5);
438+
}
439+
}
440+
return sum;
441+
}
442+
411443
@Benchmark
412444
public double random() {
413445
return Math.random();

0 commit comments

Comments
 (0)