Skip to content

Commit ff65920

Browse files
mgkwill authored and Sandhya Viswanathan committed
8265491: Math Signum optimization for x86
Reviewed-by: jiefu, jbhateja, neliasso
1 parent 55cc0af commit ff65920

File tree

7 files changed

+217
-6
lines changed

7 files changed

+217
-6
lines changed

src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

+29
Original file line numberDiff line numberDiff line change
@@ -1045,6 +1045,35 @@ void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt,
10451045
}
10461046
}
10471047

1048+
// Float/Double signum
//
// Emits code that replaces the value in dst with its signum, in place.
//   opcode  - Op_SignumF (single precision) or Op_SignumD (double precision);
//             anything else trips the sanity assert.
//   dst     - holds the argument on entry and the result on exit.
//   zero    - register compared against dst; presumably preloaded with 0.0 by
//             the caller (not visible here).
//   one     - register copied into dst for arguments that are neither zero
//             nor NaN; presumably preloaded with 1.0 by the caller.
//   scratch - passed to xorps/xorpd together with the sign-flip ExternalAddress.
// The emitted ucomiss/ucomisd overwrites the condition flags.
1049+
void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst,
1050+
XMMRegister zero, XMMRegister one,
1051+
Register scratch) {
1052+
assert(opcode == Op_SignumF || opcode == Op_SignumD, "sanity");
1053+
1054+
// Common exit: every early-out below jumps here with the final result in dst.
Label DONE_LABEL;
1055+
1056+
if (opcode == Op_SignumF) {
1057+
assert(UseSSE > 0, "required");
1058+
// Unordered compare of dst against zero; all three jcc's below test its flags.
ucomiss(dst, zero);
1059+
jcc(Assembler::equal, DONE_LABEL); // handle special case +0.0/-0.0, if argument is +0.0/-0.0, return argument
1060+
jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN
1061+
// Tentatively set the result to `one`. The jcc(above) that follows still
// tests the ucomiss flags, so this relies on movflt not modifying them.
movflt(dst, one);
1062+
jcc(Assembler::above, DONE_LABEL); // dst compared above zero: keep `one` as the result
1063+
// Remaining case (dst compared below zero): XOR with the float sign-flip
// mask, presumably turning `one` into its negation.
xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), scratch);
1064+
} else if (opcode == Op_SignumD) {
1065+
assert(UseSSE > 1, "required");
1066+
// Same sequence as the float path, using the double-precision instructions.
ucomisd(dst, zero);
1067+
jcc(Assembler::equal, DONE_LABEL); // handle special case +0.0/-0.0, if argument is +0.0/-0.0, return argument
1068+
jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN
1069+
// As above: the following jcc(above) depends on the ucomisd flags surviving movdbl.
movdbl(dst, one);
1070+
jcc(Assembler::above, DONE_LABEL); // dst compared above zero: keep `one` as the result
1071+
// Remaining case (dst compared below zero): XOR with the double sign-flip mask.
xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scratch);
1072+
}
1073+
1074+
bind(DONE_LABEL);
1075+
}
1076+
10481077
void C2_MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src) {
10491078
if (sign) {
10501079
pmovsxbw(dst, src);

src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp

+4
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,10 @@
8989
KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
9090
int vlen_enc);
9191

92+
// Emits the in-place float/double signum sequence for dst; opcode selects
// Op_SignumF or Op_SignumD. zero and one are input registers and scratch is
// a general-purpose temporary (see the definition in c2_MacroAssembler_x86.cpp).
void signum_fp(int opcode, XMMRegister dst,
93+
XMMRegister zero, XMMRegister one,
94+
Register scratch);
95+
9296
void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
9397
void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
9498
void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);

src/hotspot/cpu/x86/vm_version_x86.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -1702,6 +1702,9 @@ void VM_Version::get_processor_features() {
17021702
}
17031703
}
17041704
#endif // !PRODUCT
1705+
if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1706+
FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1707+
}
17051708
}
17061709

17071710
void VM_Version::print_platform_virtualization_info(outputStream* st) {

src/hotspot/cpu/x86/x86.ad

+34
Original file line numberDiff line numberDiff line change
@@ -1599,6 +1599,16 @@ const bool Matcher::match_rule_supported(int opcode) {
15991599
}
16001600
break;
16011601
#endif // !LP64
1602+
case Op_SignumF:
1603+
if (UseSSE < 1) {
1604+
return false;
1605+
}
1606+
break;
1607+
case Op_SignumD:
1608+
if (UseSSE < 2) {
1609+
return false;
1610+
}
1611+
break;
16021612
}
16031613
return true; // Match rules are supported by default.
16041614
}
@@ -5779,6 +5789,30 @@ instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktm
57795789
ins_pipe( pipe_slow );
57805790
%}
57815791

5792+
// --------------------------------- Signum ---------------------------
5793+
5794+
// Matcher rule for the SignumF ideal node. dst is both the input and the
// result (Set dst (SignumF dst ...)); zero and one are the two extra inputs
// carried by the Binary node. scratch is reserved as a TEMP register and the
// flags register cr is declared KILLed. The encoding forwards the node's
// ideal opcode and the allocated registers to signum_fp.
instruct signumF_reg(regF dst, regF zero, regF one, rRegP scratch, rFlagsReg cr) %{
5795+
match(Set dst (SignumF dst (Binary zero one)));
5796+
effect(TEMP scratch, KILL cr);
5797+
format %{ "signumF $dst, $dst\t! using $scratch as TEMP" %}
5798+
ins_encode %{
5799+
int opcode = this->ideal_Opcode();
5800+
__ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, $scratch$$Register);
5801+
%}
5802+
ins_pipe( pipe_slow );
5803+
%}
5804+
5805+
// Matcher rule for the SignumD ideal node; the double-precision counterpart
// of signumF_reg. dst is both the input and the result, zero and one arrive
// through the Binary node, scratch is a TEMP register and the flags register
// cr is declared KILLed. The encoding forwards the node's ideal opcode and
// the allocated registers to signum_fp.
instruct signumD_reg(regD dst, regD zero, regD one, rRegP scratch, rFlagsReg cr) %{
5806+
match(Set dst (SignumD dst (Binary zero one)));
5807+
effect(TEMP scratch, KILL cr);
5808+
format %{ "signumD $dst, $dst\t! using $scratch as TEMP" %}
5809+
ins_encode %{
5810+
int opcode = this->ideal_Opcode();
5811+
__ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, $scratch$$Register);
5812+
%}
5813+
ins_pipe( pipe_slow );
5814+
%}
5815+
57825816
// --------------------------------- Sqrt --------------------------------------
57835817

57845818
instruct vsqrtF_reg(vec dst, vec src) %{

src/hotspot/share/opto/library_call.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1737,8 +1737,8 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
17371737
case vmIntrinsics::_dpow: return inline_math_pow();
17381738
case vmIntrinsics::_dcopySign: return inline_double_math(id);
17391739
case vmIntrinsics::_fcopySign: return inline_math(id);
1740-
case vmIntrinsics::_dsignum: return inline_double_math(id);
1741-
case vmIntrinsics::_fsignum: return inline_math(id);
1740+
case vmIntrinsics::_dsignum: return Matcher::match_rule_supported(Op_SignumD) ? inline_double_math(id) : false;
1741+
case vmIntrinsics::_fsignum: return Matcher::match_rule_supported(Op_SignumF) ? inline_math(id) : false;
17421742

17431743
// These intrinsics are not yet correctly implemented
17441744
case vmIntrinsics::_datan2:

test/hotspot/jtreg/compiler/intrinsics/math/TestSignumIntrinsic.java

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
33
* Copyright (c) 2020, BELLSOFT. All rights reserved.
44
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
55
*
@@ -25,7 +25,6 @@
2525
/*
2626
* @test
2727
* @summary Test compiler intrinsics for signum
28-
* @requires os.arch=="aarch64"
2928
* @library /test/lib
3029
*
3130
* @run main/othervm
@@ -100,7 +99,7 @@ private static float floatTest() {
10099
float arg = fcase[0];
101100
float expected = fcase[1];
102101
float calculated = Math.signum(arg);
103-
Asserts.assertEQ(expected, calculated, "Unexpected float result");
102+
Asserts.assertEQ(expected, calculated, "Unexpected float result from " + arg);
104103
accum += calculated;
105104
}
106105
return accum;
@@ -112,7 +111,7 @@ private static double doubleTest() {
112111
double arg = dcase[0];
113112
double expected = dcase[1];
114113
double calculated = Math.signum(arg);
115-
Asserts.assertEQ(expected, calculated, "Unexpected double result");
114+
Asserts.assertEQ(expected, calculated, "Unexpected double result from " + arg);
116115
accum += calculated;
117116
}
118117
return accum;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
/*
2+
* Copyright (c) Intel, 2021 All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
package org.openjdk.bench.vm.compiler;
25+
26+
import org.openjdk.jmh.annotations.Benchmark;
27+
import org.openjdk.jmh.annotations.BenchmarkMode;
28+
import org.openjdk.jmh.annotations.Fork;
29+
import org.openjdk.jmh.annotations.Measurement;
30+
import org.openjdk.jmh.annotations.Mode;
31+
import org.openjdk.jmh.annotations.OperationsPerInvocation;
32+
import org.openjdk.jmh.annotations.OutputTimeUnit;
33+
import org.openjdk.jmh.annotations.Scope;
34+
import org.openjdk.jmh.annotations.State;
35+
import org.openjdk.jmh.annotations.Warmup;
36+
import org.openjdk.jmh.infra.Blackhole;
37+
38+
import java.util.concurrent.TimeUnit;
39+
40+
/**
 * JMH microbenchmark for {@code Math.signum} on {@code float} and
 * {@code double} inputs.
 *
 * Each signum benchmark makes {@code ITERATIONS} passes over a fixed table of
 * 17 inputs (ordinary values, extreme magnitudes, signed zeros, infinities,
 * NaN and very small values) and feeds every result to a {@link Blackhole}.
 * The matching "overhead" benchmarks run the same loops but consume the raw
 * input, giving a baseline for the loop and Blackhole cost.
 */
@BenchmarkMode(Mode.AverageTime)
41+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
42+
@State(Scope.Thread)
43+
@Warmup(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
44+
@Measurement(iterations = 5, time = 1000, timeUnit = TimeUnit.MILLISECONDS)
45+
@Fork(3)
46+
public class Signum {
47+
48+
// Number of passes over the input table per benchmark invocation; also
// used in the @OperationsPerInvocation expressions below.
private final int ITERATIONS = 15000;
49+
50+
// NOTE(review): doubleValue and floatValue are not read anywhere in this
// class as shown.
private double doubleValue = 1D;
51+
private float floatValue = 1F;
52+
53+
// Float inputs (17 entries). The @OperationsPerInvocation(ITERATIONS * 17)
// annotations hard-code this length and must be updated by hand if the
// table changes.
private static final float[] float_values = {
54+
123.4f,
55+
-56.7f,
56+
7e30f,
57+
-0.3e30f,
58+
Float.MAX_VALUE,
59+
-Float.MAX_VALUE,
60+
Float.MIN_VALUE,
61+
-Float.MIN_VALUE,
62+
0.0f,
63+
-0.0f,
64+
Float.POSITIVE_INFINITY,
65+
Float.NEGATIVE_INFINITY,
66+
Float.NaN,
67+
Float.MIN_NORMAL,
68+
-Float.MIN_NORMAL,
69+
0x0.0002P-126f,
70+
-0x0.0002P-126f
71+
};
72+
73+
// Double inputs (17 entries), mirroring float_values; the same hard-coded
// 17 in @OperationsPerInvocation applies.
private static final double[] double_values = {
74+
123.4d,
75+
-56.7d,
76+
7e30d,
77+
-0.3e30d,
78+
Double.MAX_VALUE,
79+
-Double.MAX_VALUE,
80+
Double.MIN_VALUE,
81+
-Double.MIN_VALUE,
82+
0.0d,
83+
-0.0d,
84+
Double.POSITIVE_INFINITY,
85+
Double.NEGATIVE_INFINITY,
86+
Double.NaN,
87+
Double.MIN_NORMAL,
88+
-Double.MIN_NORMAL,
89+
0x0.00000001P-1022,
90+
-0x0.00000001P-1022,
91+
};
92+
93+
/** Operation under test, double flavour: returns {@code Math.signum(data)}. */
private static double Signum_Kernel(double data)
94+
{
95+
return Math.signum(data);
96+
}
97+
98+
/** Operation under test, float flavour: returns {@code Math.signum(data)}. */
private static float Signum_Kernel(float data)
99+
{
100+
return Math.signum(data);
101+
}
102+
103+
/**
 * Applies the float kernel to every entry of float_values, ITERATIONS
 * times, consuming each result through the Blackhole.
 */
@Benchmark
104+
@OperationsPerInvocation(ITERATIONS * 17)
105+
public void _1_signumFloatTest(Blackhole bh) {
106+
for (int i = 0; i < ITERATIONS; i++) {
107+
for (float f : float_values) {
108+
bh.consume(Signum_Kernel(f));
109+
}
110+
}
111+
}
112+
113+
/**
 * Baseline for _1_signumFloatTest: identical loops, but the raw float is
 * consumed without calling the kernel.
 */
@Benchmark
114+
@OperationsPerInvocation(ITERATIONS * 17)
115+
public void _2_overheadFloat(Blackhole bh) {
116+
for (int i = 0; i < ITERATIONS; i++) {
117+
for (float f : float_values) {
118+
bh.consume(f);
119+
}
120+
}
121+
}
122+
123+
/**
 * Applies the double kernel to every entry of double_values, ITERATIONS
 * times, consuming each result through the Blackhole.
 */
@Benchmark
124+
@OperationsPerInvocation(ITERATIONS * 17)
125+
public void _3_signumDoubleTest(Blackhole bh) {
126+
for (int i = 0; i < ITERATIONS; i++) {
127+
for (double d : double_values) {
128+
bh.consume(Signum_Kernel(d));
129+
}
130+
}
131+
}
132+
133+
/**
 * Baseline for _3_signumDoubleTest: identical loops, but the raw double is
 * consumed without calling the kernel.
 */
@Benchmark
134+
@OperationsPerInvocation(ITERATIONS * 17)
135+
public void _4_overheadDouble(Blackhole bh) {
136+
for (int i = 0; i < ITERATIONS; i++) {
137+
for (double d : double_values) {
138+
bh.consume(d);
139+
}
140+
}
141+
}
142+
}

0 commit comments

Comments
 (0)