Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8265491: Math Signum optimization for x86 #3581

Closed
wants to merge 16 commits into from
36 changes: 36 additions & 0 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
Expand Up @@ -1045,6 +1045,42 @@ void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt,
}
}

// Float/Double signum
void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst,
XMMRegister zero, XMMRegister one,
Register scratch) {
assert(opcode == Op_SignumF || opcode == Op_SignumD, "sanity");

Label DONE_LABEL;

if (opcode == Op_SignumF){
assert(UseSSE > 0, "required");
ucomiss(dst, zero);
} else if (opcode == Op_SignumD){
assert(UseSSE > 1, "required");
ucomisd(dst, zero);
}

jcc(Assembler::equal, DONE_LABEL); // handle special case +0.0/-0.0, if argument is +0.0/-0.0, return argument
jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN

if (opcode == Op_SignumF){
mgkwill marked this conversation as resolved.
Show resolved Hide resolved
movflt(dst, one);
} else if (opcode == Op_SignumD){
movdbl(dst, one);
}

jcc(Assembler::above, DONE_LABEL);

if (opcode == Op_SignumF){
xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), scratch);
mgkwill marked this conversation as resolved.
Show resolved Hide resolved
} else if (opcode == Op_SignumD){
xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scratch);
}

bind(DONE_LABEL);
}

void C2_MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src) {
if (sign) {
pmovsxbw(dst, src);
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
Expand Up @@ -89,6 +89,10 @@
KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
int vlen_enc);

// Emits the scalar signum sequence for opcode Op_SignumF or Op_SignumD.
// dst is both the argument and the result; zero and one are the registers
// the implementation compares against / copies from; scratch is passed to
// the xorps/xorpd that applies the sign-flip constant.
void signum_fp(int opcode, XMMRegister dst,
XMMRegister zero, XMMRegister one,
Register scratch);

void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
Expand Down
3 changes: 3 additions & 0 deletions src/hotspot/cpu/x86/vm_version_x86.cpp
Expand Up @@ -1700,6 +1700,9 @@ void VM_Version::get_processor_features() {
}
}
#endif // !PRODUCT
if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
}
}

void VM_Version::print_platform_virtualization_info(outputStream* st) {
Expand Down
36 changes: 36 additions & 0 deletions src/hotspot/cpu/x86/x86.ad
Expand Up @@ -1599,6 +1599,16 @@ const bool Matcher::match_rule_supported(int opcode) {
}
break;
#endif // !LP64
case Op_SignumF:
if (UseSSE < 1) {
return false;
}
break;
case Op_SignumD:
if (UseSSE < 2) {
return false;
}
break;
}
return true; // Match rules are supported by default.
}
Expand Down Expand Up @@ -5775,6 +5785,32 @@ instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktm
ins_pipe( pipe_slow );
%}

// --------------------------------- Signum ---------------------------

// Scalar float signum: matches (SignumF dst (Binary zero one)) with dst as
// both input and output. Needs a pointer-register TEMP and kills the flags.
instruct signumF_reg(regF dst, regF zero, regF one, rRegP scratch, rFlagsReg cr) %{
  predicate(UseSSE >= 1);
  match(Set dst (SignumF dst (Binary zero one)));
  effect(TEMP scratch, KILL cr);
  format %{ "signumF $dst, $dst\t! using $scratch as TEMP" %}
  ins_encode %{
    // The ideal opcode of this node selects the float path in signum_fp.
    __ signum_fp(this->ideal_Opcode(),
                 $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                 $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Scalar double signum: matches (SignumD dst (Binary zero one)) with dst as
// both input and output. Needs a pointer-register TEMP and kills the flags.
instruct signumD_reg(regD dst, regD zero, regD one, rRegP scratch, rFlagsReg cr) %{
  predicate(UseSSE >= 2);
  match(Set dst (SignumD dst (Binary zero one)));
  effect(TEMP scratch, KILL cr);
  format %{ "signumD $dst, $dst\t! using $scratch as TEMP" %}
  ins_encode %{
    // The ideal opcode of this node selects the double path in signum_fp.
    __ signum_fp(this->ideal_Opcode(),
                 $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                 $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Sqrt --------------------------------------

instruct vsqrtF_reg(vec dst, vec src) %{
Expand Down
4 changes: 2 additions & 2 deletions src/hotspot/share/opto/library_call.cpp
Expand Up @@ -1690,8 +1690,8 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {

case vmIntrinsics::_dcopySign: return inline_double_math(id);
case vmIntrinsics::_fcopySign: return inline_math(id);
case vmIntrinsics::_dsignum: return inline_double_math(id);
case vmIntrinsics::_fsignum: return inline_math(id);
case vmIntrinsics::_dsignum: return Matcher::match_rule_supported(Op_SignumD) ? inline_double_math(id) : false;
case vmIntrinsics::_fsignum: return Matcher::match_rule_supported(Op_SignumF) ? inline_math(id) : false;

// These intrinsics are not yet correctly implemented
case vmIntrinsics::_datan2:
Expand Down
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, BELLSOFT. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
Expand All @@ -25,7 +25,6 @@
/*
* @test
* @summary Test compiler intrinsics for signum
* @requires os.arch=="aarch64"
* @library /test/lib
*
* @run main/othervm
Expand Down Expand Up @@ -100,7 +99,7 @@ private static float floatTest() {
float arg = fcase[0];
float expected = fcase[1];
float calculated = Math.signum(arg);
Asserts.assertEQ(expected, calculated, "Unexpected float result");
Asserts.assertEQ(expected, calculated, "Unexpected float result from " + arg);
accum += calculated;
}
return accum;
Expand All @@ -112,7 +111,7 @@ private static double doubleTest() {
double arg = dcase[0];
double expected = dcase[1];
double calculated = Math.signum(arg);
Asserts.assertEQ(expected, calculated, "Unexpected double result");
Asserts.assertEQ(expected, calculated, "Unexpected double result from " + arg);
accum += calculated;
}
return accum;
Expand Down
142 changes: 142 additions & 0 deletions test/micro/org/openjdk/bench/vm/compiler/Signum.java
@@ -0,0 +1,142 @@
/*
* Copyright (c) Intel, 2021 All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

package org.openjdk.bench.vm.compiler;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OperationsPerInvocation;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;

import java.util.concurrent.TimeUnit;

/**
 * JMH microbenchmark for {@link Math#signum(float)} and
 * {@link Math#signum(double)}.
 *
 * Each signum benchmark is paired with an "overhead" benchmark that runs the
 * same loops but feeds the raw input to the {@link Blackhole}, so the loop and
 * blackhole cost can be subtracted from the signum measurement.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
@Warmup(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 5, time = 1000, timeUnit = TimeUnit.MILLISECONDS)
@Fork(3)
public class Signum {

    /** Number of passes over an input table per benchmark invocation. */
    private static final int ITERATIONS = 15000;

    /**
     * Number of entries in each input table below. Annotation arguments must
     * be compile-time constants, so this cannot be derived from the array
     * lengths; update it whenever a table gains or loses an entry.
     */
    private static final int VALUES_PER_TABLE = 17;

    /** Total signum (or consume) operations performed by one invocation. */
    private static final int OPS_PER_INVOCATION = ITERATIONS * VALUES_PER_TABLE;

    // Inputs: ordinary values, extremes, signed zeros, infinities, NaN,
    // and normal/subnormal boundary values.
    private static final float[] FLOAT_VALUES = {
        123.4f,
        -56.7f,
        7e30f,
        -0.3e30f,
        Float.MAX_VALUE,
        -Float.MAX_VALUE,
        Float.MIN_VALUE,
        -Float.MIN_VALUE,
        0.0f,
        -0.0f,
        Float.POSITIVE_INFINITY,
        Float.NEGATIVE_INFINITY,
        Float.NaN,
        Float.MIN_NORMAL,
        -Float.MIN_NORMAL,
        0x0.0002P-126f,
        -0x0.0002P-126f
    };

    // Double-precision counterpart of FLOAT_VALUES.
    private static final double[] DOUBLE_VALUES = {
        123.4d,
        -56.7d,
        7e30d,
        -0.3e30d,
        Double.MAX_VALUE,
        -Double.MAX_VALUE,
        Double.MIN_VALUE,
        -Double.MIN_VALUE,
        0.0d,
        -0.0d,
        Double.POSITIVE_INFINITY,
        Double.NEGATIVE_INFINITY,
        Double.NaN,
        Double.MIN_NORMAL,
        -Double.MIN_NORMAL,
        0x0.00000001P-1022,
        -0x0.00000001P-1022,
    };

    /** Thin wrapper around {@link Math#signum(double)}. */
    private static double signumKernel(double data) {
        return Math.signum(data);
    }

    /** Thin wrapper around {@link Math#signum(float)}. */
    private static float signumKernel(float data) {
        return Math.signum(data);
    }

    /** Applies signum to every float input, ITERATIONS times over. */
    @Benchmark
    @OperationsPerInvocation(OPS_PER_INVOCATION)
    public void _1_signumFloatTest(Blackhole bh) {
        for (int i = 0; i < ITERATIONS; i++) {
            for (float f : FLOAT_VALUES) {
                bh.consume(signumKernel(f));
            }
        }
    }

    /** Baseline for the float benchmark: same loops, no signum call. */
    @Benchmark
    @OperationsPerInvocation(OPS_PER_INVOCATION)
    public void _2_overheadFloat(Blackhole bh) {
        for (int i = 0; i < ITERATIONS; i++) {
            for (float f : FLOAT_VALUES) {
                bh.consume(f);
            }
        }
    }

    /** Applies signum to every double input, ITERATIONS times over. */
    @Benchmark
    @OperationsPerInvocation(OPS_PER_INVOCATION)
    public void _3_signumDoubleTest(Blackhole bh) {
        for (int i = 0; i < ITERATIONS; i++) {
            for (double d : DOUBLE_VALUES) {
                bh.consume(signumKernel(d));
            }
        }
    }

    /** Baseline for the double benchmark: same loops, no signum call. */
    @Benchmark
    @OperationsPerInvocation(OPS_PER_INVOCATION)
    public void _4_overheadDouble(Blackhole bh) {
        for (int i = 0; i < ITERATIONS; i++) {
            for (double d : DOUBLE_VALUES) {
                bh.consume(d);
            }
        }
    }
}