Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8265491: Math Signum optimization for x86 #3581

Closed
wants to merge 16 commits into from
@@ -1045,6 +1045,35 @@ void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt,
}
}

// Float/Double signum
//
// Emits the compare-and-branch sequence for Op_SignumF / Op_SignumD.
//   opcode  - Op_SignumF (single precision) or Op_SignumD (double precision);
//             any other value trips the sanity assert.
//   dst     - holds the argument on entry and is overwritten with the result.
//   zero    - register compared against dst.
//             NOTE(review): presumably preloaded with 0.0 by the caller - confirm.
//   one     - register copied into dst for the non-zero, non-NaN case.
//             NOTE(review): presumably preloaded with 1.0 by the caller - confirm.
//   scratch - general-purpose register handed to the xorps/xorpd overloads that
//             take an ExternalAddress operand.
// The condition codes are modified by the emitted compare.
void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst,
XMMRegister zero, XMMRegister one,
Register scratch) {
assert(opcode == Op_SignumF || opcode == Op_SignumD, "sanity");

// Single exit point shared by every early-out below.
Label DONE_LABEL;

if (opcode == Op_SignumF) {
assert(UseSSE > 0, "required");
// Compare the argument with `zero`; all following jcc's test this result.
ucomiss(dst, zero);
jcc(Assembler::equal, DONE_LABEL); // handle special case +0.0/-0.0, if argument is +0.0/-0.0, return argument
jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN
// Argument is neither zero nor NaN: start from `one` ...
movflt(dst, one);
// ... and keep it when the argument compared above `zero`. No new compare is
// emitted in between, so this branch still relies on the ucomiss flags.
jcc(Assembler::above, DONE_LABEL);
// Otherwise XOR dst with the float sign-flip mask from the stub routines.
xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), scratch);
} else if (opcode == Op_SignumD) {
assert(UseSSE > 1, "required");
// Same sequence as the float case, using the double-precision instructions.
ucomisd(dst, zero);
jcc(Assembler::equal, DONE_LABEL); // handle special case +0.0/-0.0, if argument is +0.0/-0.0, return argument
jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN
movdbl(dst, one);
jcc(Assembler::above, DONE_LABEL);
// Otherwise XOR dst with the double sign-flip mask from the stub routines.
xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scratch);
}

bind(DONE_LABEL);
}

void C2_MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src) {
if (sign) {
pmovsxbw(dst, src);
@@ -89,6 +89,10 @@
KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
int vlen_enc);

// Float/Double signum. opcode selects Op_SignumF or Op_SignumD; dst carries the
// argument in and the result out; zero and one are compared against / copied
// into dst; scratch is a general-purpose temporary used for addressing.
void signum_fp(int opcode, XMMRegister dst,
XMMRegister zero, XMMRegister one,
Register scratch);

void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
@@ -1700,6 +1700,9 @@ void VM_Version::get_processor_features() {
}
}
#endif // !PRODUCT
if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
}
}

void VM_Version::print_platform_virtualization_info(outputStream* st) {
@@ -1599,6 +1599,16 @@ const bool Matcher::match_rule_supported(int opcode) {
}
break;
#endif // !LP64
case Op_SignumF:
if (UseSSE < 1) {
return false;
}
break;
case Op_SignumD:
if (UseSSE < 2) {
return false;
}
break;
}
return true; // Match rules are supported by default.
}
@@ -5775,6 +5785,30 @@ instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktm
ins_pipe( pipe_slow );
%}

// --------------------------------- Signum ---------------------------

// Single-precision signum. The SignumF input and the result share $dst; $zero
// and $one are the two operands wrapped in the Binary node. $scratch is a TEMP
// general-purpose register and the flags register is KILLed. Code emission is
// delegated to signum_fp, keyed by this rule's ideal opcode.
instruct signumF_reg(regF dst, regF zero, regF one, rRegP scratch, rFlagsReg cr) %{
match(Set dst (SignumF dst (Binary zero one)));
effect(TEMP scratch, KILL cr);
format %{ "signumF $dst, $dst\t! using $scratch as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
__ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, $scratch$$Register);
%}
ins_pipe( pipe_slow );
%}

// Double-precision signum. The SignumD input and the result share $dst; $zero
// and $one are the two operands wrapped in the Binary node. $scratch is a TEMP
// general-purpose register and the flags register is KILLed. Code emission is
// delegated to signum_fp, keyed by this rule's ideal opcode.
instruct signumD_reg(regD dst, regD zero, regD one, rRegP scratch, rFlagsReg cr) %{
match(Set dst (SignumD dst (Binary zero one)));
effect(TEMP scratch, KILL cr);
format %{ "signumD $dst, $dst\t! using $scratch as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
__ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, $scratch$$Register);
%}
ins_pipe( pipe_slow );
%}

// --------------------------------- Sqrt --------------------------------------

instruct vsqrtF_reg(vec dst, vec src) %{
@@ -1690,8 +1690,8 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {

case vmIntrinsics::_dcopySign: return inline_double_math(id);
case vmIntrinsics::_fcopySign: return inline_math(id);
case vmIntrinsics::_dsignum: return inline_double_math(id);
case vmIntrinsics::_fsignum: return inline_math(id);
case vmIntrinsics::_dsignum: return Matcher::match_rule_supported(Op_SignumD) ? inline_double_math(id) : false;
case vmIntrinsics::_fsignum: return Matcher::match_rule_supported(Op_SignumF) ? inline_math(id) : false;

// These intrinsics are not yet correctly implemented
case vmIntrinsics::_datan2:
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, BELLSOFT. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -25,7 +25,6 @@
/*
* @test
* @summary Test compiler intrinsics for signum
* @requires os.arch=="aarch64"
* @library /test/lib
*
* @run main/othervm
@@ -100,7 +99,7 @@ private static float floatTest() {
float arg = fcase[0];
float expected = fcase[1];
float calculated = Math.signum(arg);
Asserts.assertEQ(expected, calculated, "Unexpected float result");
Asserts.assertEQ(expected, calculated, "Unexpected float result from " + arg);
accum += calculated;
}
return accum;
@@ -112,7 +111,7 @@ private static double doubleTest() {
double arg = dcase[0];
double expected = dcase[1];
double calculated = Math.signum(arg);
Asserts.assertEQ(expected, calculated, "Unexpected double result");
Asserts.assertEQ(expected, calculated, "Unexpected double result from " + arg);
accum += calculated;
}
return accum;
@@ -0,0 +1,142 @@
/*
* Copyright (c) Intel, 2021 All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

package org.openjdk.bench.vm.compiler;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OperationsPerInvocation;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;

import java.util.concurrent.TimeUnit;

/**
 * JMH microbenchmark for {@link Math#signum(float)} and
 * {@link Math#signum(double)}.
 *
 * <p>Each benchmark walks a fixed table of inputs (ordinary values, extremes,
 * signed zeros, infinities, NaN and subnormals) {@link #ITERATIONS} times and
 * feeds every result to a {@link Blackhole}. The {@code _2_}/{@code _4_}
 * "overhead" benchmarks run the same loops without the {@code signum} call so
 * the loop and blackhole cost can be subtracted from the measurements.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
@Warmup(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 5, time = 1000, timeUnit = TimeUnit.MILLISECONDS)
@Fork(3)
public class Signum {

    /** Number of passes over the input table per benchmark invocation. */
    private static final int ITERATIONS = 15000;

    /**
     * Number of entries in each input table. Annotation arguments must be
     * compile-time constants, so this cannot be derived from the array length;
     * keep it in sync with {@link #float_values} and {@link #double_values}.
     */
    private static final int INPUT_COUNT = 17;

    /** Single-precision inputs; must contain exactly {@link #INPUT_COUNT} entries. */
    private static final float[] float_values = {
        123.4f,
        -56.7f,
        7e30f,
        -0.3e30f,
        Float.MAX_VALUE,
        -Float.MAX_VALUE,
        Float.MIN_VALUE,
        -Float.MIN_VALUE,
        0.0f,
        -0.0f,
        Float.POSITIVE_INFINITY,
        Float.NEGATIVE_INFINITY,
        Float.NaN,
        Float.MIN_NORMAL,
        -Float.MIN_NORMAL,
        0x0.0002P-126f,
        -0x0.0002P-126f
    };

    /** Double-precision inputs; must contain exactly {@link #INPUT_COUNT} entries. */
    private static final double[] double_values = {
        123.4d,
        -56.7d,
        7e30d,
        -0.3e30d,
        Double.MAX_VALUE,
        -Double.MAX_VALUE,
        Double.MIN_VALUE,
        -Double.MIN_VALUE,
        0.0d,
        -0.0d,
        Double.POSITIVE_INFINITY,
        Double.NEGATIVE_INFINITY,
        Double.NaN,
        Double.MIN_NORMAL,
        -Double.MIN_NORMAL,
        0x0.00000001P-1022,
        -0x0.00000001P-1022,
    };

    /** Kernel under test for the double-precision benchmark. */
    private static double Signum_Kernel(double data) {
        return Math.signum(data);
    }

    /** Kernel under test for the single-precision benchmark. */
    private static float Signum_Kernel(float data) {
        return Math.signum(data);
    }

    /** Measures {@code Math.signum(float)} over the whole float input table. */
    @Benchmark
    @OperationsPerInvocation(ITERATIONS * INPUT_COUNT)
    public void _1_signumFloatTest(Blackhole bh) {
        for (int i = 0; i < ITERATIONS; i++) {
            for (float f : float_values) {
                bh.consume(Signum_Kernel(f));
            }
        }
    }

    /** Baseline for the float benchmark: same loops, no {@code signum} call. */
    @Benchmark
    @OperationsPerInvocation(ITERATIONS * INPUT_COUNT)
    public void _2_overheadFloat(Blackhole bh) {
        for (int i = 0; i < ITERATIONS; i++) {
            for (float f : float_values) {
                bh.consume(f);
            }
        }
    }

    /** Measures {@code Math.signum(double)} over the whole double input table. */
    @Benchmark
    @OperationsPerInvocation(ITERATIONS * INPUT_COUNT)
    public void _3_signumDoubleTest(Blackhole bh) {
        for (int i = 0; i < ITERATIONS; i++) {
            for (double d : double_values) {
                bh.consume(Signum_Kernel(d));
            }
        }
    }

    /** Baseline for the double benchmark: same loops, no {@code signum} call. */
    @Benchmark
    @OperationsPerInvocation(ITERATIONS * INPUT_COUNT)
    public void _4_overheadDouble(Blackhole bh) {
        for (int i = 0; i < ITERATIONS; i++) {
            for (double d : double_values) {
                bh.consume(d);
            }
        }
    }
}