Skip to content
Permalink
Browse files
8271883: Math CopySign optimization for x86
Reviewed-by: jbhateja, sviswanathan, kvn
  • Loading branch information
Marcus G K Williams authored and Sandhya Viswanathan committed Aug 14, 2021
1 parent 6b8b160 commit 87d2761f1b4572633de020b2d4681918c6f10f06
Showing with 111 additions and 1 deletion.
  1. +3 −0 src/hotspot/cpu/x86/vm_version_x86.cpp
  2. +57 −1 src/hotspot/cpu/x86/x86.ad
  3. +51 −0 test/micro/org/openjdk/bench/vm/compiler/Signum.java
@@ -1736,6 +1736,9 @@ void VM_Version::get_processor_features() {
if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
}
if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
}
}

void VM_Version::print_platform_virtualization_info(outputStream* st) {
@@ -1560,6 +1560,15 @@ const bool Matcher::match_rule_supported(int opcode) {
return false;
}
break;
case Op_CopySignD:
case Op_CopySignF:
if (UseAVX < 3 || !is_LP64) {
return false;
}
if (!VM_Version::supports_avx512vl()) {
return false;
}
break;
#ifndef _LP64
case Op_AddReductionVF:
case Op_AddReductionVD:
@@ -5776,7 +5785,7 @@ instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktm
ins_pipe( pipe_slow );
%}

// --------------------------------- Signum ---------------------------
// --------------------------------- Signum/CopySign ---------------------------

instruct signumF_reg(regF dst, regF zero, regF one, rRegP scratch, rFlagsReg cr) %{
match(Set dst (SignumF dst (Binary zero one)));
@@ -5800,6 +5809,53 @@ instruct signumD_reg(regD dst, regD zero, regD one, rRegP scratch, rFlagsReg cr)
ins_pipe( pipe_slow );
%}

// ---------------------------------------
// For copySign use 0xE4 as the imm8 truth table (not a writemask) for vpternlog
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
// C (xmm2) is set to 0x7FFFFFFF (0x7FFFFFFFFFFFFFFF for double), i.e. all bits except the sign bit
// Wherever xmm2 is 0, we want to pick from B (the sign operand)
// Wherever xmm2 is 1, we want to pick from A (the magnitude operand)
//
// A B C Result
// 0 0 0 0
// 0 0 1 0
// 0 1 0 1
// 0 1 1 0
// 1 0 0 0
// 1 0 1 1
// 1 1 0 1
// 1 1 1 1
//
// Result going from high bit to low bit is 0b11100100 = 0xE4
// ---------------------------------------

#ifdef _LP64
// Scalar float copySign rule: matches (CopySignF dst src) and writes the result back into dst.
// The result keeps every non-sign bit of dst and takes the sign bit from src.
// tmp1 (XMM) and tmp2 (general-purpose) are declared TEMP and hold the 0x7FFFFFFF mask.
instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
match(Set dst (CopySignF dst src));
effect(TEMP tmp1, TEMP tmp2);
format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
ins_encode %{
// Materialize the mask: all bits set except bit 31 (the float sign bit).
__ movl($tmp2$$Register, 0x7FFFFFFF);
// Transfer the mask from the general-purpose temp into the XMM temp.
__ movdl($tmp1$$XMMRegister, $tmp2$$Register);
// Ternary logic with truth table 0xE4: where the mask bit is 1 keep the dst bit,
// where the mask bit is 0 take the src bit.
__ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
%}
ins_pipe( pipe_slow );
%}

// Scalar double copySign rule: matches (CopySignD dst (Binary src zero)) and writes the
// result back into dst. The result keeps every non-sign bit of dst and takes the sign bit
// from src. The immD operand `zero` is required by the match pattern but is not referenced
// by the encoding below.
// tmp1 (XMM) and tmp2 (64-bit general-purpose) are declared TEMP and hold the mask.
instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
match(Set dst (CopySignD dst (Binary src zero)));
ins_cost(100);
effect(TEMP tmp1, TEMP tmp2);
format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
ins_encode %{
// Materialize the mask: all bits set except bit 63 (the double sign bit).
__ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
// Transfer the 64-bit mask from the general-purpose temp into the XMM temp.
__ movq($tmp1$$XMMRegister, $tmp2$$Register);
// Ternary logic with truth table 0xE4: where the mask bit is 1 keep the dst bit,
// where the mask bit is 0 take the src bit.
__ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
%}
ins_pipe( pipe_slow );
%}
#endif // _LP64

// --------------------------------- Sqrt --------------------------------------

instruct vsqrtF_reg(vec dst, vec src) %{
@@ -100,6 +100,16 @@ private static float Signum_Kernel(float data)
return Math.signum(data);
}

/**
 * Double-precision benchmark kernel: forwards to {@link Math#copySign(double, double)}.
 *
 * @param data value whose magnitude is kept
 * @param sign value whose sign is applied to {@code data}
 * @return {@code data} carrying the sign of {@code sign}
 */
private static double Copysign_Kernel(double data, double sign)
{
    final double signedValue = Math.copySign(data, sign);
    return signedValue;
}

/**
 * Single-precision benchmark kernel: forwards to {@link Math#copySign(float, float)}.
 *
 * @param data value whose magnitude is kept
 * @param sign value whose sign is applied to {@code data}
 * @return {@code data} carrying the sign of {@code sign}
 */
private static float Copysign_Kernel(float data, float sign)
{
    final float signedValue = Math.copySign(data, sign);
    return signedValue;
}

@Benchmark
@OperationsPerInvocation(ITERATIONS * 17)
public void _1_signumFloatTest(Blackhole bh) {
@@ -139,4 +149,45 @@ public void _4_overheadDouble(Blackhole bh) {
}
}
}

/**
 * Float copySign benchmark: for each of ITERATIONS passes, calls
 * Copysign_Kernel(floatValue, f) for every f in float_values and hands the
 * result to the Blackhole.
 *
 * NOTE(review): the operation count ITERATIONS * 17 presumably assumes
 * float_values holds 17 elements -- confirm against the array definition.
 *
 * @param bh JMH Blackhole that consumes each result
 */
@Benchmark
@OperationsPerInvocation(ITERATIONS * 17)
public void _5_copySignFloatTest(Blackhole bh) {
for (int i = 0; i < ITERATIONS; i++) {
for (float f : float_values) {
// floatValue supplies the magnitude; the array element supplies the sign.
bh.consume(Copysign_Kernel(floatValue, f));
}
}
}

/**
 * Runs the same nested loops over float_values as the float copySign
 * benchmark, but consumes each element directly without calling the kernel.
 *
 * NOTE(review): judging by the name, this is the loop/Blackhole overhead
 * baseline to subtract from _5_copySignFloatTest -- confirm intended usage.
 *
 * @param bh JMH Blackhole that consumes each array element
 */
@Benchmark
@OperationsPerInvocation(ITERATIONS * 17)
public void _6_overheadCopySignFloat(Blackhole bh) {
for (int i = 0; i < ITERATIONS; i++) {
for (float f : float_values) {
bh.consume(f);
}
}
}

/**
 * Double copySign benchmark: for each of ITERATIONS passes, calls
 * Copysign_Kernel(doubleValue, d) for every d in double_values and hands the
 * result to the Blackhole.
 *
 * NOTE(review): the operation count ITERATIONS * 17 presumably assumes
 * double_values holds 17 elements -- confirm against the array definition.
 *
 * @param bh JMH Blackhole that consumes each result
 */
@Benchmark
@OperationsPerInvocation(ITERATIONS * 17)
public void _7_copySignDoubleTest(Blackhole bh) {
for (int i = 0; i < ITERATIONS; i++) {
for (double d : double_values) {
// doubleValue supplies the magnitude; the array element supplies the sign.
bh.consume(Copysign_Kernel(doubleValue, d));
}
}
}

/**
 * Runs the same nested loops over double_values as the double copySign
 * benchmark, but consumes each element directly without calling the kernel.
 *
 * NOTE(review): judging by the name, this is the loop/Blackhole overhead
 * baseline to subtract from _7_copySignDoubleTest -- confirm intended usage.
 *
 * @param bh JMH Blackhole that consumes each array element
 */
@Benchmark
@OperationsPerInvocation(ITERATIONS * 17)
public void _8_overheadCopySignDouble(Blackhole bh) {
for (int i = 0; i < ITERATIONS; i++) {
for (double d : double_values) {
bh.consume(d);
}
}
}

}

1 comment on commit 87d2761

@openjdk-notifier

This comment has been minimized.

Copy link

@openjdk-notifier openjdk-notifier bot commented on 87d2761 Aug 14, 2021

Please sign in to comment.