Skip to content
Permalink
Browse files

8264409: AArch64: generate better code for Vector API allTrue

Reviewed-by: adinn, pli
  • Loading branch information
Ningsheng Jian committed Apr 2, 2021
1 parent 4793557; commit 0935eaa4b4b3578680e128d041396188f48ff293
@@ -3526,11 +3526,12 @@ instruct anytrue_in_mask8B(iRegINoSp dst, vecD src1, vecD src2, vecD tmp, rFlags
match(Set dst (VectorTest src1 src2 ));
ins_cost(INSN_COST);
effect(TEMP tmp, KILL cr);
format %{ "addv $tmp, T8B, $src1\t# src1 and src2 are the same\n\t"
format %{ "addv $tmp, T8B, $src1\n\t"
"umov $dst, $tmp, B, 0\n\t"
"cmp $dst, 0\n\t"
"cset $dst" %}
"cset $dst\t# anytrue 8B" %}
ins_encode %{
// No need to use src2.
__ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($src1$$reg));
__ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
__ cmpw($dst$$Register, zr);
@@ -3545,11 +3546,12 @@ instruct anytrue_in_mask16B(iRegINoSp dst, vecX src1, vecX src2, vecX tmp, rFlag
match(Set dst (VectorTest src1 src2 ));
ins_cost(INSN_COST);
effect(TEMP tmp, KILL cr);
format %{ "addv $tmp, T16B, $src1\t# src1 and src2 are the same\n\t"
format %{ "addv $tmp, T16B, $src1\n\t"
"umov $dst, $tmp, B, 0\n\t"
"cmp $dst, 0\n\t"
"cset $dst" %}
"cset $dst\t# anytrue 16B" %}
ins_encode %{
// No need to use src2.
__ addv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($src1$$reg));
__ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
__ cmpw($dst$$Register, zr);
@@ -3564,19 +3566,15 @@ instruct alltrue_in_mask8B(iRegINoSp dst, vecD src1, vecD src2, vecD tmp, rFlags
match(Set dst (VectorTest src1 src2 ));
ins_cost(INSN_COST);
effect(TEMP tmp, KILL cr);
format %{ "andr $tmp, T8B, $src1, $src2\t# src2 is maskAllTrue\n\t"
"notr $tmp, T8B, $tmp\n\t"
"addv $tmp, T8B, $tmp\n\t"
format %{ "uminv $tmp, T8B, $src1\n\t"
"umov $dst, $tmp, B, 0\n\t"
"cmp $dst, 0\n\t"
"cset $dst" %}
"cmp $dst, 0xff\n\t"
"cset $dst\t# alltrue 8B" %}
ins_encode %{
__ andr(as_FloatRegister($tmp$$reg), __ T8B,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
__ notr(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($tmp$$reg));
__ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($tmp$$reg));
// No need to use src2.
__ uminv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($src1$$reg));
__ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
__ cmpw($dst$$Register, zr);
__ cmpw($dst$$Register, 0xff);
__ csetw($dst$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
@@ -3588,19 +3586,15 @@ instruct alltrue_in_mask16B(iRegINoSp dst, vecX src1, vecX src2, vecX tmp, rFlag
match(Set dst (VectorTest src1 src2 ));
ins_cost(INSN_COST);
effect(TEMP tmp, KILL cr);
format %{ "andr $tmp, T16B, $src1, $src2\t# src2 is maskAllTrue\n\t"
"notr $tmp, T16B, $tmp\n\t"
"addv $tmp, T16B, $tmp\n\t"
format %{ "uminv $tmp, T16B, $src1\n\t"
"umov $dst, $tmp, B, 0\n\t"
"cmp $dst, 0\n\t"
"cset $dst" %}
"cmp $dst, 0xff\n\t"
"cset $dst\t# alltrue 16B" %}
ins_encode %{
__ andr(as_FloatRegister($tmp$$reg), __ T16B,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
__ notr(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($tmp$$reg));
__ addv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($tmp$$reg));
// No need to use src2.
__ uminv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($src1$$reg));
__ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
__ cmpw($dst$$Register, zr);
__ cmpw($dst$$Register, 0xff);
__ csetw($dst$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
@@ -1415,11 +1415,12 @@ instruct anytrue_in_mask$1B`'(iRegINoSp dst, vec$2 src1, vec$2 src2, vec$2 tmp,
match(Set dst (VectorTest src1 src2 ));
ins_cost(INSN_COST);
effect(TEMP tmp, KILL cr);
format %{ "addv $tmp, T$1B, $src1\t# src1 and src2 are the same\n\t"
format %{ "addv $tmp, T$1B, $src1\n\t"
"umov $dst, $tmp, B, 0\n\t"
"cmp $dst, 0\n\t"
"cset $dst" %}
"cset $dst\t# anytrue $1B" %}
ins_encode %{
// No need to use src2.
__ addv(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($src1$$reg));
__ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
__ cmpw($dst$$Register, zr);
@@ -1438,19 +1439,15 @@ instruct alltrue_in_mask$1B`'(iRegINoSp dst, vec$2 src1, vec$2 src2, vec$2 tmp,
match(Set dst (VectorTest src1 src2 ));
ins_cost(INSN_COST);
effect(TEMP tmp, KILL cr);
format %{ "andr $tmp, T$1B, $src1, $src2\t# src2 is maskAllTrue\n\t"
"notr $tmp, T$1B, $tmp\n\t"
"addv $tmp, T$1B, $tmp\n\t"
format %{ "uminv $tmp, T$1B, $src1\n\t"
"umov $dst, $tmp, B, 0\n\t"
"cmp $dst, 0\n\t"
"cset $dst" %}
"cmp $dst, 0xff\n\t"
"cset $dst\t# alltrue $1B" %}
ins_encode %{
__ andr(as_FloatRegister($tmp$$reg), __ T$1B,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
__ notr(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($tmp$$reg));
__ addv(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($tmp$$reg));
// No need to use src2.
__ uminv(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($src1$$reg));
__ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
__ cmpw($dst$$Register, zr);
__ cmpw($dst$$Register, 0xff);
__ csetw($dst$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
@@ -2427,6 +2427,7 @@ void mvnw(Register Rd, Register Rm,
INSN(addv, 0, 0b110001101110, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
INSN(smaxv, 0, 0b110000101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
INSN(sminv, 0, 0b110001101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
INSN(uminv, 1, 0b110001101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
INSN(cls, 0, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(clz, 1, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(cnt, 0, 0b100000010110, 0); // accepted arrangements: T8B, T16B
@@ -1401,9 +1401,12 @@ def generate(kind, names):
["smaxv", "smaxv", "8B"], ["smaxv", "smaxv", "16B"],
["smaxv", "smaxv", "4H"], ["smaxv", "smaxv", "8H"],
["smaxv", "smaxv", "4S"], ["fmaxv", "fmaxv", "4S"],
["sminv", "sminv", "8B"], ["sminv", "sminv", "16B"],
["sminv", "sminv", "4H"], ["sminv", "sminv", "8H"],
["sminv", "sminv", "4S"], ["fminv", "fminv", "4S"],
["sminv", "sminv", "8B"], ["uminv", "uminv", "8B"],
["sminv", "sminv", "16B"],["uminv", "uminv", "16B"],
["sminv", "sminv", "4H"], ["uminv", "uminv", "4H"],
["sminv", "sminv", "8H"], ["uminv", "uminv", "8H"],
["sminv", "sminv", "4S"], ["uminv", "uminv", "4S"],
["fminv", "fminv", "4S"],
["fmaxp", "fmaxp", "2S"], ["fmaxp", "fmaxp", "2D"],
["fminp", "fminp", "2S"], ["fminp", "fminp", "2D"],
])

1 comment on commit 0935eaa

@openjdk-notifier

This comment has been minimized.

Copy link

@openjdk-notifier openjdk-notifier bot commented on 0935eaa Apr 2, 2021

Please sign in to comment.