Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8264409: AArch64: generate better code for Vector API allTrue #3302

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
@@ -3526,11 +3526,12 @@ instruct anytrue_in_mask8B(iRegINoSp dst, vecD src1, vecD src2, vecD tmp, rFlags
match(Set dst (VectorTest src1 src2 ));
ins_cost(INSN_COST);
effect(TEMP tmp, KILL cr);
format %{ "addv $tmp, T8B, $src1\t# src1 and src2 are the same\n\t"
format %{ "addv $tmp, T8B, $src1\n\t"
"umov $dst, $tmp, B, 0\n\t"
"cmp $dst, 0\n\t"
"cset $dst" %}
"cset $dst\t# anytrue 8B" %}
ins_encode %{
// No need to use src2.
__ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($src1$$reg));
__ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
__ cmpw($dst$$Register, zr);
@@ -3545,11 +3546,12 @@ instruct anytrue_in_mask16B(iRegINoSp dst, vecX src1, vecX src2, vecX tmp, rFlag
match(Set dst (VectorTest src1 src2 ));
ins_cost(INSN_COST);
effect(TEMP tmp, KILL cr);
format %{ "addv $tmp, T16B, $src1\t# src1 and src2 are the same\n\t"
format %{ "addv $tmp, T16B, $src1\n\t"
"umov $dst, $tmp, B, 0\n\t"
"cmp $dst, 0\n\t"
"cset $dst" %}
"cset $dst\t# anytrue 16B" %}
ins_encode %{
// No need to use src2.
__ addv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($src1$$reg));
__ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
__ cmpw($dst$$Register, zr);
@@ -3564,19 +3566,15 @@ instruct alltrue_in_mask8B(iRegINoSp dst, vecD src1, vecD src2, vecD tmp, rFlags
match(Set dst (VectorTest src1 src2 ));
ins_cost(INSN_COST);
effect(TEMP tmp, KILL cr);
format %{ "andr $tmp, T8B, $src1, $src2\t# src2 is maskAllTrue\n\t"
"notr $tmp, T8B, $tmp\n\t"
"addv $tmp, T8B, $tmp\n\t"
format %{ "uminv $tmp, T8B, $src1\n\t"
"umov $dst, $tmp, B, 0\n\t"
"cmp $dst, 0\n\t"
"cset $dst" %}
"cmp $dst, 0xff\n\t"

This comment has been minimized.

Loading
@pfustc

pfustc Apr 2, 2021
Member

I think we should write "#0xff" here. However, it looks like all the other immediates in the format fields of aarch64_neon.ad omit the number sign as well.

This comment has been minimized.

Loading
@nsjian

nsjian Apr 2, 2021
Author

Thanks for the review, but I think both forms (with or without the "#") are OK.

"cset $dst\t# alltrue 8B" %}
ins_encode %{
__ andr(as_FloatRegister($tmp$$reg), __ T8B,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
__ notr(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($tmp$$reg));
__ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($tmp$$reg));
// No need to use src2.
__ uminv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($src1$$reg));
__ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
__ cmpw($dst$$Register, zr);
__ cmpw($dst$$Register, 0xff);
__ csetw($dst$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
@@ -3588,19 +3586,15 @@ instruct alltrue_in_mask16B(iRegINoSp dst, vecX src1, vecX src2, vecX tmp, rFlag
match(Set dst (VectorTest src1 src2 ));
ins_cost(INSN_COST);
effect(TEMP tmp, KILL cr);
format %{ "andr $tmp, T16B, $src1, $src2\t# src2 is maskAllTrue\n\t"
"notr $tmp, T16B, $tmp\n\t"
"addv $tmp, T16B, $tmp\n\t"
format %{ "uminv $tmp, T16B, $src1\n\t"
"umov $dst, $tmp, B, 0\n\t"
"cmp $dst, 0\n\t"
"cset $dst" %}
"cmp $dst, 0xff\n\t"
"cset $dst\t# alltrue 16B" %}
ins_encode %{
__ andr(as_FloatRegister($tmp$$reg), __ T16B,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
__ notr(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($tmp$$reg));
__ addv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($tmp$$reg));
// No need to use src2.
__ uminv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($src1$$reg));
__ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
__ cmpw($dst$$Register, zr);
__ cmpw($dst$$Register, 0xff);
__ csetw($dst$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
@@ -1415,11 +1415,12 @@ instruct anytrue_in_mask$1B`'(iRegINoSp dst, vec$2 src1, vec$2 src2, vec$2 tmp,
match(Set dst (VectorTest src1 src2 ));
ins_cost(INSN_COST);
effect(TEMP tmp, KILL cr);
format %{ "addv $tmp, T$1B, $src1\t# src1 and src2 are the same\n\t"
format %{ "addv $tmp, T$1B, $src1\n\t"
"umov $dst, $tmp, B, 0\n\t"
"cmp $dst, 0\n\t"
"cset $dst" %}
"cset $dst\t# anytrue $1B" %}
ins_encode %{
// No need to use src2.
__ addv(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($src1$$reg));
__ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
__ cmpw($dst$$Register, zr);
@@ -1438,19 +1439,15 @@ instruct alltrue_in_mask$1B`'(iRegINoSp dst, vec$2 src1, vec$2 src2, vec$2 tmp,
match(Set dst (VectorTest src1 src2 ));
ins_cost(INSN_COST);
effect(TEMP tmp, KILL cr);
format %{ "andr $tmp, T$1B, $src1, $src2\t# src2 is maskAllTrue\n\t"
"notr $tmp, T$1B, $tmp\n\t"
"addv $tmp, T$1B, $tmp\n\t"
format %{ "uminv $tmp, T$1B, $src1\n\t"
"umov $dst, $tmp, B, 0\n\t"
"cmp $dst, 0\n\t"
"cset $dst" %}
"cmp $dst, 0xff\n\t"
"cset $dst\t# alltrue $1B" %}
ins_encode %{
__ andr(as_FloatRegister($tmp$$reg), __ T$1B,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
__ notr(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($tmp$$reg));
__ addv(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($tmp$$reg));
// No need to use src2.
__ uminv(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($src1$$reg));
__ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
__ cmpw($dst$$Register, zr);
__ cmpw($dst$$Register, 0xff);
__ csetw($dst$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
@@ -2427,6 +2427,7 @@ void mvnw(Register Rd, Register Rm,
INSN(addv, 0, 0b110001101110, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
INSN(smaxv, 0, 0b110000101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
INSN(sminv, 0, 0b110001101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
INSN(uminv, 1, 0b110001101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
INSN(cls, 0, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(clz, 1, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(cnt, 0, 0b100000010110, 0); // accepted arrangements: T8B, T16B
@@ -1401,9 +1401,12 @@ def generate(kind, names):
["smaxv", "smaxv", "8B"], ["smaxv", "smaxv", "16B"],
["smaxv", "smaxv", "4H"], ["smaxv", "smaxv", "8H"],
["smaxv", "smaxv", "4S"], ["fmaxv", "fmaxv", "4S"],
["sminv", "sminv", "8B"], ["sminv", "sminv", "16B"],
["sminv", "sminv", "4H"], ["sminv", "sminv", "8H"],
["sminv", "sminv", "4S"], ["fminv", "fminv", "4S"],
["sminv", "sminv", "8B"], ["uminv", "uminv", "8B"],
["sminv", "sminv", "16B"],["uminv", "uminv", "16B"],
["sminv", "sminv", "4H"], ["uminv", "uminv", "4H"],
["sminv", "sminv", "8H"], ["uminv", "uminv", "8H"],
["sminv", "sminv", "4S"], ["uminv", "uminv", "4S"],
["fminv", "fminv", "4S"],
["fmaxp", "fmaxp", "2S"], ["fmaxp", "fmaxp", "2D"],
["fminp", "fminp", "2S"], ["fminp", "fminp", "2D"],
])
Loading