Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8264352: AArch64: Optimize vector "not/andNot" for NEON and SVE #3370

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter
Filter file types
Jump to
Jump to file
Failed to load files.

Always

Just for now

@@ -2842,6 +2842,51 @@ instruct vnot2L(vecX dst, vecX src, immL_M1 m1)
ins_pipe(pipe_class_default);
%}

// ------------------------------ Vector and_not -------------------------------

// NEON and-not for 8-byte vectors with byte/short/int lanes.
// Matches AndV of src1 with (XorV src2 (Replicate{B,S,I} m1)) and emits a
// single `bic` using the T8B arrangement.
// NOTE(review): immI_M1 presumably restricts m1 to the constant -1, which is
// what makes the XorV a bitwise NOT of src2 -- confirm against the operand
// definition.
instruct vand_not2I(vecD dst, vecD src1, vecD src2, immI_M1 m1)
%{
// Only applies when the vector node is exactly 8 bytes long.
predicate(n->as_Vector()->length_in_bytes() == 8);
// One rule per replicated element type; all three share the encoding below.
match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));
ins_cost(INSN_COST);
format %{ "bic $dst, T8B, $src1, $src2\t# vector (8B)" %}
ins_encode %{
// m1 is not passed to the assembler; only dst, src1 and src2 are encoded.
__ bic(as_FloatRegister($dst$$reg), __ T8B,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_class_default);
%}

// NEON and-not for 16-byte vectors with byte/short/int lanes.
// Matches AndV of src1 with (XorV src2 (Replicate{B,S,I} m1)) and emits a
// single `bic` using the T16B arrangement.
// NOTE(review): immI_M1 presumably restricts m1 to the constant -1, which is
// what makes the XorV a bitwise NOT of src2 -- confirm against the operand
// definition.
instruct vand_not4I(vecX dst, vecX src1, vecX src2, immI_M1 m1)
%{
// Only applies when the vector node is exactly 16 bytes long.
predicate(n->as_Vector()->length_in_bytes() == 16);
// One rule per replicated element type; all three share the encoding below.
match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));
ins_cost(INSN_COST);
format %{ "bic $dst, T16B, $src1, $src2\t# vector (16B)" %}
ins_encode %{
// m1 is not passed to the assembler; only dst, src1 and src2 are encoded.
__ bic(as_FloatRegister($dst$$reg), __ T16B,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_class_default);
%}

// NEON and-not for 16-byte vectors with long lanes.
// Matches AndV of src1 with (XorV src2 (ReplicateL m1)) and emits a single
// `bic` using the T16B arrangement, the same encoding as vand_not4I.
// NOTE(review): immL_M1 presumably restricts m1 to the long constant -1,
// which is what makes the XorV a bitwise NOT of src2 -- confirm against the
// operand definition.
instruct vand_not2L(vecX dst, vecX src1, vecX src2, immL_M1 m1)
%{
// Only applies when the vector node is exactly 16 bytes long.
predicate(n->as_Vector()->length_in_bytes() == 16);
match(Set dst (AndV src1 (XorV src2 (ReplicateL m1))));
ins_cost(INSN_COST);
format %{ "bic $dst, T16B, $src1, $src2\t# vector (16B)" %}
ins_encode %{
// m1 is not passed to the assembler; only dst, src1 and src2 are encoded.
__ bic(as_FloatRegister($dst$$reg), __ T16B,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_class_default);
%}

// ------------------------------ Vector max/min -------------------------------

instruct vmax8B(vecD dst, vecD src1, vecD src2)
@@ -1036,6 +1036,32 @@ VECTOR_NOT(2, I, D, 8, 8B)
VECTOR_NOT(4, I, X, 16, 16B)
VECTOR_NOT(2, L, X, 16, 16B)
undefine(MATCH_RULE)
// ------------------------------ Vector and_not -------------------------------
dnl
dnl MATCH_RULE(type) expands to the match rules of a NEON and_not instruct.
dnl When type is I it emits three rules, one each for ReplicateB, ReplicateS
dnl and ReplicateI. For any other type it emits the single ReplicateL rule.
define(`MATCH_RULE', `ifelse($1, I,
`match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));',
`match(Set dst (AndV src1 (XorV src2 (ReplicateL m1))));')')dnl
dnl
dnl VECTOR_AND_NOT(count, type, reg, bytes, arrangement) generates one NEON
dnl and_not instruct named vand_not followed by count and type.
dnl   $1 count       - first part of the name suffix
dnl   $2 type        - I or L; selects the imm operand type and the match rules
dnl   $3 reg         - suffix of the vec operand class (D or X below)
dnl   $4 bytes       - vector length in bytes tested by the predicate
dnl   $5 arrangement - suffix of the T arrangement passed to bic
dnl MATCH_RULE must be defined before the instantiations below are expanded;
dnl it is undefined again right after them.
define(`VECTOR_AND_NOT', `
instruct vand_not$1$2`'(vec$3 dst, vec$3 src1, vec$3 src2, imm$2_M1 m1)
%{
predicate(n->as_Vector()->length_in_bytes() == $4);
MATCH_RULE($2)
ins_cost(INSN_COST);
format %{ "bic $dst, T$5, $src1, $src2\t# vector ($5)" %}
ins_encode %{
__ bic(as_FloatRegister($dst$$reg), __ T$5,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_class_default);
%}')dnl
dnl $1 $2 $3 $4 $5
VECTOR_AND_NOT(2, I, D, 8, 8B)
VECTOR_AND_NOT(4, I, X, 16, 16B)
VECTOR_AND_NOT(2, L, X, 16, 16B)
undefine(MATCH_RULE)
dnl
// ------------------------------ Vector max/min -------------------------------
dnl
@@ -453,6 +453,66 @@ instruct vxor(vReg dst, vReg src1, vReg src2) %{
ins_pipe(pipe_slow);
%}

// vector not

// SVE vector bitwise NOT for byte/short/int lanes.
// Matches XorV of src with a replicated m1 and emits a single sve_not.
// NOTE(review): immI_M1 presumably restricts m1 to -1 (all bits set), which
// is what makes the XorV a NOT -- confirm against the operand definition.
instruct vnotI(vReg dst, vReg src, immI_M1 m1) %{
// Requires SVE to be enabled and a vector of at least 16 bytes.
predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
// One rule per replicated element type; all three share the encoding below.
match(Set dst (XorV src (ReplicateB m1)));
match(Set dst (XorV src (ReplicateS m1)));
match(Set dst (XorV src (ReplicateI m1)));
ins_cost(SVE_COST);
format %{ "sve_not $dst, $src\t# vector (sve) B/H/S" %}
ins_encode %{
// Element size D is passed for every lane type, with ptrue as the governing
// predicate. NOTE(review): presumably safe because NOT is bitwise and ptrue
// is all-true -- confirm.
__ sve_not(as_FloatRegister($dst$$reg), __ D,
ptrue, as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}

// SVE vector bitwise NOT for long lanes.
// Matches XorV of src with (ReplicateL m1) and emits a single sve_not.
// NOTE(review): immL_M1 presumably restricts m1 to the long constant -1,
// which is what makes the XorV a NOT -- confirm against the operand
// definition.
instruct vnotL(vReg dst, vReg src, immL_M1 m1) %{
// Requires SVE to be enabled and a vector of at least 16 bytes.
predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
match(Set dst (XorV src (ReplicateL m1)));
ins_cost(SVE_COST);
format %{ "sve_not $dst, $src\t# vector (sve) D" %}
ins_encode %{
// Same encoding as vnotI: element size D with ptrue as governing predicate.
__ sve_not(as_FloatRegister($dst$$reg), __ D,
ptrue, as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}


// vector and_not

// SVE vector and-not for byte/short/int lanes.
// Matches AndV of src1 with (XorV src2 (Replicate{B,S,I} m1)) and emits a
// single sve_bic.
// NOTE(review): immI_M1 presumably restricts m1 to -1, which is what makes
// the XorV a bitwise NOT of src2 -- confirm against the operand definition.
instruct vand_notI(vReg dst, vReg src1, vReg src2, immI_M1 m1) %{
// Requires SVE to be enabled and a vector of at least 16 bytes.
predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
// One rule per replicated element type; all three share the encoding below.
match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));
ins_cost(SVE_COST);
format %{ "sve_bic $dst, $src1, $src2\t# vector (sve) B/H/S" %}
ins_encode %{
// sve_bic is called with three registers only: no element size, no predicate.
__ sve_bic(as_FloatRegister($dst$$reg),
as_FloatRegister($src1$$reg),
as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_slow);
%}

// SVE vector and-not for long lanes.
// Matches AndV of src1 with (XorV src2 (ReplicateL m1)) and emits a single
// sve_bic, the same encoding as vand_notI.
// NOTE(review): immL_M1 presumably restricts m1 to the long constant -1,
// which is what makes the XorV a bitwise NOT of src2 -- confirm against the
// operand definition.
instruct vand_notL(vReg dst, vReg src1, vReg src2, immL_M1 m1) %{
// Requires SVE to be enabled and a vector of at least 16 bytes.
predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
match(Set dst (AndV src1 (XorV src2 (ReplicateL m1))));
ins_cost(SVE_COST);
format %{ "sve_bic $dst, $src1, $src2\t# vector (sve) D" %}
ins_encode %{
// sve_bic is called with three registers only: no element size, no predicate.
__ sve_bic(as_FloatRegister($dst$$reg),
as_FloatRegister($src1$$reg),
as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_slow);
%}


// vector float div

instruct vdivF(vReg dst_src1, vReg src2) %{
@@ -316,6 +316,57 @@ BINARY_OP_UNSIZED(vor, OrV, 16, sve_orr)

// vector xor
BINARY_OP_UNSIZED(vxor, XorV, 16, sve_eor)

// vector not
dnl
dnl MATCH_RULE(type) expands to the match rules of an SVE vector-not instruct.
dnl When type is I it emits three rules, one each for ReplicateB, ReplicateS
dnl and ReplicateI. For any other type it emits the single ReplicateL rule.
define(`MATCH_RULE', `ifelse($1, I,
`match(Set dst (XorV src (ReplicateB m1)));
match(Set dst (XorV src (ReplicateS m1)));
match(Set dst (XorV src (ReplicateI m1)));',
`match(Set dst (XorV src (ReplicateL m1)));')')dnl
dnl
dnl VECTOR_NOT(type, label) generates one SVE vector-not instruct named vnot
dnl followed by type.
dnl   $1 type  - I or L; selects the imm operand type and the match rules
dnl   $2 label - lane-size text appended to the format string
dnl The generated encoding always passes element size D and ptrue to sve_not,
dnl whatever the type.
dnl MATCH_RULE must be defined before the instantiations below are expanded;
dnl it is undefined again right after them.
define(`VECTOR_NOT', `
instruct vnot$1`'(vReg dst, vReg src, imm$1_M1 m1) %{
predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
MATCH_RULE($1)
ins_cost(SVE_COST);
format %{ "sve_not $dst, $src\t# vector (sve) $2" %}
ins_encode %{
__ sve_not(as_FloatRegister($dst$$reg), __ D,
ptrue, as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl $1,$2
VECTOR_NOT(I, B/H/S)
VECTOR_NOT(L, D)
undefine(MATCH_RULE)

// vector and_not
dnl
dnl MATCH_RULE(type) expands to the match rules of an SVE and_not instruct.
dnl When type is I it emits three rules, one each for ReplicateB, ReplicateS
dnl and ReplicateI. For any other type it emits the single ReplicateL rule.
define(`MATCH_RULE', `ifelse($1, I,
`match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));',
`match(Set dst (AndV src1 (XorV src2 (ReplicateL m1))));')')dnl
dnl
dnl VECTOR_AND_NOT(type, label) generates one SVE and_not instruct named
dnl vand_not followed by type.
dnl   $1 type  - I or L; selects the imm operand type and the match rules
dnl   $2 label - lane-size text appended to the format string
dnl The generated encoding calls sve_bic with the three registers only.
dnl MATCH_RULE must be defined before the instantiations below are expanded;
dnl it is undefined again right after them.
define(`VECTOR_AND_NOT', `
instruct vand_not$1`'(vReg dst, vReg src1, vReg src2, imm$1_M1 m1) %{
predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
MATCH_RULE($1)
ins_cost(SVE_COST);
format %{ "sve_bic $dst, $src1, $src2\t# vector (sve) $2" %}
ins_encode %{
__ sve_bic(as_FloatRegister($dst$$reg),
as_FloatRegister($src1$$reg),
as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl $1,$2
VECTOR_AND_NOT(I, B/H/S)
VECTOR_AND_NOT(L, D)
undefine(MATCH_RULE)
dnl
dnl VDIVF($1, $2 , $3 )
dnl VDIVF(name_suffix, size, min_vec_len)
@@ -3060,6 +3060,7 @@ void mvnw(Register Rd, Register Rm,
INSN(sve_and, 0b00);
INSN(sve_eor, 0b10);
INSN(sve_orr, 0b01);
INSN(sve_bic, 0b11);
#undef INSN

// SVE shift immediate - unpredicated
@@ -916,7 +916,7 @@ def __init__(self, args):
self._bitwiseop = False
if name[0] == 'f':
self._width = RegVariant(2, 3)
elif not self._isPredicated and (name == "and" or name == "eor" or name == "orr"):
elif not self._isPredicated and (name in ["and", "eor", "orr", "bic"]):
self._width = RegVariant(3, 3)
self._bitwiseop = True
else:
@@ -1612,6 +1612,7 @@ def generate(kind, names):
["and", "ZZZ"],
["eor", "ZZZ"],
["orr", "ZZZ"],
["bic", "ZZZ"],
])

generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0],
@@ -905,17 +905,18 @@
__ sve_and(z9, z22, z11); // and z9.d, z22.d, z11.d
__ sve_eor(z5, z30, z16); // eor z5.d, z30.d, z16.d
__ sve_orr(z22, z11, z1); // orr z22.d, z11.d, z1.d
__ sve_bic(z8, z20, z16); // bic z8.d, z20.d, z16.d

// SVEReductionOp
__ sve_andv(v8, __ D, p5, z16); // andv d8, p5, z16.d
__ sve_orv(v15, __ S, p1, z4); // orv s15, p1, z4.s
__ sve_eorv(v8, __ B, p1, z29); // eorv b8, p1, z29.b
__ sve_smaxv(v28, __ D, p4, z29); // smaxv d28, p4, z29.d
__ sve_sminv(v9, __ H, p3, z2); // sminv h9, p3, z2.h
__ sve_fminv(v28, __ S, p0, z7); // fminv s28, p0, z7.s
__ sve_fmaxv(v26, __ S, p5, z17); // fmaxv s26, p5, z17.s
__ sve_fadda(v8, __ D, p4, z21); // fadda d8, p4, d8, z21.d
__ sve_uaddv(v5, __ S, p5, z21); // uaddv d5, p5, z21.s
__ sve_andv(v15, __ S, p1, z4); // andv s15, p1, z4.s
__ sve_orv(v8, __ B, p1, z29); // orv b8, p1, z29.b
__ sve_eorv(v28, __ D, p4, z29); // eorv d28, p4, z29.d
__ sve_smaxv(v9, __ H, p3, z2); // smaxv h9, p3, z2.h
__ sve_sminv(v28, __ B, p0, z7); // sminv b28, p0, z7.b
__ sve_fminv(v26, __ S, p5, z17); // fminv s26, p5, z17.s
__ sve_fmaxv(v8, __ D, p4, z21); // fmaxv d8, p4, z21.d
__ sve_fadda(v5, __ D, p5, z21); // fadda d5, p5, d5, z21.d
__ sve_uaddv(v22, __ S, p4, z29); // uaddv d22, p4, z29.s

__ bind(forth);

@@ -934,30 +935,30 @@
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
0x14000000, 0x17ffffd7, 0x140002e0, 0x94000000,
0x97ffffd4, 0x940002dd, 0x3400000a, 0x34fffa2a,
0x34005b4a, 0x35000008, 0x35fff9c8, 0x35005ae8,
0xb400000b, 0xb4fff96b, 0xb4005a8b, 0xb500001d,
0xb5fff91d, 0xb5005a3d, 0x10000013, 0x10fff8b3,
0x100059d3, 0x90000013, 0x36300016, 0x3637f836,
0x36305956, 0x3758000c, 0x375ff7cc, 0x375858ec,
0x14000000, 0x17ffffd7, 0x140002e1, 0x94000000,
0x97ffffd4, 0x940002de, 0x3400000a, 0x34fffa2a,
0x34005b6a, 0x35000008, 0x35fff9c8, 0x35005b08,
0xb400000b, 0xb4fff96b, 0xb4005aab, 0xb500001d,
0xb5fff91d, 0xb5005a5d, 0x10000013, 0x10fff8b3,
0x100059f3, 0x90000013, 0x36300016, 0x3637f836,
0x36305976, 0x3758000c, 0x375ff7cc, 0x3758590c,
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
0x540056c0, 0x54000001, 0x54fff541, 0x54005661,
0x54000002, 0x54fff4e2, 0x54005602, 0x54000002,
0x54fff482, 0x540055a2, 0x54000003, 0x54fff423,
0x54005543, 0x54000003, 0x54fff3c3, 0x540054e3,
0x54000004, 0x54fff364, 0x54005484, 0x54000005,
0x54fff305, 0x54005425, 0x54000006, 0x54fff2a6,
0x540053c6, 0x54000007, 0x54fff247, 0x54005367,
0x54000008, 0x54fff1e8, 0x54005308, 0x54000009,
0x54fff189, 0x540052a9, 0x5400000a, 0x54fff12a,
0x5400524a, 0x5400000b, 0x54fff0cb, 0x540051eb,
0x5400000c, 0x54fff06c, 0x5400518c, 0x5400000d,
0x54fff00d, 0x5400512d, 0x5400000e, 0x54ffefae,
0x540050ce, 0x5400000f, 0x54ffef4f, 0x5400506f,
0x540056e0, 0x54000001, 0x54fff541, 0x54005681,
0x54000002, 0x54fff4e2, 0x54005622, 0x54000002,
0x54fff482, 0x540055c2, 0x54000003, 0x54fff423,
0x54005563, 0x54000003, 0x54fff3c3, 0x54005503,
0x54000004, 0x54fff364, 0x540054a4, 0x54000005,
0x54fff305, 0x54005445, 0x54000006, 0x54fff2a6,
0x540053e6, 0x54000007, 0x54fff247, 0x54005387,
0x54000008, 0x54fff1e8, 0x54005328, 0x54000009,
0x54fff189, 0x540052c9, 0x5400000a, 0x54fff12a,
0x5400526a, 0x5400000b, 0x54fff0cb, 0x5400520b,
0x5400000c, 0x54fff06c, 0x540051ac, 0x5400000d,
0x54fff00d, 0x5400514d, 0x5400000e, 0x54ffefae,
0x540050ee, 0x5400000f, 0x54ffef4f, 0x5400508f,
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200,
@@ -989,7 +990,7 @@
0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176,
0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422,
0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a,
0xbd1b1869, 0x580040bb, 0x1800000b, 0xf8945060,
0xbd1b1869, 0x580040db, 0x1800000b, 0xf8945060,
0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035,
0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380,
0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b11,
@@ -1116,8 +1117,8 @@
0x6580b266, 0x65c1b50c, 0x658db013, 0x65c18677,
0x65a010cd, 0x65a8332e, 0x65bb56d6, 0x65b46e23,
0x04405ce4, 0x048476d0, 0x042b32c9, 0x04b033c5,
0x04613176, 0x04da3608, 0x0498248f, 0x041927a8,
0x04c833bc, 0x044a2c49, 0x658720fc, 0x6586363a,
0x65d832a8, 0x048136a5,
0x04613176, 0x04f03288, 0x049a248f, 0x041827a8,
0x04d933bc, 0x04482c49, 0x040a20fc, 0x6587363a,
0x65c632a8, 0x65d836a5, 0x048133b6,
};
// END Generated code -- do not edit
ProTip! Use n and p to navigate between commits in a pull request.