Skip to content
Permalink
Browse files

8264352: AArch64: Optimize vector "not/andNot" for NEON and SVE

Reviewed-by: aph, njian
  • Loading branch information
Xiaohong Gong authored and Ningsheng Jian committed Apr 8, 2021
1 parent 016db40 commit e89542fbe613c3d72574ca053e50deb8056957d8
@@ -2842,6 +2842,51 @@ instruct vnot2L(vecX dst, vecX src, immL_M1 m1)
ins_pipe(pipe_class_default);
%}

// ------------------------------ Vector and_not -------------------------------

// Vector and-not for vector nodes whose length is 8 bytes (vecD operands).
// Matches AndV(src1, XorV(src2, Replicate{B,S,I}(m1))) and emits a single
// "bic" with the T8B arrangement. The same encoding serves all three
// Replicate variants. m1 is an immI_M1 operand (presumably the constant -1,
// which would make the XorV a bitwise NOT of src2 -- confirm against the
// operand definition); it only constrains the match and is not encoded.
instruct vand_not2I(vecD dst, vecD src1, vecD src2, immI_M1 m1)
%{
predicate(n->as_Vector()->length_in_bytes() == 8);
match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));
ins_cost(INSN_COST);
format %{ "bic $dst, T8B, $src1, $src2\t# vector (8B)" %}
ins_encode %{
// Arguments are (dst, arrangement, src1, src2); m1 is not passed on.
__ bic(as_FloatRegister($dst$$reg), __ T8B,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_class_default);
%}

// Vector and-not for vector nodes whose length is 16 bytes (vecX operands),
// byte/short/int Replicate variants.
// Matches AndV(src1, XorV(src2, Replicate{B,S,I}(m1))) and emits a single
// "bic" with the T16B arrangement. m1 is an immI_M1 operand (presumably the
// constant -1 -- confirm against the operand definition); it only constrains
// the match and is not encoded.
instruct vand_not4I(vecX dst, vecX src1, vecX src2, immI_M1 m1)
%{
predicate(n->as_Vector()->length_in_bytes() == 16);
match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));
ins_cost(INSN_COST);
format %{ "bic $dst, T16B, $src1, $src2\t# vector (16B)" %}
ins_encode %{
// Arguments are (dst, arrangement, src1, src2); m1 is not passed on.
__ bic(as_FloatRegister($dst$$reg), __ T16B,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_class_default);
%}

// Vector and-not for vector nodes whose length is 16 bytes (vecX operands),
// long Replicate variant.
// Matches AndV(src1, XorV(src2, ReplicateL(m1))) and emits the same
// "bic ... T16B" encoding as vand_not4I. The separate rule exists because the
// mask operand here is immL_M1 rather than immI_M1 (presumably the long
// constant -1 -- confirm against the operand definition).
instruct vand_not2L(vecX dst, vecX src1, vecX src2, immL_M1 m1)
%{
predicate(n->as_Vector()->length_in_bytes() == 16);
match(Set dst (AndV src1 (XorV src2 (ReplicateL m1))));
ins_cost(INSN_COST);
format %{ "bic $dst, T16B, $src1, $src2\t# vector (16B)" %}
ins_encode %{
// Arguments are (dst, arrangement, src1, src2); m1 is not passed on.
__ bic(as_FloatRegister($dst$$reg), __ T16B,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_class_default);
%}

// ------------------------------ Vector max/min -------------------------------

instruct vmax8B(vecD dst, vecD src1, vecD src2)
@@ -1036,6 +1036,32 @@ VECTOR_NOT(2, I, D, 8, 8B)
VECTOR_NOT(4, I, X, 16, 16B)
VECTOR_NOT(2, L, X, 16, 16B)
undefine(MATCH_RULE)
// ------------------------------ Vector and_not -------------------------------
dnl
dnl MATCH_RULE(type) expands to the match rules of one NEON and_not instruct.
dnl When the argument is I it yields three rules, one each for ReplicateB,
dnl ReplicateS and ReplicateI; for any other argument it yields the single
dnl ReplicateL rule.
define(`MATCH_RULE', `ifelse($1, I,
`match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));',
`match(Set dst (AndV src1 (XorV src2 (ReplicateL m1))));')')dnl
dnl
dnl VECTOR_AND_NOT(lanes, type, reg, bytes, arrangement) emits one NEON
dnl and_not instruct.
dnl   1: lane count, used only in the instruct name (vand_not<1><2>)
dnl   2: type letter (I or L); selects the imm<2>_M1 mask operand and is
dnl      forwarded to MATCH_RULE to pick the match rules
dnl   3: vector operand class suffix (vec<3>)
dnl   4: vector length in bytes tested by the predicate
dnl   5: arrangement suffix used for T<5> in the bic call and format string
define(`VECTOR_AND_NOT', `
instruct vand_not$1$2`'(vec$3 dst, vec$3 src1, vec$3 src2, imm$2_M1 m1)
%{
predicate(n->as_Vector()->length_in_bytes() == $4);
MATCH_RULE($2)
ins_cost(INSN_COST);
format %{ "bic $dst, T$5, $src1, $src2\t# vector ($5)" %}
ins_encode %{
__ bic(as_FloatRegister($dst$$reg), __ T$5,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_class_default);
%}')dnl
dnl Instantiate the NEON and_not rules. Argument columns are:
dnl lane count, type letter, operand class suffix, length in bytes, arrangement.
dnl $1 $2 $3 $4 $5
VECTOR_AND_NOT(2, I, D, 8, 8B)
VECTOR_AND_NOT(4, I, X, 16, 16B)
VECTOR_AND_NOT(2, L, X, 16, 16B)
dnl MATCH_RULE is only needed by the instantiations above, so remove it.
undefine(MATCH_RULE)
dnl
// ------------------------------ Vector max/min -------------------------------
dnl
@@ -453,6 +453,66 @@ instruct vxor(vReg dst, vReg src1, vReg src2) %{
ins_pipe(pipe_slow);
%}

// vector not

// SVE vector NOT for the byte/short/int Replicate variants.
// Applies when UseSVE > 0 and the vector node is at least 16 bytes long.
// Matches XorV(src, Replicate{B,S,I}(m1)) and emits one sve_not. m1 is an
// immI_M1 operand (presumably the constant -1 -- confirm against the operand
// definition); it only constrains the match and is not encoded.
instruct vnotI(vReg dst, vReg src, immI_M1 m1) %{
predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
match(Set dst (XorV src (ReplicateB m1)));
match(Set dst (XorV src (ReplicateS m1)));
match(Set dst (XorV src (ReplicateI m1)));
ins_cost(SVE_COST);
format %{ "sve_not $dst, $src\t# vector (sve) B/H/S" %}
ins_encode %{
// The D element size is passed for all three matched variants, and the
// instruction is governed by ptrue (presumably the all-true predicate
// register -- confirm).
__ sve_not(as_FloatRegister($dst$$reg), __ D,
ptrue, as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}

// SVE vector NOT for the long Replicate variant.
// Applies when UseSVE > 0 and the vector node is at least 16 bytes long.
// Matches XorV(src, ReplicateL(m1)) and emits the same sve_not encoding as
// vnotI; the separate rule exists because the mask operand is immL_M1
// (presumably the long constant -1 -- confirm against the operand definition).
instruct vnotL(vReg dst, vReg src, immL_M1 m1) %{
predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
match(Set dst (XorV src (ReplicateL m1)));
ins_cost(SVE_COST);
format %{ "sve_not $dst, $src\t# vector (sve) D" %}
ins_encode %{
// Governed by ptrue (presumably the all-true predicate register -- confirm).
__ sve_not(as_FloatRegister($dst$$reg), __ D,
ptrue, as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}


// vector and_not

// SVE vector and-not for the byte/short/int Replicate variants.
// Applies when UseSVE > 0 and the vector node is at least 16 bytes long.
// Matches AndV(src1, XorV(src2, Replicate{B,S,I}(m1))) and emits one sve_bic.
// m1 is an immI_M1 operand (presumably the constant -1, which would make the
// XorV a bitwise NOT of src2 -- confirm against the operand definition); it
// only constrains the match and is not encoded.
instruct vand_notI(vReg dst, vReg src1, vReg src2, immI_M1 m1) %{
predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));
ins_cost(SVE_COST);
format %{ "sve_bic $dst, $src1, $src2\t# vector (sve) B/H/S" %}
ins_encode %{
// sve_bic is called with registers only: no element size, no predicate.
__ sve_bic(as_FloatRegister($dst$$reg),
as_FloatRegister($src1$$reg),
as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_slow);
%}

// SVE vector and-not for the long Replicate variant.
// Applies when UseSVE > 0 and the vector node is at least 16 bytes long.
// Matches AndV(src1, XorV(src2, ReplicateL(m1))) and emits the same sve_bic
// encoding as vand_notI; the separate rule exists because the mask operand is
// immL_M1 (presumably the long constant -1 -- confirm against the operand
// definition).
instruct vand_notL(vReg dst, vReg src1, vReg src2, immL_M1 m1) %{
predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
match(Set dst (AndV src1 (XorV src2 (ReplicateL m1))));
ins_cost(SVE_COST);
format %{ "sve_bic $dst, $src1, $src2\t# vector (sve) D" %}
ins_encode %{
// sve_bic is called with registers only: no element size, no predicate.
__ sve_bic(as_FloatRegister($dst$$reg),
as_FloatRegister($src1$$reg),
as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_slow);
%}


// vector float div

instruct vdivF(vReg dst_src1, vReg src2) %{
@@ -316,6 +316,57 @@ BINARY_OP_UNSIZED(vor, OrV, 16, sve_orr)

// vector xor
BINARY_OP_UNSIZED(vxor, XorV, 16, sve_eor)

// vector not
dnl
dnl MATCH_RULE(type) expands to the match rules of one SVE vector-not instruct.
dnl When the argument is I it yields three rules, one each for ReplicateB,
dnl ReplicateS and ReplicateI; for any other argument it yields the single
dnl ReplicateL rule.
define(`MATCH_RULE', `ifelse($1, I,
`match(Set dst (XorV src (ReplicateB m1)));
match(Set dst (XorV src (ReplicateS m1)));
match(Set dst (XorV src (ReplicateI m1)));',
`match(Set dst (XorV src (ReplicateL m1)));')')dnl
dnl
dnl VECTOR_NOT(type, sizes) emits one SVE vector-not instruct.
dnl   1: type letter (I or L); names the instruct (vnot<1>), selects the
dnl      imm<1>_M1 mask operand and is forwarded to MATCH_RULE
dnl   2: element-size label that appears only in the format string
dnl The sve_not call itself always passes the D element size and ptrue,
dnl whatever the arguments are.
define(`VECTOR_NOT', `
instruct vnot$1`'(vReg dst, vReg src, imm$1_M1 m1) %{
predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
MATCH_RULE($1)
ins_cost(SVE_COST);
format %{ "sve_not $dst, $src\t# vector (sve) $2" %}
ins_encode %{
__ sve_not(as_FloatRegister($dst$$reg), __ D,
ptrue, as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl Instantiate the SVE vector-not rules. Argument columns are:
dnl type letter, element-size label for the format string.
dnl $1,$2
VECTOR_NOT(I, B/H/S)
VECTOR_NOT(L, D)
dnl MATCH_RULE is only needed by the instantiations above, so remove it.
undefine(MATCH_RULE)

// vector and_not
dnl
dnl MATCH_RULE(type) expands to the match rules of one SVE and_not instruct.
dnl When the argument is I it yields three rules, one each for ReplicateB,
dnl ReplicateS and ReplicateI; for any other argument it yields the single
dnl ReplicateL rule.
define(`MATCH_RULE', `ifelse($1, I,
`match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));',
`match(Set dst (AndV src1 (XorV src2 (ReplicateL m1))));')')dnl
dnl
dnl VECTOR_AND_NOT(type, sizes) emits one SVE and_not instruct.
dnl   1: type letter (I or L); names the instruct (vand_not<1>), selects the
dnl      imm<1>_M1 mask operand and is forwarded to MATCH_RULE
dnl   2: element-size label that appears only in the format string
dnl The sve_bic call takes the three registers only, so the emitted encoding
dnl does not depend on either argument.
define(`VECTOR_AND_NOT', `
instruct vand_not$1`'(vReg dst, vReg src1, vReg src2, imm$1_M1 m1) %{
predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
MATCH_RULE($1)
ins_cost(SVE_COST);
format %{ "sve_bic $dst, $src1, $src2\t# vector (sve) $2" %}
ins_encode %{
__ sve_bic(as_FloatRegister($dst$$reg),
as_FloatRegister($src1$$reg),
as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl Instantiate the SVE and_not rules. Argument columns are:
dnl type letter, element-size label for the format string.
dnl $1,$2
VECTOR_AND_NOT(I, B/H/S)
VECTOR_AND_NOT(L, D)
dnl MATCH_RULE is only needed by the instantiations above, so remove it.
undefine(MATCH_RULE)
dnl
dnl VDIVF($1, $2 , $3 )
dnl VDIVF(name_suffix, size, min_vec_len)
@@ -3060,6 +3060,7 @@ void mvnw(Register Rd, Register Rm,
INSN(sve_and, 0b00);
INSN(sve_eor, 0b10);
INSN(sve_orr, 0b01);
INSN(sve_bic, 0b11);
#undef INSN

// SVE shift immediate - unpredicated
@@ -916,7 +916,7 @@ def __init__(self, args):
self._bitwiseop = False
if name[0] == 'f':
self._width = RegVariant(2, 3)
elif not self._isPredicated and (name == "and" or name == "eor" or name == "orr"):
elif not self._isPredicated and (name in ["and", "eor", "orr", "bic"]):
self._width = RegVariant(3, 3)
self._bitwiseop = True
else:
@@ -1612,6 +1612,7 @@ def generate(kind, names):
["and", "ZZZ"],
["eor", "ZZZ"],
["orr", "ZZZ"],
["bic", "ZZZ"],
])

generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0],
@@ -905,17 +905,18 @@
__ sve_and(z9, z22, z11); // and z9.d, z22.d, z11.d
__ sve_eor(z5, z30, z16); // eor z5.d, z30.d, z16.d
__ sve_orr(z22, z11, z1); // orr z22.d, z11.d, z1.d
__ sve_bic(z8, z20, z16); // bic z8.d, z20.d, z16.d

// SVEReductionOp
__ sve_andv(v8, __ D, p5, z16); // andv d8, p5, z16.d
__ sve_orv(v15, __ S, p1, z4); // orv s15, p1, z4.s
__ sve_eorv(v8, __ B, p1, z29); // eorv b8, p1, z29.b
__ sve_smaxv(v28, __ D, p4, z29); // smaxv d28, p4, z29.d
__ sve_sminv(v9, __ H, p3, z2); // sminv h9, p3, z2.h
__ sve_fminv(v28, __ S, p0, z7); // fminv s28, p0, z7.s
__ sve_fmaxv(v26, __ S, p5, z17); // fmaxv s26, p5, z17.s
__ sve_fadda(v8, __ D, p4, z21); // fadda d8, p4, d8, z21.d
__ sve_uaddv(v5, __ S, p5, z21); // uaddv d5, p5, z21.s
__ sve_andv(v15, __ S, p1, z4); // andv s15, p1, z4.s
__ sve_orv(v8, __ B, p1, z29); // orv b8, p1, z29.b
__ sve_eorv(v28, __ D, p4, z29); // eorv d28, p4, z29.d
__ sve_smaxv(v9, __ H, p3, z2); // smaxv h9, p3, z2.h
__ sve_sminv(v28, __ B, p0, z7); // sminv b28, p0, z7.b
__ sve_fminv(v26, __ S, p5, z17); // fminv s26, p5, z17.s
__ sve_fmaxv(v8, __ D, p4, z21); // fmaxv d8, p4, z21.d
__ sve_fadda(v5, __ D, p5, z21); // fadda d5, p5, d5, z21.d
__ sve_uaddv(v22, __ S, p4, z29); // uaddv d22, p4, z29.s

__ bind(forth);

@@ -934,30 +935,30 @@
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
0x14000000, 0x17ffffd7, 0x140002e0, 0x94000000,
0x97ffffd4, 0x940002dd, 0x3400000a, 0x34fffa2a,
0x34005b4a, 0x35000008, 0x35fff9c8, 0x35005ae8,
0xb400000b, 0xb4fff96b, 0xb4005a8b, 0xb500001d,
0xb5fff91d, 0xb5005a3d, 0x10000013, 0x10fff8b3,
0x100059d3, 0x90000013, 0x36300016, 0x3637f836,
0x36305956, 0x3758000c, 0x375ff7cc, 0x375858ec,
0x14000000, 0x17ffffd7, 0x140002e1, 0x94000000,
0x97ffffd4, 0x940002de, 0x3400000a, 0x34fffa2a,
0x34005b6a, 0x35000008, 0x35fff9c8, 0x35005b08,
0xb400000b, 0xb4fff96b, 0xb4005aab, 0xb500001d,
0xb5fff91d, 0xb5005a5d, 0x10000013, 0x10fff8b3,
0x100059f3, 0x90000013, 0x36300016, 0x3637f836,
0x36305976, 0x3758000c, 0x375ff7cc, 0x3758590c,
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
0x540056c0, 0x54000001, 0x54fff541, 0x54005661,
0x54000002, 0x54fff4e2, 0x54005602, 0x54000002,
0x54fff482, 0x540055a2, 0x54000003, 0x54fff423,
0x54005543, 0x54000003, 0x54fff3c3, 0x540054e3,
0x54000004, 0x54fff364, 0x54005484, 0x54000005,
0x54fff305, 0x54005425, 0x54000006, 0x54fff2a6,
0x540053c6, 0x54000007, 0x54fff247, 0x54005367,
0x54000008, 0x54fff1e8, 0x54005308, 0x54000009,
0x54fff189, 0x540052a9, 0x5400000a, 0x54fff12a,
0x5400524a, 0x5400000b, 0x54fff0cb, 0x540051eb,
0x5400000c, 0x54fff06c, 0x5400518c, 0x5400000d,
0x54fff00d, 0x5400512d, 0x5400000e, 0x54ffefae,
0x540050ce, 0x5400000f, 0x54ffef4f, 0x5400506f,
0x540056e0, 0x54000001, 0x54fff541, 0x54005681,
0x54000002, 0x54fff4e2, 0x54005622, 0x54000002,
0x54fff482, 0x540055c2, 0x54000003, 0x54fff423,
0x54005563, 0x54000003, 0x54fff3c3, 0x54005503,
0x54000004, 0x54fff364, 0x540054a4, 0x54000005,
0x54fff305, 0x54005445, 0x54000006, 0x54fff2a6,
0x540053e6, 0x54000007, 0x54fff247, 0x54005387,
0x54000008, 0x54fff1e8, 0x54005328, 0x54000009,
0x54fff189, 0x540052c9, 0x5400000a, 0x54fff12a,
0x5400526a, 0x5400000b, 0x54fff0cb, 0x5400520b,
0x5400000c, 0x54fff06c, 0x540051ac, 0x5400000d,
0x54fff00d, 0x5400514d, 0x5400000e, 0x54ffefae,
0x540050ee, 0x5400000f, 0x54ffef4f, 0x5400508f,
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200,
@@ -989,7 +990,7 @@
0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176,
0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422,
0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a,
0xbd1b1869, 0x580040bb, 0x1800000b, 0xf8945060,
0xbd1b1869, 0x580040db, 0x1800000b, 0xf8945060,
0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035,
0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380,
0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b11,
@@ -1116,8 +1117,8 @@
0x6580b266, 0x65c1b50c, 0x658db013, 0x65c18677,
0x65a010cd, 0x65a8332e, 0x65bb56d6, 0x65b46e23,
0x04405ce4, 0x048476d0, 0x042b32c9, 0x04b033c5,
0x04613176, 0x04da3608, 0x0498248f, 0x041927a8,
0x04c833bc, 0x044a2c49, 0x658720fc, 0x6586363a,
0x65d832a8, 0x048136a5,
0x04613176, 0x04f03288, 0x049a248f, 0x041827a8,
0x04d933bc, 0x04482c49, 0x040a20fc, 0x6587363a,
0x65c632a8, 0x65d836a5, 0x048133b6,
};
// END Generated code -- do not edit

1 comment on commit e89542f

@openjdk-notifier

This comment has been minimized.

Copy link

@openjdk-notifier openjdk-notifier bot commented on e89542f Apr 8, 2021

Please sign in to comment.