Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
8265321: Add Rearrange nodes implementation for Arm SVE
Co-authored-by: Wang Huang <whuang@openjdk.org>
Co-authored-by: Ai Jiaming <aijiaming1@huawei.com>
Reviewed-by: njian
  • Loading branch information
Wang Huang and Ai Jiaming committed Apr 23, 2021
1 parent a6e9e03 commit 6d5c8bd
Show file tree
Hide file tree
Showing 6 changed files with 192 additions and 64 deletions.
10 changes: 10 additions & 0 deletions src/hotspot/cpu/aarch64/aarch64.ad
Expand Up @@ -2411,6 +2411,16 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return false;
}
if (UseSVE > 0) {
switch (opcode) {
case Op_VectorLoadShuffle:
case Op_VectorRearrange:
if (vlen < 4) {
return false;
}
break;
default:
break;
}
return op_sve_supported(opcode, vlen, bt);
} else { // NEON
// Special cases
Expand Down
87 changes: 86 additions & 1 deletion src/hotspot/cpu/aarch64/aarch64_sve.ad
Expand Up @@ -229,7 +229,6 @@ source %{
case Op_LoadVectorGather:
case Op_StoreVectorScatter:
case Op_VectorLoadConst:
case Op_VectorRearrange:
return false;
default:
// By default, we only support vector operations with larger than 16 bytes.
Expand Down Expand Up @@ -3993,3 +3992,89 @@ instruct loadshuffleL(vReg dst, vReg src)
%}
ins_pipe(pipe_slow);
%}

// ------------------------------ Vector rearrange -------------------------------

// Rearrange rule for byte vectors. Selected when UseSVE > 0 and the matched
// VectorRearrange node's vector type has element type T_BYTE. Emits a single
// sve_tbl using the B size variant, with src and shuffle as the two source
// registers and dst as the destination.
instruct rearrangeB(vReg dst, vReg src, vReg shuffle)
%{
predicate(UseSVE > 0 &&
n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
match(Set dst (VectorRearrange src shuffle));
ins_cost(SVE_COST);
format %{ "sve_tbl $dst, B, $src, $shuffle\t# vector rearrange (B)" %}
ins_encode %{
__ sve_tbl(as_FloatRegister($dst$$reg), __ B,
as_FloatRegister($src$$reg), as_FloatRegister($shuffle$$reg));
%}
ins_pipe(pipe_slow);
%}

// Rearrange rule for short vectors. Selected when UseSVE > 0 and the matched
// VectorRearrange node's vector type has element type T_SHORT. Emits a single
// sve_tbl using the H size variant, with src and shuffle as the two source
// registers and dst as the destination.
instruct rearrangeS(vReg dst, vReg src, vReg shuffle)
%{
predicate(UseSVE > 0 &&
n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
match(Set dst (VectorRearrange src shuffle));
ins_cost(SVE_COST);
format %{ "sve_tbl $dst, H, $src, $shuffle\t# vector rearrange (S)" %}
ins_encode %{
__ sve_tbl(as_FloatRegister($dst$$reg), __ H,
as_FloatRegister($src$$reg), as_FloatRegister($shuffle$$reg));
%}
ins_pipe(pipe_slow);
%}

// Rearrange rule for int vectors. Selected when UseSVE > 0 and the matched
// VectorRearrange node's vector type has element type T_INT. Emits a single
// sve_tbl using the S size variant, with src and shuffle as the two source
// registers and dst as the destination.
instruct rearrangeI(vReg dst, vReg src, vReg shuffle)
%{
predicate(UseSVE > 0 &&
n->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (VectorRearrange src shuffle));
ins_cost(SVE_COST);
format %{ "sve_tbl $dst, S, $src, $shuffle\t# vector rearrange (I)" %}
ins_encode %{
__ sve_tbl(as_FloatRegister($dst$$reg), __ S,
as_FloatRegister($src$$reg), as_FloatRegister($shuffle$$reg));
%}
ins_pipe(pipe_slow);
%}

// Rearrange rule for float vectors. Selected when UseSVE > 0 and the matched
// VectorRearrange node's vector type has element type T_FLOAT. Emits a single
// sve_tbl using the S size variant (the same variant the T_INT rule uses),
// with src and shuffle as the two source registers and dst as the destination.
instruct rearrangeF(vReg dst, vReg src, vReg shuffle)
%{
predicate(UseSVE > 0 &&
n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
match(Set dst (VectorRearrange src shuffle));
ins_cost(SVE_COST);
format %{ "sve_tbl $dst, S, $src, $shuffle\t# vector rearrange (F)" %}
ins_encode %{
__ sve_tbl(as_FloatRegister($dst$$reg), __ S,
as_FloatRegister($src$$reg), as_FloatRegister($shuffle$$reg));
%}
ins_pipe(pipe_slow);
%}

// Rearrange rule for long vectors. Selected when UseSVE > 0 and the matched
// VectorRearrange node's vector type has element type T_LONG. Emits a single
// sve_tbl using the D size variant, with src and shuffle as the two source
// registers and dst as the destination.
instruct rearrangeL(vReg dst, vReg src, vReg shuffle)
%{
predicate(UseSVE > 0 &&
n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
match(Set dst (VectorRearrange src shuffle));
ins_cost(SVE_COST);
format %{ "sve_tbl $dst, D, $src, $shuffle\t# vector rearrange (L)" %}
ins_encode %{
__ sve_tbl(as_FloatRegister($dst$$reg), __ D,
as_FloatRegister($src$$reg), as_FloatRegister($shuffle$$reg));
%}
ins_pipe(pipe_slow);
%}

// Rearrange rule for double vectors. Selected when UseSVE > 0 and the matched
// VectorRearrange node's vector type has element type T_DOUBLE. Emits a single
// sve_tbl using the D size variant (the same variant the T_LONG rule uses),
// with src and shuffle as the two source registers and dst as the destination.
instruct rearrangeD(vReg dst, vReg src, vReg shuffle)
%{
predicate(UseSVE > 0 &&
n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
match(Set dst (VectorRearrange src shuffle));
ins_cost(SVE_COST);
format %{ "sve_tbl $dst, D, $src, $shuffle\t# vector rearrange (D)" %}
ins_encode %{
__ sve_tbl(as_FloatRegister($dst$$reg), __ D,
as_FloatRegister($src$$reg), as_FloatRegister($shuffle$$reg));
%}
ins_pipe(pipe_slow);
%}
25 changes: 24 additions & 1 deletion src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
Expand Up @@ -224,7 +224,6 @@ source %{
case Op_LoadVectorGather:
case Op_StoreVectorScatter:
case Op_VectorLoadConst:
case Op_VectorRearrange:
return false;
default:
// By default, we only support vector operations with larger than 16 bytes.
Expand Down Expand Up @@ -2336,3 +2335,27 @@ instruct loadshuffleL(vReg dst, vReg src)
%}
ins_pipe(pipe_slow);
%}

// ------------------------------ Vector rearrange -------------------------------
dnl
dnl VECTOR_REARRANGE(type letter, size variant)
dnl   Expands to one "instruct rearrange<type letter>" matching rule for
dnl   VectorRearrange. The first argument names the rule and, through
dnl   TYPE2DATATYPE, selects the T_<...> element type tested in the predicate.
dnl   The second argument is the size variant passed to sve_tbl and printed in
dnl   the format string.
dnl   NOTE(review): TYPE2DATATYPE is defined elsewhere in this file; confirm it
dnl   accepts every type letter instantiated below.
define(`VECTOR_REARRANGE', `
instruct rearrange$1`'(vReg dst, vReg src, vReg shuffle)
%{
predicate(UseSVE > 0 &&
n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($1));
match(Set dst (VectorRearrange src shuffle));
ins_cost(SVE_COST);
format %{ "sve_tbl $dst, $2, $src, $shuffle\t# vector rearrange ($1)" %}
ins_encode %{
__ sve_tbl(as_FloatRegister($dst$$reg), __ $2,
as_FloatRegister($src$$reg), as_FloatRegister($shuffle$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl $1 $2
VECTOR_REARRANGE(B, B)
VECTOR_REARRANGE(S, H)
VECTOR_REARRANGE(I, S)
VECTOR_REARRANGE(F, S)
VECTOR_REARRANGE(L, D)
VECTOR_REARRANGE(D, D)
8 changes: 8 additions & 0 deletions src/hotspot/cpu/aarch64/assembler_aarch64.hpp
Expand Up @@ -3545,6 +3545,14 @@ void mvnw(Register Rd, Register Rm,
sf(imm1, 9, 5), rf(Zd, 0);
}

// SVE TBL (programmable table lookup, single-vector table form).
// Assembles "tbl Zd.<T>, {Zn.<T>}, Zm.<T>"; T selects the element size and
// must not be Q.
void sve_tbl(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) {
  starti;
  assert(T != Q, "invalid size");
  // Fixed opcode bits [31:24], element size in [23:22], bit 21 always set.
  f(0b00000101, 31, 24);
  f(T, 23, 22);
  f(0b1, 21);
  // Second source register.
  rf(Zm, 16);
  // Fixed opcode bits [15:10].
  f(0b001100, 15, 10);
  // First source register, then destination.
  rf(Zn, 5);
  rf(Zd, 0);
}

Assembler(CodeBuffer* code) : AbstractAssembler(code) {
}

Expand Down
1 change: 1 addition & 0 deletions test/hotspot/gtest/aarch64/aarch64-asmtest.py
Expand Up @@ -1601,6 +1601,7 @@ def generate(kind, names):
["lastb", "__ sve_lastb(v1, __ B, p1, z16);", "lastb\tb1, p1, z16.b"],
["index", "__ sve_index(z6, __ S, 1, 1);", "index\tz6.s, #1, #1"],
["cpy", "__ sve_cpy(z7, __ H, p3, r5);", "cpy\tz7.h, p3/m, w5"],
["tbl", "__ sve_tbl(z16, __ S, z17, z18);", "tbl\tz16.s, {z17.s}, z18.s"],
])

print "\n// FloatImmediateOp"
Expand Down
125 changes: 63 additions & 62 deletions test/hotspot/gtest/aarch64/asmtest.out.h
Expand Up @@ -788,6 +788,7 @@
__ sve_lastb(v1, __ B, p1, z16); // lastb b1, p1, z16.b
__ sve_index(z6, __ S, 1, 1); // index z6.s, #1, #1
__ sve_cpy(z7, __ H, p3, r5); // cpy z7.h, p3/m, w5
__ sve_tbl(z16, __ S, z17, z18); // tbl z16.s, {z17.s}, z18.s

// FloatImmediateOp
__ fmovd(v0, 2.0); // fmov d0, #2.0
Expand Down Expand Up @@ -993,30 +994,30 @@
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
0x14000000, 0x17ffffd7, 0x1400031b, 0x94000000,
0x97ffffd4, 0x94000318, 0x3400000a, 0x34fffa2a,
0x340062aa, 0x35000008, 0x35fff9c8, 0x35006248,
0xb400000b, 0xb4fff96b, 0xb40061eb, 0xb500001d,
0xb5fff91d, 0xb500619d, 0x10000013, 0x10fff8b3,
0x10006133, 0x90000013, 0x36300016, 0x3637f836,
0x363060b6, 0x3758000c, 0x375ff7cc, 0x3758604c,
0x14000000, 0x17ffffd7, 0x1400031c, 0x94000000,
0x97ffffd4, 0x94000319, 0x3400000a, 0x34fffa2a,
0x340062ca, 0x35000008, 0x35fff9c8, 0x35006268,
0xb400000b, 0xb4fff96b, 0xb400620b, 0xb500001d,
0xb5fff91d, 0xb50061bd, 0x10000013, 0x10fff8b3,
0x10006153, 0x90000013, 0x36300016, 0x3637f836,
0x363060d6, 0x3758000c, 0x375ff7cc, 0x3758606c,
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
0x54005e20, 0x54000001, 0x54fff541, 0x54005dc1,
0x54000002, 0x54fff4e2, 0x54005d62, 0x54000002,
0x54fff482, 0x54005d02, 0x54000003, 0x54fff423,
0x54005ca3, 0x54000003, 0x54fff3c3, 0x54005c43,
0x54000004, 0x54fff364, 0x54005be4, 0x54000005,
0x54fff305, 0x54005b85, 0x54000006, 0x54fff2a6,
0x54005b26, 0x54000007, 0x54fff247, 0x54005ac7,
0x54000008, 0x54fff1e8, 0x54005a68, 0x54000009,
0x54fff189, 0x54005a09, 0x5400000a, 0x54fff12a,
0x540059aa, 0x5400000b, 0x54fff0cb, 0x5400594b,
0x5400000c, 0x54fff06c, 0x540058ec, 0x5400000d,
0x54fff00d, 0x5400588d, 0x5400000e, 0x54ffefae,
0x5400582e, 0x5400000f, 0x54ffef4f, 0x540057cf,
0x54005e40, 0x54000001, 0x54fff541, 0x54005de1,
0x54000002, 0x54fff4e2, 0x54005d82, 0x54000002,
0x54fff482, 0x54005d22, 0x54000003, 0x54fff423,
0x54005cc3, 0x54000003, 0x54fff3c3, 0x54005c63,
0x54000004, 0x54fff364, 0x54005c04, 0x54000005,
0x54fff305, 0x54005ba5, 0x54000006, 0x54fff2a6,
0x54005b46, 0x54000007, 0x54fff247, 0x54005ae7,
0x54000008, 0x54fff1e8, 0x54005a88, 0x54000009,
0x54fff189, 0x54005a29, 0x5400000a, 0x54fff12a,
0x540059ca, 0x5400000b, 0x54fff0cb, 0x5400596b,
0x5400000c, 0x54fff06c, 0x5400590c, 0x5400000d,
0x54fff00d, 0x540058ad, 0x5400000e, 0x54ffefae,
0x5400584e, 0x5400000f, 0x54ffef4f, 0x540057ef,
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200,
Expand Down Expand Up @@ -1048,7 +1049,7 @@
0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176,
0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422,
0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a,
0xbd1b1869, 0x5800481b, 0x1800000b, 0xf8945060,
0xbd1b1869, 0x5800483b, 0x1800000b, 0xf8945060,
0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035,
0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380,
0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b11,
Expand Down Expand Up @@ -1152,46 +1153,46 @@
0x25eb0d52, 0x65d0a001, 0x65d1a443, 0x65cbac85,
0x65deaa53, 0x65dfaa53, 0x0520a1e0, 0x0521a601,
0x052281e0, 0x05238601, 0x04a14026, 0x0568aca7,
0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000,
0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000,
0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000,
0x1e6c1000, 0x1e6c3000, 0x1e6e1000, 0x1e6e3000,
0x1e701000, 0x1e703000, 0x1e721000, 0x1e723000,
0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000,
0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000,
0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000,
0xf8358303, 0xf8280299, 0xf8301051, 0xf8212300,
0xf8243183, 0xf83f515c, 0xf83a4182, 0xf830703f,
0xf82d601d, 0xf8b3822c, 0xf8b6038d, 0xf8be103f,
0xf8ba209c, 0xf8be30c4, 0xf8be51fa, 0xf8a94188,
0xf8a07034, 0xf8b86002, 0xf8e98358, 0xf8f0007e,
0xf8ea1157, 0xf8e42050, 0xf8eb3148, 0xf8ef5051,
0xf8ea418c, 0xf8ef704d, 0xf8e76354, 0xf8708044,
0xf86401ec, 0xf87511f0, 0xf86b22f5, 0xf86c32fa,
0xf87c516e, 0xf8784181, 0xf87f720a, 0xf8676062,
0xb82d8233, 0xb8300023, 0xb82b10be, 0xb82823af,
0xb83e3280, 0xb82752f4, 0xb83c4375, 0xb8397025,
0xb83763f0, 0xb8a5812c, 0xb8bc03af, 0xb8b6127f,
0xb8bf21c5, 0xb8b031ff, 0xb8bb5214, 0xb8ac412b,
0xb8a6723e, 0xb8bb63dc, 0xb8e7828a, 0xb8ea0304,
0xb8f112d1, 0xb8e321fd, 0xb8f63273, 0xb8f651e2,
0xb8e6420c, 0xb8eb72ed, 0xb8e1627e, 0xb8658051,
0xb87001b6, 0xb86a13b5, 0xb87b236c, 0xb86333e1,
0xb8785233, 0xb869437c, 0xb86f72a7, 0xb877633f,
0xce3a47c2, 0xce110aca, 0xce788c11, 0xce8296d9,
0xce7b806c, 0xce70879d, 0xcec080da, 0xce718b89,
0x04670087, 0x042806c9, 0x659e029b, 0x6590081a,
0x65c80723, 0x04d6bb55, 0x04000096, 0x04508071,
0x041aa8c1, 0x04939ce9, 0x045194b6, 0x041013c8,
0x04d7a171, 0x049ea35c, 0x04c80dbc, 0x040a18b0,
0x044109ed, 0x049cb57a, 0x65809096, 0x658d9233,
0x65c68c4e, 0x658796e3, 0x65828626, 0x049db21b,
0x6582bc62, 0x6580b266, 0x65c1b50c, 0x658db013,
0x65c18677, 0x65a010cd, 0x65a8332e, 0x65bb56d6,
0x65b46e23, 0x04405ce4, 0x048476d0, 0x042b32c9,
0x04b033c5, 0x04613176, 0x05f06a88, 0x05a46c8f,
0x041a27a8, 0x04d833bc, 0x04592c49, 0x040820fc,
0x044a363a, 0x65c732a8, 0x65c636a5, 0x65d833b6,
0x04812093,
0x05b23230, 0x1e601000, 0x1e603000, 0x1e621000,
0x1e623000, 0x1e641000, 0x1e643000, 0x1e661000,
0x1e663000, 0x1e681000, 0x1e683000, 0x1e6a1000,
0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 0x1e6e1000,
0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000,
0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000,
0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000,
0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000,
0x1e7e3000, 0xf8358303, 0xf8280299, 0xf8301051,
0xf8212300, 0xf8243183, 0xf83f515c, 0xf83a4182,
0xf830703f, 0xf82d601d, 0xf8b3822c, 0xf8b6038d,
0xf8be103f, 0xf8ba209c, 0xf8be30c4, 0xf8be51fa,
0xf8a94188, 0xf8a07034, 0xf8b86002, 0xf8e98358,
0xf8f0007e, 0xf8ea1157, 0xf8e42050, 0xf8eb3148,
0xf8ef5051, 0xf8ea418c, 0xf8ef704d, 0xf8e76354,
0xf8708044, 0xf86401ec, 0xf87511f0, 0xf86b22f5,
0xf86c32fa, 0xf87c516e, 0xf8784181, 0xf87f720a,
0xf8676062, 0xb82d8233, 0xb8300023, 0xb82b10be,
0xb82823af, 0xb83e3280, 0xb82752f4, 0xb83c4375,
0xb8397025, 0xb83763f0, 0xb8a5812c, 0xb8bc03af,
0xb8b6127f, 0xb8bf21c5, 0xb8b031ff, 0xb8bb5214,
0xb8ac412b, 0xb8a6723e, 0xb8bb63dc, 0xb8e7828a,
0xb8ea0304, 0xb8f112d1, 0xb8e321fd, 0xb8f63273,
0xb8f651e2, 0xb8e6420c, 0xb8eb72ed, 0xb8e1627e,
0xb8658051, 0xb87001b6, 0xb86a13b5, 0xb87b236c,
0xb86333e1, 0xb8785233, 0xb869437c, 0xb86f72a7,
0xb877633f, 0xce3a47c2, 0xce110aca, 0xce788c11,
0xce8296d9, 0xce7b806c, 0xce70879d, 0xcec080da,
0xce718b89, 0x04670087, 0x042806c9, 0x659e029b,
0x6590081a, 0x65c80723, 0x04d6bb55, 0x04000096,
0x04508071, 0x041aa8c1, 0x04939ce9, 0x045194b6,
0x041013c8, 0x04d7a171, 0x049ea35c, 0x04c80dbc,
0x040a18b0, 0x044109ed, 0x049cb57a, 0x65809096,
0x658d9233, 0x65c68c4e, 0x658796e3, 0x65828626,
0x049db21b, 0x6582bc62, 0x6580b266, 0x65c1b50c,
0x658db013, 0x65c18677, 0x65a010cd, 0x65a8332e,
0x65bb56d6, 0x65b46e23, 0x04405ce4, 0x048476d0,
0x042b32c9, 0x04b033c5, 0x04613176, 0x05f06a88,
0x05a46c8f, 0x041a27a8, 0x04d833bc, 0x04592c49,
0x040820fc, 0x044a363a, 0x65c732a8, 0x65c636a5,
0x65d833b6, 0x04812093,
};
// END Generated code -- do not edit

0 comments on commit 6d5c8bd

Please sign in to comment.