Skip to content

Commit

Permalink
8265916: Add StoreVectorScatter nodes implementation for Arm SVE
Browse files Browse the repository at this point in the history
Co-authored-by: Wang Huang <whuang@openjdk.org>
Co-authored-by: Ai Jiaming <aijiaming1@huawei.com>
Reviewed-by: xgong
  • Loading branch information
Wang Huang and Ai Jiaming committed Apr 29, 2021
1 parent ab4fa58 commit 3c6bc2e
Show file tree
Hide file tree
Showing 6 changed files with 139 additions and 66 deletions.
3 changes: 3 additions & 0 deletions src/hotspot/cpu/aarch64/aarch64.ad
Original file line number Diff line number Diff line change
Expand Up @@ -2428,6 +2428,9 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return false;
}
break;
case Op_LoadVectorGather:
case Op_StoreVectorScatter:
return false;
default:
break;
}
Expand Down
34 changes: 33 additions & 1 deletion src/hotspot/cpu/aarch64/aarch64_sve.ad
Original file line number Diff line number Diff line change
Expand Up @@ -226,9 +226,12 @@ source %{
case Op_ExtractC:
case Op_ExtractUB:
// Vector API specific
case Op_StoreVectorScatter:
case Op_VectorLoadConst:
return false;
case Op_StoreVectorScatter:
// Partial vectors are not supported for scatter stores yet;
// support will be added in a separate patch.
return vlen * type2aelembytes(bt) == MaxVectorSize;
case Op_VectorLoadShuffle:
case Op_VectorRearrange:
if (vlen < 4) {
Expand Down Expand Up @@ -4113,3 +4116,32 @@ instruct gatherL(vReg dst, vmemA mem, vReg idx) %{
%}
ins_pipe(pipe_slow);
%}

// ------------------------------ Vector Store Scatter -------------------------------
// Scatter store for vectors whose elements are T_INT or T_FLOAT.
// Matches a StoreVectorScatter whose value input is (Binary src idx) and
// emits one sve_st1w_scatter using the base register of $mem and the index
// vector $idx. Only selected when SVE is enabled (UseSVE > 0).
// NOTE(review): n->in(3)->in(1) is presumably the $src vector under the
// Binary node, so the predicate tests the stored element type - confirm.
instruct scatterI(vmemA mem, vReg src, vReg idx) %{
predicate(UseSVE > 0 &&
(n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
match(Set mem (StoreVectorScatter mem (Binary src idx)));
ins_cost(SVE_COST);
format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (I/F)" %}
ins_encode %{
// Store $src under the ptrue predicate; $idx is passed to the scatter unchanged.
__ sve_st1w_scatter(as_FloatRegister($src$$reg), ptrue, as_Register($mem$$base), as_FloatRegister($idx$$reg));
%}
ins_pipe(pipe_slow);
%}

// Scatter store for vectors whose elements are T_LONG or T_DOUBLE.
// Matches a StoreVectorScatter whose value input is (Binary src idx). The
// index vector is first widened to D-sized lanes with sve_uunpklo and the
// widened indices are then used by sve_st1d_scatter with the base register
// of $mem. Only selected when SVE is enabled (UseSVE > 0).
//
// The widened indices are written to a dedicated TEMP register. $idx is a
// plain input operand, so overwriting it in place would corrupt the index
// vector for any later use that the register allocator keeps in the same
// register.
instruct scatterL(vmemA mem, vReg src, vReg idx, vReg tmp) %{
predicate(UseSVE > 0 &&
(n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
match(Set mem (StoreVectorScatter mem (Binary src idx)));
effect(TEMP tmp);
ins_cost(2 * SVE_COST);
format %{ "sve_uunpklo $tmp, $idx\n\t"
"store_vector_scatter $mem, $tmp, $src\t# vector store scatter (L/D)" %}
ins_encode %{
// Widen the indices into $tmp so that the input $idx stays intact.
__ sve_uunpklo(as_FloatRegister($tmp$$reg), __ D, as_FloatRegister($idx$$reg));
__ sve_st1d_scatter(as_FloatRegister($src$$reg), ptrue, as_Register($mem$$base), as_FloatRegister($tmp$$reg));
%}
ins_pipe(pipe_slow);
%}
34 changes: 33 additions & 1 deletion src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,12 @@ source %{
case Op_ExtractC:
case Op_ExtractUB:
// Vector API specific
case Op_StoreVectorScatter:
case Op_VectorLoadConst:
return false;
case Op_StoreVectorScatter:
// Partial vectors are not supported for scatter stores yet;
// support will be added in a separate patch.
return vlen * type2aelembytes(bt) == MaxVectorSize;
case Op_VectorLoadShuffle:
case Op_VectorRearrange:
if (vlen < 4) {
Expand Down Expand Up @@ -2394,3 +2397,32 @@ instruct gatherL(vReg dst, vmemA mem, vReg idx) %{
%}
ins_pipe(pipe_slow);
%}

// ------------------------------ Vector Store Scatter -------------------------------
// Scatter store for vectors whose elements are T_INT or T_FLOAT.
// Matches a StoreVectorScatter whose value input is (Binary src idx) and
// emits one sve_st1w_scatter using the base register of $mem and the index
// vector $idx. Only selected when SVE is enabled (UseSVE > 0).
// NOTE(review): n->in(3)->in(1) is presumably the $src vector under the
// Binary node, so the predicate tests the stored element type - confirm.
instruct scatterI(vmemA mem, vReg src, vReg idx) %{
predicate(UseSVE > 0 &&
(n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT ||
n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
match(Set mem (StoreVectorScatter mem (Binary src idx)));
ins_cost(SVE_COST);
format %{ "store_vector_scatter $mem, $idx, $src\t# vector store scatter (I/F)" %}
ins_encode %{
// Store $src under the ptrue predicate; $idx is passed to the scatter unchanged.
__ sve_st1w_scatter(as_FloatRegister($src$$reg), ptrue, as_Register($mem$$base), as_FloatRegister($idx$$reg));
%}
ins_pipe(pipe_slow);
%}

// Scatter store for vectors whose elements are T_LONG or T_DOUBLE.
// Matches a StoreVectorScatter whose value input is (Binary src idx). The
// index vector is first widened to D-sized lanes with sve_uunpklo and the
// widened indices are then used by sve_st1d_scatter with the base register
// of $mem. Only selected when SVE is enabled (UseSVE > 0).
//
// The widened indices are written to a dedicated TEMP register. $idx is a
// plain input operand, so overwriting it in place would corrupt the index
// vector for any later use that the register allocator keeps in the same
// register.
instruct scatterL(vmemA mem, vReg src, vReg idx, vReg tmp) %{
predicate(UseSVE > 0 &&
(n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
n->in(3)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
match(Set mem (StoreVectorScatter mem (Binary src idx)));
effect(TEMP tmp);
ins_cost(2 * SVE_COST);
format %{ "sve_uunpklo $tmp, $idx\n\t"
"store_vector_scatter $mem, $tmp, $src\t# vector store scatter (L/D)" %}
ins_encode %{
// Widen the indices into $tmp so that the input $idx stays intact.
__ sve_uunpklo(as_FloatRegister($tmp$$reg), __ D, as_FloatRegister($idx$$reg));
__ sve_st1d_scatter(as_FloatRegister($src$$reg), ptrue, as_Register($mem$$base), as_FloatRegister($tmp$$reg));
%}
ins_pipe(pipe_slow);
%}
4 changes: 3 additions & 1 deletion src/hotspot/cpu/aarch64/assembler_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3154,7 +3154,7 @@ void mvnw(Register Rd, Register Rm,
INSN(sve_st1d, 0b1110010, 0b11, 0b111, 0b010);
#undef INSN

// SVE load gather (scalar plus vector) - 32-bit scaled offset
// SVE load gather, store scatter (scalar plus vector) - 32-bit scaled offset
#define INSN(NAME, op1, type, op2, op3) \
void NAME(FloatRegister Zt, PRegister Pg, Register Xn, FloatRegister Zm) { \
starti; \
Expand All @@ -3164,6 +3164,8 @@ void mvnw(Register Rd, Register Rm,

INSN(sve_ld1w_gather, 0b1000010, 0b10, 0b01, 0b010);
INSN(sve_ld1d_gather, 0b1100010, 0b11, 0b01, 0b010);
INSN(sve_st1w_scatter, 0b1110010, 0b10, 0b11, 0b100);
INSN(sve_st1d_scatter, 0b1110010, 0b11, 0b01, 0b100);
#undef INSN

// SVE load/store - unpredicated
Expand Down
2 changes: 2 additions & 0 deletions test/hotspot/gtest/aarch64/aarch64-asmtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1604,6 +1604,8 @@ def generate(kind, names):
["tbl", "__ sve_tbl(z16, __ S, z17, z18);", "tbl\tz16.s, {z17.s}, z18.s"],
["ld1w", "__ sve_ld1w_gather(z15, p0, r5, z16);", "ld1w\t{z15.s}, p0/z, [x5, z16.s, uxtw #2]"],
["ld1d", "__ sve_ld1d_gather(z15, p0, r5, z16);", "ld1d\t{z15.d}, p0/z, [x5, z16.d, uxtw #3]"],
["st1w", "__ sve_st1w_scatter(z15, p0, r5, z16);", "st1w\t{z15.s}, p0, [x5, z16.s, uxtw #2]"],
["st1d", "__ sve_st1d_scatter(z15, p0, r5, z16);", "st1d\t{z15.d}, p0, [x5, z16.d, uxtw #3]"],
])

print "\n// FloatImmediateOp"
Expand Down
128 changes: 65 additions & 63 deletions test/hotspot/gtest/aarch64/asmtest.out.h
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,8 @@
__ sve_tbl(z16, __ S, z17, z18); // tbl z16.s, {z17.s}, z18.s
__ sve_ld1w_gather(z15, p0, r5, z16); // ld1w {z15.s}, p0/z, [x5, z16.s, uxtw #2]
__ sve_ld1d_gather(z15, p0, r5, z16); // ld1d {z15.d}, p0/z, [x5, z16.d, uxtw #3]
__ sve_st1w_scatter(z15, p0, r5, z16); // st1w {z15.s}, p0, [x5, z16.s, uxtw #2]
__ sve_st1d_scatter(z15, p0, r5, z16); // st1d {z15.d}, p0, [x5, z16.d, uxtw #3]

// FloatImmediateOp
__ fmovd(v0, 2.0); // fmov d0, #2.0
Expand Down Expand Up @@ -996,30 +998,30 @@
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
0x14000000, 0x17ffffd7, 0x1400031e, 0x94000000,
0x97ffffd4, 0x9400031b, 0x3400000a, 0x34fffa2a,
0x3400630a, 0x35000008, 0x35fff9c8, 0x350062a8,
0xb400000b, 0xb4fff96b, 0xb400624b, 0xb500001d,
0xb5fff91d, 0xb50061fd, 0x10000013, 0x10fff8b3,
0x10006193, 0x90000013, 0x36300016, 0x3637f836,
0x36306116, 0x3758000c, 0x375ff7cc, 0x375860ac,
0x14000000, 0x17ffffd7, 0x14000320, 0x94000000,
0x97ffffd4, 0x9400031d, 0x3400000a, 0x34fffa2a,
0x3400634a, 0x35000008, 0x35fff9c8, 0x350062e8,
0xb400000b, 0xb4fff96b, 0xb400628b, 0xb500001d,
0xb5fff91d, 0xb500623d, 0x10000013, 0x10fff8b3,
0x100061d3, 0x90000013, 0x36300016, 0x3637f836,
0x36306156, 0x3758000c, 0x375ff7cc, 0x375860ec,
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
0x54005e80, 0x54000001, 0x54fff541, 0x54005e21,
0x54000002, 0x54fff4e2, 0x54005dc2, 0x54000002,
0x54fff482, 0x54005d62, 0x54000003, 0x54fff423,
0x54005d03, 0x54000003, 0x54fff3c3, 0x54005ca3,
0x54000004, 0x54fff364, 0x54005c44, 0x54000005,
0x54fff305, 0x54005be5, 0x54000006, 0x54fff2a6,
0x54005b86, 0x54000007, 0x54fff247, 0x54005b27,
0x54000008, 0x54fff1e8, 0x54005ac8, 0x54000009,
0x54fff189, 0x54005a69, 0x5400000a, 0x54fff12a,
0x54005a0a, 0x5400000b, 0x54fff0cb, 0x540059ab,
0x5400000c, 0x54fff06c, 0x5400594c, 0x5400000d,
0x54fff00d, 0x540058ed, 0x5400000e, 0x54ffefae,
0x5400588e, 0x5400000f, 0x54ffef4f, 0x5400582f,
0x54005ec0, 0x54000001, 0x54fff541, 0x54005e61,
0x54000002, 0x54fff4e2, 0x54005e02, 0x54000002,
0x54fff482, 0x54005da2, 0x54000003, 0x54fff423,
0x54005d43, 0x54000003, 0x54fff3c3, 0x54005ce3,
0x54000004, 0x54fff364, 0x54005c84, 0x54000005,
0x54fff305, 0x54005c25, 0x54000006, 0x54fff2a6,
0x54005bc6, 0x54000007, 0x54fff247, 0x54005b67,
0x54000008, 0x54fff1e8, 0x54005b08, 0x54000009,
0x54fff189, 0x54005aa9, 0x5400000a, 0x54fff12a,
0x54005a4a, 0x5400000b, 0x54fff0cb, 0x540059eb,
0x5400000c, 0x54fff06c, 0x5400598c, 0x5400000d,
0x54fff00d, 0x5400592d, 0x5400000e, 0x54ffefae,
0x540058ce, 0x5400000f, 0x54ffef4f, 0x5400586f,
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200,
Expand Down Expand Up @@ -1051,7 +1053,7 @@
0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176,
0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422,
0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a,
0xbd1b1869, 0x5800487b, 0x1800000b, 0xf8945060,
0xbd1b1869, 0x580048bb, 0x1800000b, 0xf8945060,
0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035,
0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380,
0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b11,
Expand Down Expand Up @@ -1155,47 +1157,47 @@
0x25eb0d52, 0x65d0a001, 0x65d1a443, 0x65cbac85,
0x65deaa53, 0x65dfaa53, 0x0520a1e0, 0x0521a601,
0x052281e0, 0x05238601, 0x04a14026, 0x0568aca7,
0x05b23230, 0x853040af, 0xc5b040af, 0x1e601000,
0x1e603000, 0x1e621000, 0x1e623000, 0x1e641000,
0x1e643000, 0x1e661000, 0x1e663000, 0x1e681000,
0x1e683000, 0x1e6a1000, 0x1e6a3000, 0x1e6c1000,
0x1e6c3000, 0x1e6e1000, 0x1e6e3000, 0x1e701000,
0x1e703000, 0x1e721000, 0x1e723000, 0x1e741000,
0x1e743000, 0x1e761000, 0x1e763000, 0x1e781000,
0x1e783000, 0x1e7a1000, 0x1e7a3000, 0x1e7c1000,
0x1e7c3000, 0x1e7e1000, 0x1e7e3000, 0xf8358303,
0xf8280299, 0xf8301051, 0xf8212300, 0xf8243183,
0xf83f515c, 0xf83a4182, 0xf830703f, 0xf82d601d,
0xf8b3822c, 0xf8b6038d, 0xf8be103f, 0xf8ba209c,
0xf8be30c4, 0xf8be51fa, 0xf8a94188, 0xf8a07034,
0xf8b86002, 0xf8e98358, 0xf8f0007e, 0xf8ea1157,
0xf8e42050, 0xf8eb3148, 0xf8ef5051, 0xf8ea418c,
0xf8ef704d, 0xf8e76354, 0xf8708044, 0xf86401ec,
0xf87511f0, 0xf86b22f5, 0xf86c32fa, 0xf87c516e,
0xf8784181, 0xf87f720a, 0xf8676062, 0xb82d8233,
0xb8300023, 0xb82b10be, 0xb82823af, 0xb83e3280,
0xb82752f4, 0xb83c4375, 0xb8397025, 0xb83763f0,
0xb8a5812c, 0xb8bc03af, 0xb8b6127f, 0xb8bf21c5,
0xb8b031ff, 0xb8bb5214, 0xb8ac412b, 0xb8a6723e,
0xb8bb63dc, 0xb8e7828a, 0xb8ea0304, 0xb8f112d1,
0xb8e321fd, 0xb8f63273, 0xb8f651e2, 0xb8e6420c,
0xb8eb72ed, 0xb8e1627e, 0xb8658051, 0xb87001b6,
0xb86a13b5, 0xb87b236c, 0xb86333e1, 0xb8785233,
0xb869437c, 0xb86f72a7, 0xb877633f, 0xce3a47c2,
0xce110aca, 0xce788c11, 0xce8296d9, 0xce7b806c,
0xce70879d, 0xcec080da, 0xce718b89, 0x04670087,
0x042806c9, 0x659e029b, 0x6590081a, 0x65c80723,
0x04d6bb55, 0x04000096, 0x04508071, 0x041aa8c1,
0x04939ce9, 0x045194b6, 0x041013c8, 0x04d7a171,
0x049ea35c, 0x04c80dbc, 0x040a18b0, 0x044109ed,
0x049cb57a, 0x65809096, 0x658d9233, 0x65c68c4e,
0x658796e3, 0x65828626, 0x049db21b, 0x6582bc62,
0x6580b266, 0x65c1b50c, 0x658db013, 0x65c18677,
0x65a010cd, 0x65a8332e, 0x65bb56d6, 0x65b46e23,
0x04405ce4, 0x048476d0, 0x042b32c9, 0x04b033c5,
0x04613176, 0x05f06a88, 0x05a46c8f, 0x041a27a8,
0x04d833bc, 0x04592c49, 0x040820fc, 0x044a363a,
0x65c732a8, 0x65c636a5, 0x65d833b6, 0x04812093,

0x05b23230, 0x853040af, 0xc5b040af, 0xe57080af,
0xe5b080af, 0x1e601000, 0x1e603000, 0x1e621000,
0x1e623000, 0x1e641000, 0x1e643000, 0x1e661000,
0x1e663000, 0x1e681000, 0x1e683000, 0x1e6a1000,
0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 0x1e6e1000,
0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000,
0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000,
0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000,
0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000,
0x1e7e3000, 0xf8358303, 0xf8280299, 0xf8301051,
0xf8212300, 0xf8243183, 0xf83f515c, 0xf83a4182,
0xf830703f, 0xf82d601d, 0xf8b3822c, 0xf8b6038d,
0xf8be103f, 0xf8ba209c, 0xf8be30c4, 0xf8be51fa,
0xf8a94188, 0xf8a07034, 0xf8b86002, 0xf8e98358,
0xf8f0007e, 0xf8ea1157, 0xf8e42050, 0xf8eb3148,
0xf8ef5051, 0xf8ea418c, 0xf8ef704d, 0xf8e76354,
0xf8708044, 0xf86401ec, 0xf87511f0, 0xf86b22f5,
0xf86c32fa, 0xf87c516e, 0xf8784181, 0xf87f720a,
0xf8676062, 0xb82d8233, 0xb8300023, 0xb82b10be,
0xb82823af, 0xb83e3280, 0xb82752f4, 0xb83c4375,
0xb8397025, 0xb83763f0, 0xb8a5812c, 0xb8bc03af,
0xb8b6127f, 0xb8bf21c5, 0xb8b031ff, 0xb8bb5214,
0xb8ac412b, 0xb8a6723e, 0xb8bb63dc, 0xb8e7828a,
0xb8ea0304, 0xb8f112d1, 0xb8e321fd, 0xb8f63273,
0xb8f651e2, 0xb8e6420c, 0xb8eb72ed, 0xb8e1627e,
0xb8658051, 0xb87001b6, 0xb86a13b5, 0xb87b236c,
0xb86333e1, 0xb8785233, 0xb869437c, 0xb86f72a7,
0xb877633f, 0xce3a47c2, 0xce110aca, 0xce788c11,
0xce8296d9, 0xce7b806c, 0xce70879d, 0xcec080da,
0xce718b89, 0x04670087, 0x042806c9, 0x659e029b,
0x6590081a, 0x65c80723, 0x04d6bb55, 0x04000096,
0x04508071, 0x041aa8c1, 0x04939ce9, 0x045194b6,
0x041013c8, 0x04d7a171, 0x049ea35c, 0x04c80dbc,
0x040a18b0, 0x044109ed, 0x049cb57a, 0x65809096,
0x658d9233, 0x65c68c4e, 0x658796e3, 0x65828626,
0x049db21b, 0x6582bc62, 0x6580b266, 0x65c1b50c,
0x658db013, 0x65c18677, 0x65a010cd, 0x65a8332e,
0x65bb56d6, 0x65b46e23, 0x04405ce4, 0x048476d0,
0x042b32c9, 0x04b033c5, 0x04613176, 0x05f06a88,
0x05a46c8f, 0x041a27a8, 0x04d833bc, 0x04592c49,
0x040820fc, 0x044a363a, 0x65c732a8, 0x65c636a5,
0x65d833b6, 0x04812093,
};
// END Generated code -- do not edit

0 comments on commit 3c6bc2e

Please sign in to comment.