Skip to content

Commit

Permalink
8263417: Add Extract integer nodes implementation for Arm SVE
Browse files Browse the repository at this point in the history
Co-authored-by: Wang Huang <whuang@openjdk.org>
Co-authored-by: He Xuejin <hexuejin2@huawei.com>
Co-authored-by: Ai Jiaming <aijiaming1@huawei.com>
Reviewed-by: njian
  • Loading branch information
3 people authored and Ningsheng Jian committed Mar 16, 2021
1 parent 5165981 commit 864a62c
Show file tree
Hide file tree
Showing 5 changed files with 201 additions and 72 deletions.
80 changes: 75 additions & 5 deletions src/hotspot/cpu/aarch64/aarch64_sve.ad
Original file line number Diff line number Diff line change
Expand Up @@ -221,14 +221,9 @@ source %{
case Op_MulReductionVI:
case Op_MulReductionVL:
// Others
case Op_Extract:
case Op_ExtractB:
case Op_ExtractC:
case Op_ExtractD:
case Op_ExtractF:
case Op_ExtractI:
case Op_ExtractL:
case Op_ExtractS:
case Op_ExtractUB:
// Vector API specific
case Op_LoadVectorGather:
Expand Down Expand Up @@ -2991,3 +2986,78 @@ instruct vcvtDtoB(vReg dst, vReg src, vReg tmp)
%}
ins_pipe(pipe_slow);
%}

// ------------------------------ Vector extract ---------------------------------

// Extract one byte lane, selected by the constant idx, from the SVE vector src
// into a general-purpose int register. The rule is only eligible when
// UseSVE > 0.
//
// Emitted sequence:
//   1. movzw        - materialise idx in rscratch1
//   2. sve_whilele  - build predicate pTmp from the range zr..rscratch1 using
//                     byte-sized elements (per the SVE ISA this leaves lanes
//                     0..idx active)
//   3. sve_lastb    - copy the last active byte lane of src into dst
//   4. sbfmw 0,7    - sign-extend the low 8 bits of dst to 32 bits
//
// pTmp is declared TEMP and the flags register is declared KILLed (whilele
// writes the condition flags). rscratch1 is used as scratch and is not listed
// in effect().
instruct extractB(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
%{
predicate(UseSVE > 0);
match(Set dst (ExtractB src idx));
effect(TEMP pTmp, KILL cr);
ins_cost(2 * SVE_COST);
format %{ "movzw rscratch1, $idx\n\t"
"sve_whilele $pTmp, B, zr, rscratch1\n\t"
"sve_lastb $dst, B, $pTmp, $src\n\t"
"sbfmw $dst, $dst, 0U, 7U\t# extract from vector(B)" %}
ins_encode %{
// rscratch1 = lane index (compile-time constant)
__ movzw(rscratch1, (int)($idx$$constant));
// pTmp = predicate covering the byte lanes up to and including the index
__ sve_whilele(as_PRegister($pTmp$$reg), __ B, zr, rscratch1);
// dst = last active byte lane of src
__ sve_lastb(as_Register($dst$$reg), __ B, as_PRegister($pTmp$$reg), as_FloatRegister($src$$reg));
// sign-extend bits 0..7 so dst holds a proper int value of the byte
__ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 7U);
%}
ins_pipe(pipe_slow);
%}

// Extract one short (16-bit) lane, selected by the constant idx, from the SVE
// vector src into a general-purpose int register. The rule is only eligible
// when UseSVE > 0.
//
// Emitted sequence:
//   1. movzw        - materialise idx in rscratch1
//   2. sve_whilele  - build predicate pTmp from the range zr..rscratch1 using
//                     halfword-sized elements (per the SVE ISA this leaves
//                     lanes 0..idx active)
//   3. sve_lastb    - copy the last active halfword lane of src into dst
//   4. sbfmw 0,15   - sign-extend the low 16 bits of dst to 32 bits
//
// pTmp is declared TEMP and the flags register is declared KILLed (whilele
// writes the condition flags). rscratch1 is used as scratch and is not listed
// in effect().
instruct extractS(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
%{
predicate(UseSVE > 0);
match(Set dst (ExtractS src idx));
effect(TEMP pTmp, KILL cr);
ins_cost(2 * SVE_COST);
format %{ "movzw rscratch1, $idx\n\t"
"sve_whilele $pTmp, H, zr, rscratch1\n\t"
"sve_lastb $dst, H, $pTmp, $src\n\t"
"sbfmw $dst, $dst, 0U, 15U\t# extract from vector(S)" %}
ins_encode %{
// rscratch1 = lane index (compile-time constant)
__ movzw(rscratch1, (int)($idx$$constant));
// pTmp = predicate covering the halfword lanes up to and including the index
__ sve_whilele(as_PRegister($pTmp$$reg), __ H, zr, rscratch1);
// dst = last active halfword lane of src
__ sve_lastb(as_Register($dst$$reg), __ H, as_PRegister($pTmp$$reg), as_FloatRegister($src$$reg));
// sign-extend bits 0..15 so dst holds a proper int value of the short
__ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
%}
ins_pipe(pipe_slow);
%}


// Extract one int (32-bit) lane, selected by the constant idx, from the SVE
// vector src into a general-purpose int register. The rule is only eligible
// when UseSVE > 0.
//
// Emitted sequence:
//   1. movzw        - materialise idx in rscratch1
//   2. sve_whilele  - build predicate pTmp from the range zr..rscratch1 using
//                     word-sized elements (per the SVE ISA this leaves lanes
//                     0..idx active)
//   3. sve_lastb    - copy the last active word lane of src into dst
//
// Unlike the B and S variants no sign-extension step follows, since the lane
// already has the width of the destination.
//
// pTmp is declared TEMP and the flags register is declared KILLed (whilele
// writes the condition flags). rscratch1 is used as scratch and is not listed
// in effect().
instruct extractI(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
%{
predicate(UseSVE > 0);
match(Set dst (ExtractI src idx));
effect(TEMP pTmp, KILL cr);
ins_cost(2 * SVE_COST);
format %{ "movzw rscratch1, $idx\n\t"
"sve_whilele $pTmp, S, zr, rscratch1\n\t"
"sve_lastb $dst, S, $pTmp, $src\n\t" %}
ins_encode %{
// rscratch1 = lane index (compile-time constant)
__ movzw(rscratch1, (int)($idx$$constant));
// pTmp = predicate covering the word lanes up to and including the index
__ sve_whilele(as_PRegister($pTmp$$reg), __ S, zr, rscratch1);
// dst = last active word lane of src
__ sve_lastb(as_Register($dst$$reg), __ S, as_PRegister($pTmp$$reg), as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}

// Extract one long (64-bit) lane, selected by the constant idx, from the SVE
// vector src into a general-purpose long register (dst is iRegLNoSp). The rule
// is only eligible when UseSVE > 0. Note that idx is still an int immediate.
//
// Emitted sequence:
//   1. movzw        - materialise idx in rscratch1
//   2. sve_whilele  - build predicate pTmp from the range zr..rscratch1 using
//                     doubleword-sized elements (per the SVE ISA this leaves
//                     lanes 0..idx active)
//   3. sve_lastb    - copy the last active doubleword lane of src into dst
//
// pTmp is declared TEMP and the flags register is declared KILLed (whilele
// writes the condition flags). rscratch1 is used as scratch and is not listed
// in effect().
instruct extractL(iRegLNoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
%{
predicate(UseSVE > 0);
match(Set dst (ExtractL src idx));
effect(TEMP pTmp, KILL cr);
ins_cost(2 * SVE_COST);
format %{ "movzw rscratch1, $idx\n\t"
"sve_whilele $pTmp, D, zr, rscratch1\n\t"
"sve_lastb $dst, D, $pTmp, $src\n\t" %}
ins_encode %{
// rscratch1 = lane index (compile-time constant)
__ movzw(rscratch1, (int)($idx$$constant));
// pTmp = predicate covering the doubleword lanes up to and including the index
__ sve_whilele(as_PRegister($pTmp$$reg), __ D, zr, rscratch1);
// dst = last active doubleword lane of src
__ sve_lastb(as_Register($dst$$reg), __ D, as_PRegister($pTmp$$reg), as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}
51 changes: 46 additions & 5 deletions src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
Original file line number Diff line number Diff line change
Expand Up @@ -217,14 +217,9 @@ source %{
case Op_MulReductionVI:
case Op_MulReductionVL:
// Others
case Op_Extract:
case Op_ExtractB:
case Op_ExtractC:
case Op_ExtractD:
case Op_ExtractF:
case Op_ExtractI:
case Op_ExtractL:
case Op_ExtractS:
case Op_ExtractUB:
// Vector API specific
case Op_LoadVectorGather:
Expand Down Expand Up @@ -1788,3 +1783,49 @@ instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
%}')dnl
dnl $1 $2 $3 $4 $5 $6 $7 $8 $9
VECTOR_CAST_F2X_NARROW3(D, B, fcvtzs, D, dup, S, uzp1, H, B)

// ------------------------------ Vector extract ---------------------------------
dnl VECTOR_EXTRACT_SXT generates an "extract lane to int register" rule for
dnl element types narrower than the destination, so a sign-extension follows.
dnl Generated sequence: movzw idx into rscratch1, sve_whilele to build the
dnl predicate pTmp, sve_lastb to copy the selected lane, sbfmw to sign-extend.
dnl Parameters:
dnl   $1  type suffix used in the rule name and in the matched Extract node
dnl   $2  operand class of dst
dnl   $3  SVE element size passed to sve_whilele and sve_lastb
dnl   $4  register kind used to form the as_XXX conversion of dst
dnl   $5  upper bit index handed to sbfmw (7U for bytes, 15U for shorts)
dnl These comment lines are discarded by m4 and do not reach the generated file.
define(`VECTOR_EXTRACT_SXT', `
instruct extract$1`'($2 dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
%{
predicate(UseSVE > 0);
match(Set dst (Extract$1 src idx));
effect(TEMP pTmp, KILL cr);
ins_cost(2 * SVE_COST);
format %{ "movzw rscratch1, $idx\n\t"
"sve_whilele $pTmp, $3, zr, rscratch1\n\t"
"sve_lastb $dst, $3, $pTmp, $src\n\t"
"sbfmw $dst, $dst, 0U, $5\t# extract from vector($1)" %}
ins_encode %{
__ movzw(rscratch1, (int)($idx$$constant));
__ sve_whilele(as_PRegister($pTmp$$reg), __ $3, zr, rscratch1);
__ sve_lastb(as_$4($dst$$reg), __ $3, as_PRegister($pTmp$$reg), as_FloatRegister($src$$reg));
__ sbfmw(as_$4($dst$$reg), as_$4($dst$$reg), 0U, $5);
%}
ins_pipe(pipe_slow);
%}')dnl
dnl $1 $2 $3 $4 $5
VECTOR_EXTRACT_SXT(B, iRegINoSp, B, Register, 7U)
VECTOR_EXTRACT_SXT(S, iRegINoSp, H, Register, 15U)

dnl
dnl VECTOR_EXTRACT generates an "extract lane to general-purpose register" rule
dnl with no trailing sign-extension step.
dnl Generated sequence: movzw idx into rscratch1, sve_whilele to build the
dnl predicate pTmp, sve_lastb to copy the selected lane into dst.
dnl Parameters:
dnl   $1  type suffix used in the rule name and in the matched Extract node
dnl   $2  operand class of dst
dnl   $3  SVE element size passed to sve_whilele and sve_lastb
dnl   $4  register kind used to form the as_XXX conversion of dst
dnl These comment lines are discarded by m4 and do not reach the generated file.
define(`VECTOR_EXTRACT', `
instruct extract$1`'($2 dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
%{
predicate(UseSVE > 0);
match(Set dst (Extract$1 src idx));
effect(TEMP pTmp, KILL cr);
ins_cost(2 * SVE_COST);
format %{ "movzw rscratch1, $idx\n\t"
"sve_whilele $pTmp, $3, zr, rscratch1\n\t"
"sve_lastb $dst, $3, $pTmp, $src\n\t" %}
ins_encode %{
__ movzw(rscratch1, (int)($idx$$constant));
__ sve_whilele(as_PRegister($pTmp$$reg), __ $3, zr, rscratch1);
__ sve_lastb(as_$4($dst$$reg), __ $3, as_PRegister($pTmp$$reg), as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl $1 $2 $3 $4
VECTOR_EXTRACT(I, iRegINoSp, S, Register)
VECTOR_EXTRACT(L, iRegLNoSp, D, Register)
13 changes: 13 additions & 0 deletions src/hotspot/cpu/aarch64/assembler_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3502,6 +3502,19 @@ void mvnw(Register Rd, Register Rm,
INSN(sve_fcvtzu, 0b1);
#undef INSN

// SVE extract element to general-purpose register (lasta / lastb).
// Both emitters share one encoding and differ only in bit 16:
// 0 selects sve_lasta, 1 selects sve_lastb.
//   Rd - destination general-purpose register (bits 4..0)
//   T  - element size variant               (bits 23..22)
//   Pg - governing predicate                (field starting at bit 10)
//   Zn - source vector register             (field starting at bit 5)
#define INSN(NAME, lastb_bit)                                                  \
  void NAME(Register Rd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn) { \
    starti;                                                                    \
    f(0b00000101, 31, 24);  /* fixed opcode bits                */             \
    f(T, 23, 22);           /* element size                     */             \
    f(0b10000, 21, 17);     /* fixed opcode bits                */             \
    f(lastb_bit, 16);       /* lasta (0) versus lastb (1)       */             \
    f(0b101, 15, 13);       /* fixed opcode bits                */             \
    pgrf(Pg, 10);           /* governing predicate              */             \
    rf(Zn, 5);              /* source vector                    */             \
    rf(Rd, 0);              /* destination scalar register      */             \
  }

  INSN(sve_lasta, 0b0);
  INSN(sve_lastb, 0b1);
#undef INSN

Assembler(CodeBuffer* code) : AbstractAssembler(code) {
}

Expand Down
2 changes: 2 additions & 0 deletions test/hotspot/gtest/aarch64/aarch64-asmtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1592,6 +1592,8 @@ def generate(kind, names):
["fcvt", "__ sve_fcvt(z5, __ D, p3, z4, __ S);", "fcvt\tz5.d, p3/m, z4.s"],
["fcvtzs", "__ sve_fcvtzs(z19, __ D, p2, z18, __ D);", "fcvtzs\tz19.d, p2/m, z18.d"],
["fcvtzu", "__ sve_fcvtzu(z19, __ D, p2, z18, __ D);", "fcvtzu\tz19.d, p2/m, z18.d"],
["lasta", "__ sve_lasta(r0, __ B, p0, z15);", "lasta\tw0, p0, z15.b"],
["lastb", "__ sve_lastb(r1, __ B, p1, z16);", "lastb\tw1, p1, z16.b"],
])

print "\n// FloatImmediateOp"
Expand Down
127 changes: 65 additions & 62 deletions test/hotspot/gtest/aarch64/asmtest.out.h
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,8 @@
__ sve_fcvt(z5, __ D, p3, z4, __ S); // fcvt z5.d, p3/m, z4.s
__ sve_fcvtzs(z19, __ D, p2, z18, __ D); // fcvtzs z19.d, p2/m, z18.d
__ sve_fcvtzu(z19, __ D, p2, z18, __ D); // fcvtzu z19.d, p2/m, z18.d
__ sve_lasta(r0, __ B, p0, z15); // lasta w0, p0, z15.b
__ sve_lastb(r1, __ B, p1, z16); // lastb w1, p1, z16.b

// FloatImmediateOp
__ fmovd(v0, 2.0); // fmov d0, #2.0
Expand Down Expand Up @@ -982,30 +984,30 @@
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
0x14000000, 0x17ffffd7, 0x14000310, 0x94000000,
0x97ffffd4, 0x9400030d, 0x3400000a, 0x34fffa2a,
0x3400614a, 0x35000008, 0x35fff9c8, 0x350060e8,
0xb400000b, 0xb4fff96b, 0xb400608b, 0xb500001d,
0xb5fff91d, 0xb500603d, 0x10000013, 0x10fff8b3,
0x10005fd3, 0x90000013, 0x36300016, 0x3637f836,
0x36305f56, 0x3758000c, 0x375ff7cc, 0x37585eec,
0x14000000, 0x17ffffd7, 0x14000312, 0x94000000,
0x97ffffd4, 0x9400030f, 0x3400000a, 0x34fffa2a,
0x3400618a, 0x35000008, 0x35fff9c8, 0x35006128,
0xb400000b, 0xb4fff96b, 0xb40060cb, 0xb500001d,
0xb5fff91d, 0xb500607d, 0x10000013, 0x10fff8b3,
0x10006013, 0x90000013, 0x36300016, 0x3637f836,
0x36305f96, 0x3758000c, 0x375ff7cc, 0x37585f2c,
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
0x54005cc0, 0x54000001, 0x54fff541, 0x54005c61,
0x54000002, 0x54fff4e2, 0x54005c02, 0x54000002,
0x54fff482, 0x54005ba2, 0x54000003, 0x54fff423,
0x54005b43, 0x54000003, 0x54fff3c3, 0x54005ae3,
0x54000004, 0x54fff364, 0x54005a84, 0x54000005,
0x54fff305, 0x54005a25, 0x54000006, 0x54fff2a6,
0x540059c6, 0x54000007, 0x54fff247, 0x54005967,
0x54000008, 0x54fff1e8, 0x54005908, 0x54000009,
0x54fff189, 0x540058a9, 0x5400000a, 0x54fff12a,
0x5400584a, 0x5400000b, 0x54fff0cb, 0x540057eb,
0x5400000c, 0x54fff06c, 0x5400578c, 0x5400000d,
0x54fff00d, 0x5400572d, 0x5400000e, 0x54ffefae,
0x540056ce, 0x5400000f, 0x54ffef4f, 0x5400566f,
0x54005d00, 0x54000001, 0x54fff541, 0x54005ca1,
0x54000002, 0x54fff4e2, 0x54005c42, 0x54000002,
0x54fff482, 0x54005be2, 0x54000003, 0x54fff423,
0x54005b83, 0x54000003, 0x54fff3c3, 0x54005b23,
0x54000004, 0x54fff364, 0x54005ac4, 0x54000005,
0x54fff305, 0x54005a65, 0x54000006, 0x54fff2a6,
0x54005a06, 0x54000007, 0x54fff247, 0x540059a7,
0x54000008, 0x54fff1e8, 0x54005948, 0x54000009,
0x54fff189, 0x540058e9, 0x5400000a, 0x54fff12a,
0x5400588a, 0x5400000b, 0x54fff0cb, 0x5400582b,
0x5400000c, 0x54fff06c, 0x540057cc, 0x5400000d,
0x54fff00d, 0x5400576d, 0x5400000e, 0x54ffefae,
0x5400570e, 0x5400000f, 0x54ffef4f, 0x540056af,
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200,
Expand Down Expand Up @@ -1037,7 +1039,7 @@
0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176,
0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422,
0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a,
0xbd1b1869, 0x580046bb, 0x1800000b, 0xf8945060,
0xbd1b1869, 0x580046fb, 0x1800000b, 0xf8945060,
0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035,
0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380,
0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b11,
Expand Down Expand Up @@ -1138,46 +1140,47 @@
0x25221420, 0x25640461, 0x25a614b2, 0x25eb0553,
0x25221c24, 0x25640c60, 0x25a61cb1, 0x25eb0d52,
0x65d0a001, 0x65d1a443, 0x65cbac85, 0x65deaa53,
0x65dfaa53, 0x1e601000, 0x1e603000, 0x1e621000,
0x1e623000, 0x1e641000, 0x1e643000, 0x1e661000,
0x1e663000, 0x1e681000, 0x1e683000, 0x1e6a1000,
0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 0x1e6e1000,
0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000,
0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000,
0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000,
0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000,
0x1e7e3000, 0xf8238358, 0xf83702af, 0xf8231118,
0xf8392214, 0xf8313022, 0xf8205098, 0xf82343ec,
0xf83c734a, 0xf82261ec, 0xf8bf81a1, 0xf8bd0260,
0xf8ac12d1, 0xf8ad23dc, 0xf8bf3341, 0xf8bc53c4,
0xf8a443c6, 0xf8ba7130, 0xf8a8600c, 0xf8f48301,
0xf8e20120, 0xf8f8121a, 0xf8fe2143, 0xf8f7308a,
0xf8f05162, 0xf8e841ea, 0xf8f17142, 0xf8ec61ec,
0xf86d80e2, 0xf874021a, 0xf8641082, 0xf86c22b0,
0xf8703170, 0xf8755197, 0xf87a4397, 0xf86e730b,
0xf86163ec, 0xb82a80f0, 0xb82201a3, 0xb8331211,
0xb8232161, 0xb83e3105, 0xb82f53dd, 0xb82040f4,
0xb8347397, 0xb835633b, 0xb8a582e1, 0xb8b000bf,
0xb8ac1389, 0xb8af22dd, 0xb8bf33f3, 0xb8a551ee,
0xb8bf4370, 0xb8b47190, 0xb8ab60c9, 0xb8fe8371,
0xb8fc00fe, 0xb8ea1154, 0xb8e42238, 0xb8f13076,
0xb8fd52cf, 0xb8f342d3, 0xb8e270cf, 0xb8ec6170,
0xb86d8037, 0xb87e00b3, 0xb8711202, 0xb876214d,
0xb875337d, 0xb86c507b, 0xb861431f, 0xb8737131,
0xb87c61fb, 0xce367a86, 0xce1e6858, 0xce768d51,
0xce910451, 0xce768338, 0xce6c8622, 0xcec08363,
0xce708b9d, 0x04e900da, 0x042404f1, 0x6596012f,
0x65d40b62, 0x65c00745, 0x0456a72e, 0x04c0175b,
0x04109418, 0x041ab006, 0x0413812f, 0x04118b65,
0x04101694, 0x04d7aa0a, 0x045eb046, 0x04c81c5d,
0x044a1dd6, 0x040112fb, 0x04dcad42, 0x65809aca,
0x658d9603, 0x65c69201, 0x65878d8c, 0x65c28290,
0x04dda4e5, 0x65c2be0c, 0x6580a386, 0x65c1a624,
0x658dae6d, 0x65819638, 0x65f318ca, 0x65a030cd,
0x65a8532e, 0x65bb76d6, 0x04144e23, 0x04407ce4,
0x04363270, 0x04b6312f, 0x047e30b9, 0x052b6acd,
0x05b46d0d, 0x041a2c99, 0x04d828d1, 0x04d93e04,
0x040829da, 0x040a3c6b, 0x65c73aa1, 0x65c62a2e,
0x65d82678, 0x04c13611,
0x65dfaa53, 0x0520a1e0, 0x0521a601, 0x1e601000,
0x1e603000, 0x1e621000, 0x1e623000, 0x1e641000,
0x1e643000, 0x1e661000, 0x1e663000, 0x1e681000,
0x1e683000, 0x1e6a1000, 0x1e6a3000, 0x1e6c1000,
0x1e6c3000, 0x1e6e1000, 0x1e6e3000, 0x1e701000,
0x1e703000, 0x1e721000, 0x1e723000, 0x1e741000,
0x1e743000, 0x1e761000, 0x1e763000, 0x1e781000,
0x1e783000, 0x1e7a1000, 0x1e7a3000, 0x1e7c1000,
0x1e7c3000, 0x1e7e1000, 0x1e7e3000, 0xf8238358,
0xf83702af, 0xf8231118, 0xf8392214, 0xf8313022,
0xf8205098, 0xf82343ec, 0xf83c734a, 0xf82261ec,
0xf8bf81a1, 0xf8bd0260, 0xf8ac12d1, 0xf8ad23dc,
0xf8bf3341, 0xf8bc53c4, 0xf8a443c6, 0xf8ba7130,
0xf8a8600c, 0xf8f48301, 0xf8e20120, 0xf8f8121a,
0xf8fe2143, 0xf8f7308a, 0xf8f05162, 0xf8e841ea,
0xf8f17142, 0xf8ec61ec, 0xf86d80e2, 0xf874021a,
0xf8641082, 0xf86c22b0, 0xf8703170, 0xf8755197,
0xf87a4397, 0xf86e730b, 0xf86163ec, 0xb82a80f0,
0xb82201a3, 0xb8331211, 0xb8232161, 0xb83e3105,
0xb82f53dd, 0xb82040f4, 0xb8347397, 0xb835633b,
0xb8a582e1, 0xb8b000bf, 0xb8ac1389, 0xb8af22dd,
0xb8bf33f3, 0xb8a551ee, 0xb8bf4370, 0xb8b47190,
0xb8ab60c9, 0xb8fe8371, 0xb8fc00fe, 0xb8ea1154,
0xb8e42238, 0xb8f13076, 0xb8fd52cf, 0xb8f342d3,
0xb8e270cf, 0xb8ec6170, 0xb86d8037, 0xb87e00b3,
0xb8711202, 0xb876214d, 0xb875337d, 0xb86c507b,
0xb861431f, 0xb8737131, 0xb87c61fb, 0xce367a86,
0xce1e6858, 0xce768d51, 0xce910451, 0xce768338,
0xce6c8622, 0xcec08363, 0xce708b9d, 0x04e900da,
0x042404f1, 0x6596012f, 0x65d40b62, 0x65c00745,
0x0456a72e, 0x04c0175b, 0x04109418, 0x041ab006,
0x0413812f, 0x04118b65, 0x04101694, 0x04d7aa0a,
0x045eb046, 0x04c81c5d, 0x044a1dd6, 0x040112fb,
0x04dcad42, 0x65809aca, 0x658d9603, 0x65c69201,
0x65878d8c, 0x65c28290, 0x04dda4e5, 0x65c2be0c,
0x6580a386, 0x65c1a624, 0x658dae6d, 0x65819638,
0x65f318ca, 0x65a030cd, 0x65a8532e, 0x65bb76d6,
0x04144e23, 0x04407ce4, 0x04363270, 0x04b6312f,
0x047e30b9, 0x052b6acd, 0x05b46d0d, 0x041a2c99,
0x04d828d1, 0x04d93e04, 0x040829da, 0x040a3c6b,
0x65c73aa1, 0x65c62a2e, 0x65d82678, 0x04c13611,

};
// END Generated code -- do not edit

0 comments on commit 864a62c

Please sign in to comment.