Skip to content
Permalink
Browse files
8263644: Add Extract float nodes implementation for Arm SVE
Co-authored-by: Wang Huang <whuang@openjdk.org>
Co-authored-by: He Xuejin <hexuejin2@huawei.com>
Co-authored-by: Ai Jiaming <aijiaming1@huawei.com>
Reviewed-by: xgong, njian
  • Loading branch information
3 people authored and Ningsheng Jian committed Mar 23, 2021
1 parent 864a62c commit 47334c5b96163c7abe13d38f551d4df4454e93cf
Showing 5 changed files with 121 additions and 73 deletions.
@@ -222,8 +222,6 @@ source %{
case Op_MulReductionVL:
// Others
case Op_ExtractC:
case Op_ExtractD:
case Op_ExtractF:
case Op_ExtractUB:
// Vector API specific
case Op_LoadVectorGather:
@@ -3028,15 +3026,15 @@ instruct extractS(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
%}


instruct extractI(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
instruct extractI(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
%{
predicate(UseSVE > 0);
match(Set dst (ExtractI src idx));
effect(TEMP pTmp, KILL cr);
ins_cost(2 * SVE_COST);
format %{ "movzw rscratch1, $idx\n\t"
"sve_whilele $pTmp, S, zr, rscratch1\n\t"
"sve_lastb $dst, S, $pTmp, $src\n\t" %}
"sve_lastb $dst, S, $pTmp, $src\t# extract from vector(I)" %}
ins_encode %{
__ movzw(rscratch1, (int)($idx$$constant));
__ sve_whilele(as_PRegister($pTmp$$reg), __ S, zr, rscratch1);
@@ -3045,19 +3043,53 @@ instruct extractI(iRegINoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr
ins_pipe(pipe_slow);
%}

// Match rule for ExtractL when UseSVE > 0: move the 64-bit (D) element at
// constant index $idx of vector $src into the general-purpose register $dst.
// pTmp is a temporary governing predicate and the flags register is killed.
// The emitted sequence also overwrites rscratch1.
instruct extractL(iRegLNoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
instruct extractL(iRegLNoSp dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
%{
predicate(UseSVE > 0);
match(Set dst (ExtractL src idx));
effect(TEMP pTmp, KILL cr);
ins_cost(2 * SVE_COST);
format %{ "movzw rscratch1, $idx\n\t"
"sve_whilele $pTmp, D, zr, rscratch1\n\t"
"sve_lastb $dst, D, $pTmp, $src\n\t" %}
"sve_lastb $dst, D, $pTmp, $src\t# extract from vector(L)" %}
ins_encode %{
// Materialize the constant lane index in the scratch register.
__ movzw(rscratch1, (int)($idx$$constant));
// Build the governing predicate from zr and the index (D-sized elements).
// NOTE(review): per the Arm SVE WHILELE definition this should activate
// lanes 0..idx -- confirm against the architecture manual.
__ sve_whilele(as_PRegister($pTmp$$reg), __ D, zr, rscratch1);
// LASTB with a general-purpose destination; presumably selects the last
// active lane, i.e. lane idx.
__ sve_lastb(as_Register($dst$$reg), __ D, as_PRegister($pTmp$$reg), as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}

// Match rule for ExtractF when UseSVE > 0: move the 32-bit (S) element at
// constant index $idx of vector $src into the FP register $dst.
// pTmp is a temporary governing predicate and the flags register is killed.
// The emitted sequence also overwrites rscratch1.
instruct extractF(vRegF dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
%{
predicate(UseSVE > 0);
match(Set dst (ExtractF src idx));
effect(TEMP pTmp, KILL cr);
ins_cost(2 * SVE_COST);
format %{ "movzw rscratch1, $idx\n\t"
"sve_whilele $pTmp, S, zr, rscratch1\n\t"
"sve_lastb $dst, S, $pTmp, $src\t# extract from vector(F)" %}
ins_encode %{
// Materialize the constant lane index in the scratch register.
__ movzw(rscratch1, (int)($idx$$constant));
// Build the governing predicate from zr and the index (S-sized elements).
// NOTE(review): per the Arm SVE WHILELE definition this should activate
// lanes 0..idx -- confirm against the architecture manual.
__ sve_whilele(as_PRegister($pTmp$$reg), __ S, zr, rscratch1);
// LASTB variant taking a FloatRegister destination, so the value stays in
// the FP/SIMD register file; presumably selects the last active lane (idx).
__ sve_lastb(as_FloatRegister($dst$$reg), __ S, as_PRegister($pTmp$$reg), as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}

// Match rule for ExtractD when UseSVE > 0: move the 64-bit (D) element at
// constant index $idx of vector $src into the FP register $dst.
// pTmp is a temporary governing predicate and the flags register is killed.
// The emitted sequence also overwrites rscratch1.
instruct extractD(vRegD dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
%{
predicate(UseSVE > 0);
match(Set dst (ExtractD src idx));
effect(TEMP pTmp, KILL cr);
ins_cost(2 * SVE_COST);
format %{ "movzw rscratch1, $idx\n\t"
"sve_whilele $pTmp, D, zr, rscratch1\n\t"
"sve_lastb $dst, D, $pTmp, $src\t# extract from vector(D)" %}
ins_encode %{
// Materialize the constant lane index in the scratch register.
__ movzw(rscratch1, (int)($idx$$constant));
// Build the governing predicate from zr and the index (D-sized elements).
// NOTE(review): per the Arm SVE WHILELE definition this should activate
// lanes 0..idx -- confirm against the architecture manual.
__ sve_whilele(as_PRegister($pTmp$$reg), __ D, zr, rscratch1);
// LASTB variant taking a FloatRegister destination, so the value stays in
// the FP/SIMD register file; presumably selects the last active lane (idx).
__ sve_lastb(as_FloatRegister($dst$$reg), __ D, as_PRegister($pTmp$$reg), as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}
@@ -218,8 +218,6 @@ source %{
case Op_MulReductionVL:
// Others
case Op_ExtractC:
case Op_ExtractD:
case Op_ExtractF:
case Op_ExtractUB:
// Vector API specific
case Op_LoadVectorGather:
@@ -1810,15 +1808,15 @@ VECTOR_EXTRACT_SXT(S, iRegINoSp, H, Register, 15U)

dnl
define(`VECTOR_EXTRACT', `
instruct extract$1`'($2 dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
instruct extract$1`'($2 dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
%{
predicate(UseSVE > 0);
match(Set dst (Extract$1 src idx));
effect(TEMP pTmp, KILL cr);
ins_cost(2 * SVE_COST);
format %{ "movzw rscratch1, $idx\n\t"
"sve_whilele $pTmp, $3, zr, rscratch1\n\t"
"sve_lastb $dst, $3, $pTmp, $src\n\t" %}
"sve_lastb $dst, $3, $pTmp, $src\t# extract from vector($1)" %}
ins_encode %{
__ movzw(rscratch1, (int)($idx$$constant));
__ sve_whilele(as_PRegister($pTmp$$reg), __ $3, zr, rscratch1);
@@ -1829,3 +1827,5 @@ instruct extract$1`'($2 dst, vReg src, immI idx, pRegGov pTmp, rFlagsReg cr)
dnl $1 $2 $3 $4
VECTOR_EXTRACT(I, iRegINoSp, S, Register)
VECTOR_EXTRACT(L, iRegLNoSp, D, Register)
VECTOR_EXTRACT(F, vRegF, S, FloatRegister)
VECTOR_EXTRACT(D, vRegD, D, FloatRegister)
@@ -3515,6 +3515,18 @@ void mvnw(Register Rd, Register Rm,
INSN(sve_lastb, 0b1);
#undef INSN

// Emitters for the LASTA/LASTB forms whose destination is a FloatRegister
// (Vd) rather than a general-purpose register.
//   NAME   - name of the generated emitter method
//   before - value placed in bit 16; 0 for sve_lasta, 1 for sve_lastb
// Field layout written by the macro:
//   [31:24] 0b00000101   [23:22] T (element size)   [21:17] 0b10001
//   [16]    before       [15:13] 0b100
//   Pg at bit 10, Zn at bit 5, Vd at bit 0
#define INSN(NAME, before) \
void NAME(FloatRegister Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn) { \
starti; \
f(0b00000101, 31, 24), f(T, 23, 22), f(0b10001, 21, 17); \
f(before, 16), f(0b100, 15, 13); \
pgrf(Pg, 10), rf(Zn, 5), rf(Vd, 0); \
}

// Instantiate both emitters, then drop the helper macro so the name INSN
// can be reused by later definitions.
INSN(sve_lasta, 0b0);
INSN(sve_lastb, 0b1);
#undef INSN

Assembler(CodeBuffer* code) : AbstractAssembler(code) {
}

@@ -1594,6 +1594,8 @@ def generate(kind, names):
["fcvtzu", "__ sve_fcvtzu(z19, __ D, p2, z18, __ D);", "fcvtzu\tz19.d, p2/m, z18.d"],
["lasta", "__ sve_lasta(r0, __ B, p0, z15);", "lasta\tw0, p0, z15.b"],
["lastb", "__ sve_lastb(r1, __ B, p1, z16);", "lastb\tw1, p1, z16.b"],
["lasta", "__ sve_lasta(v0, __ B, p0, z15);", "lasta\tb0, p0, z15.b"],
["lastb", "__ sve_lastb(v1, __ B, p1, z16);", "lastb\tb1, p1, z16.b"],
])

print "\n// FloatImmediateOp"
@@ -779,6 +779,8 @@
__ sve_fcvtzu(z19, __ D, p2, z18, __ D); // fcvtzu z19.d, p2/m, z18.d
__ sve_lasta(r0, __ B, p0, z15); // lasta w0, p0, z15.b
__ sve_lastb(r1, __ B, p1, z16); // lastb w1, p1, z16.b
__ sve_lasta(v0, __ B, p0, z15); // lasta b0, p0, z15.b
__ sve_lastb(v1, __ B, p1, z16); // lastb b1, p1, z16.b

// FloatImmediateOp
__ fmovd(v0, 2.0); // fmov d0, #2.0
@@ -984,30 +986,30 @@
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
0x14000000, 0x17ffffd7, 0x14000312, 0x94000000,
0x97ffffd4, 0x9400030f, 0x3400000a, 0x34fffa2a,
0x3400618a, 0x35000008, 0x35fff9c8, 0x35006128,
0xb400000b, 0xb4fff96b, 0xb40060cb, 0xb500001d,
0xb5fff91d, 0xb500607d, 0x10000013, 0x10fff8b3,
0x10006013, 0x90000013, 0x36300016, 0x3637f836,
0x36305f96, 0x3758000c, 0x375ff7cc, 0x37585f2c,
0x14000000, 0x17ffffd7, 0x14000314, 0x94000000,
0x97ffffd4, 0x94000311, 0x3400000a, 0x34fffa2a,
0x340061ca, 0x35000008, 0x35fff9c8, 0x35006168,
0xb400000b, 0xb4fff96b, 0xb400610b, 0xb500001d,
0xb5fff91d, 0xb50060bd, 0x10000013, 0x10fff8b3,
0x10006053, 0x90000013, 0x36300016, 0x3637f836,
0x36305fd6, 0x3758000c, 0x375ff7cc, 0x37585f6c,
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
0x54005d00, 0x54000001, 0x54fff541, 0x54005ca1,
0x54000002, 0x54fff4e2, 0x54005c42, 0x54000002,
0x54fff482, 0x54005be2, 0x54000003, 0x54fff423,
0x54005b83, 0x54000003, 0x54fff3c3, 0x54005b23,
0x54000004, 0x54fff364, 0x54005ac4, 0x54000005,
0x54fff305, 0x54005a65, 0x54000006, 0x54fff2a6,
0x54005a06, 0x54000007, 0x54fff247, 0x540059a7,
0x54000008, 0x54fff1e8, 0x54005948, 0x54000009,
0x54fff189, 0x540058e9, 0x5400000a, 0x54fff12a,
0x5400588a, 0x5400000b, 0x54fff0cb, 0x5400582b,
0x5400000c, 0x54fff06c, 0x540057cc, 0x5400000d,
0x54fff00d, 0x5400576d, 0x5400000e, 0x54ffefae,
0x5400570e, 0x5400000f, 0x54ffef4f, 0x540056af,
0x54005d40, 0x54000001, 0x54fff541, 0x54005ce1,
0x54000002, 0x54fff4e2, 0x54005c82, 0x54000002,
0x54fff482, 0x54005c22, 0x54000003, 0x54fff423,
0x54005bc3, 0x54000003, 0x54fff3c3, 0x54005b63,
0x54000004, 0x54fff364, 0x54005b04, 0x54000005,
0x54fff305, 0x54005aa5, 0x54000006, 0x54fff2a6,
0x54005a46, 0x54000007, 0x54fff247, 0x540059e7,
0x54000008, 0x54fff1e8, 0x54005988, 0x54000009,
0x54fff189, 0x54005929, 0x5400000a, 0x54fff12a,
0x540058ca, 0x5400000b, 0x54fff0cb, 0x5400586b,
0x5400000c, 0x54fff06c, 0x5400580c, 0x5400000d,
0x54fff00d, 0x540057ad, 0x5400000e, 0x54ffefae,
0x5400574e, 0x5400000f, 0x54ffef4f, 0x540056ef,
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200,
@@ -1039,7 +1041,7 @@
0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176,
0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422,
0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a,
0xbd1b1869, 0x580046fb, 0x1800000b, 0xf8945060,
0xbd1b1869, 0x5800473b, 0x1800000b, 0xf8945060,
0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035,
0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380,
0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b11,
@@ -1140,47 +1142,47 @@
0x25221420, 0x25640461, 0x25a614b2, 0x25eb0553,
0x25221c24, 0x25640c60, 0x25a61cb1, 0x25eb0d52,
0x65d0a001, 0x65d1a443, 0x65cbac85, 0x65deaa53,
0x65dfaa53, 0x0520a1e0, 0x0521a601, 0x1e601000,
0x1e603000, 0x1e621000, 0x1e623000, 0x1e641000,
0x1e643000, 0x1e661000, 0x1e663000, 0x1e681000,
0x1e683000, 0x1e6a1000, 0x1e6a3000, 0x1e6c1000,
0x1e6c3000, 0x1e6e1000, 0x1e6e3000, 0x1e701000,
0x1e703000, 0x1e721000, 0x1e723000, 0x1e741000,
0x1e743000, 0x1e761000, 0x1e763000, 0x1e781000,
0x1e783000, 0x1e7a1000, 0x1e7a3000, 0x1e7c1000,
0x1e7c3000, 0x1e7e1000, 0x1e7e3000, 0xf8238358,
0xf83702af, 0xf8231118, 0xf8392214, 0xf8313022,
0xf8205098, 0xf82343ec, 0xf83c734a, 0xf82261ec,
0xf8bf81a1, 0xf8bd0260, 0xf8ac12d1, 0xf8ad23dc,
0xf8bf3341, 0xf8bc53c4, 0xf8a443c6, 0xf8ba7130,
0xf8a8600c, 0xf8f48301, 0xf8e20120, 0xf8f8121a,
0xf8fe2143, 0xf8f7308a, 0xf8f05162, 0xf8e841ea,
0xf8f17142, 0xf8ec61ec, 0xf86d80e2, 0xf874021a,
0xf8641082, 0xf86c22b0, 0xf8703170, 0xf8755197,
0xf87a4397, 0xf86e730b, 0xf86163ec, 0xb82a80f0,
0xb82201a3, 0xb8331211, 0xb8232161, 0xb83e3105,
0xb82f53dd, 0xb82040f4, 0xb8347397, 0xb835633b,
0xb8a582e1, 0xb8b000bf, 0xb8ac1389, 0xb8af22dd,
0xb8bf33f3, 0xb8a551ee, 0xb8bf4370, 0xb8b47190,
0xb8ab60c9, 0xb8fe8371, 0xb8fc00fe, 0xb8ea1154,
0xb8e42238, 0xb8f13076, 0xb8fd52cf, 0xb8f342d3,
0xb8e270cf, 0xb8ec6170, 0xb86d8037, 0xb87e00b3,
0xb8711202, 0xb876214d, 0xb875337d, 0xb86c507b,
0xb861431f, 0xb8737131, 0xb87c61fb, 0xce367a86,
0xce1e6858, 0xce768d51, 0xce910451, 0xce768338,
0xce6c8622, 0xcec08363, 0xce708b9d, 0x04e900da,
0x042404f1, 0x6596012f, 0x65d40b62, 0x65c00745,
0x0456a72e, 0x04c0175b, 0x04109418, 0x041ab006,
0x0413812f, 0x04118b65, 0x04101694, 0x04d7aa0a,
0x045eb046, 0x04c81c5d, 0x044a1dd6, 0x040112fb,
0x04dcad42, 0x65809aca, 0x658d9603, 0x65c69201,
0x65878d8c, 0x65c28290, 0x04dda4e5, 0x65c2be0c,
0x6580a386, 0x65c1a624, 0x658dae6d, 0x65819638,
0x65f318ca, 0x65a030cd, 0x65a8532e, 0x65bb76d6,
0x04144e23, 0x04407ce4, 0x04363270, 0x04b6312f,
0x047e30b9, 0x052b6acd, 0x05b46d0d, 0x041a2c99,
0x04d828d1, 0x04d93e04, 0x040829da, 0x040a3c6b,
0x65c73aa1, 0x65c62a2e, 0x65d82678, 0x04c13611,

0x65dfaa53, 0x0520a1e0, 0x0521a601, 0x052281e0,
0x05238601, 0x1e601000, 0x1e603000, 0x1e621000,
0x1e623000, 0x1e641000, 0x1e643000, 0x1e661000,
0x1e663000, 0x1e681000, 0x1e683000, 0x1e6a1000,
0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 0x1e6e1000,
0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000,
0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000,
0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000,
0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000,
0x1e7e3000, 0xf8238358, 0xf83702af, 0xf8231118,
0xf8392214, 0xf8313022, 0xf8205098, 0xf82343ec,
0xf83c734a, 0xf82261ec, 0xf8bf81a1, 0xf8bd0260,
0xf8ac12d1, 0xf8ad23dc, 0xf8bf3341, 0xf8bc53c4,
0xf8a443c6, 0xf8ba7130, 0xf8a8600c, 0xf8f48301,
0xf8e20120, 0xf8f8121a, 0xf8fe2143, 0xf8f7308a,
0xf8f05162, 0xf8e841ea, 0xf8f17142, 0xf8ec61ec,
0xf86d80e2, 0xf874021a, 0xf8641082, 0xf86c22b0,
0xf8703170, 0xf8755197, 0xf87a4397, 0xf86e730b,
0xf86163ec, 0xb82a80f0, 0xb82201a3, 0xb8331211,
0xb8232161, 0xb83e3105, 0xb82f53dd, 0xb82040f4,
0xb8347397, 0xb835633b, 0xb8a582e1, 0xb8b000bf,
0xb8ac1389, 0xb8af22dd, 0xb8bf33f3, 0xb8a551ee,
0xb8bf4370, 0xb8b47190, 0xb8ab60c9, 0xb8fe8371,
0xb8fc00fe, 0xb8ea1154, 0xb8e42238, 0xb8f13076,
0xb8fd52cf, 0xb8f342d3, 0xb8e270cf, 0xb8ec6170,
0xb86d8037, 0xb87e00b3, 0xb8711202, 0xb876214d,
0xb875337d, 0xb86c507b, 0xb861431f, 0xb8737131,
0xb87c61fb, 0xce367a86, 0xce1e6858, 0xce768d51,
0xce910451, 0xce768338, 0xce6c8622, 0xcec08363,
0xce708b9d, 0x04e900da, 0x042404f1, 0x6596012f,
0x65d40b62, 0x65c00745, 0x0456a72e, 0x04c0175b,
0x04109418, 0x041ab006, 0x0413812f, 0x04118b65,
0x04101694, 0x04d7aa0a, 0x045eb046, 0x04c81c5d,
0x044a1dd6, 0x040112fb, 0x04dcad42, 0x65809aca,
0x658d9603, 0x65c69201, 0x65878d8c, 0x65c28290,
0x04dda4e5, 0x65c2be0c, 0x6580a386, 0x65c1a624,
0x658dae6d, 0x65819638, 0x65f318ca, 0x65a030cd,
0x65a8532e, 0x65bb76d6, 0x04144e23, 0x04407ce4,
0x04363270, 0x04b6312f, 0x047e30b9, 0x052b6acd,
0x05b46d0d, 0x041a2c99, 0x04d828d1, 0x04d93e04,
0x040829da, 0x040a3c6b, 0x65c73aa1, 0x65c62a2e,
0x65d82678, 0x04c13611,
};
// END Generated code -- do not edit

0 comments on commit 47334c5

Please sign in to comment.