Skip to content

Commit 78176f7

Browse files
Wang HuangAi JiamingHe Xuejin
authored and
Ningsheng Jian
committed
8263001: Add cast nodes from single precision float types to integer types implementation for Arm SVE
Co-authored-by: Wang Huang <whuang@openjdk.org> Co-authored-by: Ai Jiaming <aijiaming1@huawei.com> Co-authored-by: He Xuejin <hexuejin2@huawei.com> Reviewed-by: njian
1 parent c00a1dc commit 78176f7

File tree

5 files changed

+260
-67
lines changed

5 files changed

+260
-67
lines changed

src/hotspot/cpu/aarch64/aarch64_sve.ad

+71
Original file line numberDiff line numberDiff line change
@@ -2801,6 +2801,19 @@ instruct vcvtLtoD(vReg dst, vReg src)
28012801
ins_pipe(pipe_slow);
28022802
%}
28032803

2804+
// Vector cast F -> I: matches VectorCastF2X when the result element type is
// T_INT and the result vector is at least 16 bytes. Emits one sve_fcvtzs at
// S lane size for both operands, governed by ptrue.
instruct vcvtFtoI(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 &&
            n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorCastF2X src));
  ins_cost(SVE_COST);
  format %{ "sve_fcvtzs $dst, S, $src, S\t# convert F to I vector" %}
  ins_encode %{
    FloatRegister zd = as_FloatRegister($dst$$reg);
    FloatRegister zn = as_FloatRegister($src$$reg);
    __ sve_fcvtzs(zd, __ S, ptrue, zn, __ S);
  %}
  ins_pipe(pipe_slow);
%}
2816+
28042817

28052818
instruct vcvtItoD(vReg dst, vReg src)
28062819
%{
@@ -2846,3 +2859,61 @@ instruct vcvtFtoD(vReg dst, vReg src)
28462859
%}
28472860
ins_pipe(pipe_slow);
28482861
%}
2862+
2863+
2864+
// Vector cast F -> S: matches VectorCastF2X when the result element type is
// T_SHORT. Emits sve_fcvtzs (S lanes) into dst, zeroes tmp with sve_dup, then
// combines dst and tmp with sve_uzp1 at H lane size.
// The format string now prints the $tmp operand; it previously printed the
// literal text "tmp" in the uzp1 line.
instruct vcvtFtoS(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorCastF2X src));
  effect(TEMP tmp);
  ins_cost(3 * SVE_COST);
  format %{ "sve_fcvtzs $dst, S, $src, S\n\t"
            "sve_dup $tmp, H, 0\n\t"
            "sve_uzp1 $dst, H, $dst, $tmp\t# convert F to S vector" %}
  ins_encode %{
    __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
    __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
    __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2881+
2882+
2883+
2884+
// Vector cast F -> B: matches VectorCastF2X when the result element type is
// T_BYTE. Emits sve_fcvtzs (S lanes) into dst, zeroes tmp with sve_dup, then
// applies sve_uzp1 to dst and tmp twice: first at H lane size, then at B.
// Format string fixes: both uzp1 lines now print the $tmp operand (they
// printed the literal text "tmp"), and the trailing comment stays on the last
// instruction line instead of being pushed to a line of its own.
instruct vcvtFtoB(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorCastF2X src));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(4 * SVE_COST);
  format %{ "sve_fcvtzs $dst, S, $src, S\n\t"
            "sve_dup $tmp, H, 0\n\t"
            "sve_uzp1 $dst, H, $dst, $tmp\n\t"
            "sve_uzp1 $dst, B, $dst, $tmp\t# convert F to B vector" %}
  ins_encode %{
    __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
    __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
    __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
2903+
2904+
2905+
2906+
// Vector cast F -> L: matches VectorCastF2X when the result element type is
// T_LONG.
//
// The float lanes are first widened into D-sized containers with sve_sunpklo,
// and only then converted with sve_fcvtzs using a D destination and an S
// source. Converting at S size first and sign-extending afterwards (the
// previous order) clamps out-of-int-range inputs to the 32-bit limits, whereas
// a Java float-to-long cast must saturate at the 64-bit limits.
instruct vcvtFtoL(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorCastF2X src));
  ins_cost(2 * SVE_COST);
  format %{ "sve_sunpklo $dst, D, $src\n\t"
            "sve_fcvtzs $dst, D, $dst, S\t# convert F to L vector" %}
  ins_encode %{
    // Only the raw float bits in the low half of each D container matter to
    // the conversion below; the extension bits in the high half are ignored.
    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg));
    __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ S);
  %}
  ins_pipe(pipe_slow);
%}

src/hotspot/cpu/aarch64/aarch64_sve_ad.m4

+72-4
Original file line numberDiff line numberDiff line change
@@ -1654,7 +1654,7 @@ VECTOR_CAST_X2F_NARROW1(L, F, scvtf, S, D, dup, S, uzp1)
16541654
VECTOR_CAST_X2F_NARROW1(D, F, fcvt, S, D, dup, S, uzp1)
16551655

16561656
dnl
1657-
define(`VECTOR_CAST_I2F', `
1657+
define(`VECTOR_CAST_X2X', `
16581658
instruct vcvt$1to$2`'(vReg dst, vReg src)
16591659
%{
16601660
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
@@ -1667,9 +1667,10 @@ instruct vcvt$1to$2`'(vReg dst, vReg src)
16671667
%}
16681668
ins_pipe(pipe_slow);
16691669
%}')dnl
1670-
dnl $1 $2 $3 $4
1671-
VECTOR_CAST_I2F(I, F, scvtf, S)
1672-
VECTOR_CAST_I2F(L, D, scvtf, D)
1670+
dnl $1 $2 $3 $4
1671+
VECTOR_CAST_X2X(I, F, scvtf, S)
1672+
VECTOR_CAST_X2X(L, D, scvtf, D)
1673+
VECTOR_CAST_X2X(F, I, fcvtzs, S)
16731674

16741675
dnl
16751676
define(`VECTOR_CAST_X2F_EXTEND1', `
@@ -1691,3 +1692,70 @@ dnl $1 $2 $3 $4 $5 $6
16911692
VECTOR_CAST_X2F_EXTEND1(I, D, sunpklo, D, scvtf, D)
16921693
VECTOR_CAST_X2F_EXTEND1(S, F, sunpklo, S, scvtf, S)
16931694
VECTOR_CAST_X2F_EXTEND1(F, D, sunpklo, D, fcvt, S)
1695+
1696+
dnl
1697+
dnl VECTOR_CAST_F2X_NARROW1(from, to, cvt_insn, cvt_size, fill_insn, narrow_size, pack_insn)
dnl Generates instruct vcvt<from>to<to>: cvt_insn on src into dst at cvt_size,
dnl fill_insn of 0 into tmp at narrow_size, then pack_insn of dst and tmp at
dnl narrow_size. The format template prints the tmp operand with a dollar
dnl prefix; it used to print the bare word tmp.
define(`VECTOR_CAST_F2X_NARROW1', `
instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
  match(Set dst (VectorCast$1`'2X src));
  effect(TEMP tmp);
  ins_cost(3 * SVE_COST);
  format %{ "sve_$3 $dst, $4, $src, $4\n\t"
            "sve_$5 $tmp, $6, 0\n\t"
            "sve_$7 $dst, $6, $dst, $tmp\t# convert $1 to $2 vector" %}
  ins_encode %{
    __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4);
    __ sve_$5(as_FloatRegister($tmp$$reg), __ $6, 0);
    __ sve_$7(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
1715+
dnl $1 $2 $3 $4 $5 $6 $7
1716+
VECTOR_CAST_F2X_NARROW1(F, S, fcvtzs, S, dup, H, uzp1)
1717+
1718+
1719+
dnl
1720+
dnl VECTOR_CAST_F2X_NARROW2(from, to, cvt_insn, cvt_size, fill_insn, narrow_size1, pack_insn, narrow_size2)
dnl Generates instruct vcvt<from>to<to>: cvt_insn on src into dst at cvt_size,
dnl fill_insn of 0 into tmp at narrow_size1, then pack_insn of dst and tmp
dnl twice, at narrow_size1 and then at narrow_size2. The format template
dnl prints the tmp operand with a dollar prefix (it used to print the bare
dnl word tmp) and keeps the trailing comment on the last instruction line.
define(`VECTOR_CAST_F2X_NARROW2', `
instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
  match(Set dst (VectorCast$1`'2X src));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(4 * SVE_COST);
  format %{ "sve_$3 $dst, $4, $src, $4\n\t"
            "sve_$5 $tmp, $6, 0\n\t"
            "sve_$7 $dst, $6, $dst, $tmp\n\t"
            "sve_$7 $dst, $8, $dst, $tmp\t# convert $1 to $2 vector" %}
  ins_encode %{
    __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4);
    __ sve_$5(as_FloatRegister($tmp$$reg), __ $6, 0);
    __ sve_$7(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ sve_$7(as_FloatRegister($dst$$reg), __ $8, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
1740+
dnl $1 $2 $3 $4 $5 $6 $7 $8
1741+
VECTOR_CAST_F2X_NARROW2(F, B, fcvtzs, S, dup, H, uzp1, B)
1742+
1743+
1744+
dnl
1745+
dnl VECTOR_CAST_F2X_EXTEND1(from, to, cvt_insn, src_size, unpack_insn, dst_size)
dnl Generates instruct vcvt<from>to<to> for a float source and a wider integer
dnl result. The source lanes are widened with unpack_insn first, and cvt_insn
dnl then converts from src_size to dst_size. Converting at src_size and
dnl widening afterwards would clamp large inputs to the narrower integer range
dnl instead of the range of the wider result type.
define(`VECTOR_CAST_F2X_EXTEND1', `
instruct vcvt$1to$2`'(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
  match(Set dst (VectorCast$1`'2X src));
  ins_cost(2 * SVE_COST);
  format %{ "sve_$5 $dst, $6, $src\n\t"
            "sve_$3 $dst, $6, $dst, $4\t# convert $1 to $2 vector" %}
  ins_encode %{
    __ sve_$5(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($src$$reg));
    __ sve_$3(as_FloatRegister($dst$$reg), __ $6, ptrue, as_FloatRegister($dst$$reg), __ $4);
  %}
  ins_pipe(pipe_slow);
%}')dnl
1760+
dnl $1 $2 $3 $4 $5 $6
1761+
VECTOR_CAST_F2X_EXTEND1(F, L, fcvtzs, S, sunpklo, D)

src/hotspot/cpu/aarch64/assembler_aarch64.hpp

+50
Original file line numberDiff line numberDiff line change
@@ -3452,6 +3452,56 @@ void mvnw(Register Rd, Register Rm,
34523452
pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
34533453
}
34543454

3455+
private:
3456+
3457+
// Derives the opc / opc2 instruction fields for the SVE FCVTZS / FCVTZU
// encoders from the destination and source register variants.
//
//   T_dst, T_src : lane sizes of the destination and source (B and Q rejected)
//   opc, opc2    : outputs, written on every path
//
// Normally opc is simply T_src and opc2 is T_dst. The two mixed S/D
// combinations are the exception and use the fixed patterns listed in Arm's
// architecture reference manual:
// +-----+------+---+-------------------------------------+
// | opc | opc2 | U | Instruction Details                 |
// +-----+------+---+-------------------------------------+
// | 11  | 10   | 0 | FCVTZS — Single-precision to 64-bit |
// | 11  | 10   | 1 | FCVTZU — Single-precision to 64-bit |
// | 11  | 00   | 0 | FCVTZS — Double-precision to 32-bit |
// | 11  | 00   | 1 | FCVTZU — Double-precision to 32-bit |
// +-----+------+---+-------------------------------------+
void encode_fcvtz_T (SIMD_RegVariant T_dst, SIMD_RegVariant T_src,
                     unsigned& opc, unsigned& opc2) {
  assert(!(T_src == B || T_dst == B ||
           T_src == Q || T_dst == Q), "invalid register variant");
  // A double source may not narrow to H; any other source may not narrow.
  assert(T_src == D ? T_dst != H : T_src <= T_dst, "invalid register variant");

  if (T_src == S && T_dst == D) {          // Single-precision to 64-bit
    opc  = D;
    opc2 = S;
  } else if (T_src == D && T_dst == S) {   // Double-precision to 32-bit
    opc  = D;
    opc2 = B;
  } else {
    opc  = T_src;
    opc2 = T_dst;
  }
}
3487+
public:
3488+
3489+
// SVE floating-point convert to integer (predicated)
3490+
#define INSN(NAME, sign) \
3491+
void NAME(FloatRegister Zd, SIMD_RegVariant T_dst, PRegister Pg, \
3492+
FloatRegister Zn, SIMD_RegVariant T_src) { \
3493+
starti; \
3494+
unsigned opc, opc2; \
3495+
encode_fcvtz_T(T_dst, T_src, opc, opc2); \
3496+
f(0b01100101, 31, 24), f(opc, 23, 22), f(0b011, 21, 19); \
3497+
f(opc2, 18, 17), f(sign, 16), f(0b101, 15, 13); \
3498+
pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0); \
3499+
}
3500+
3501+
INSN(sve_fcvtzs, 0b0);
3502+
INSN(sve_fcvtzu, 0b1);
3503+
#undef INSN
3504+
34553505
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
34563506
}
34573507

test/hotspot/gtest/aarch64/aarch64-asmtest.py

+2
Original file line numberDiff line numberDiff line change
@@ -1590,6 +1590,8 @@ def generate(kind, names):
15901590
["scvtf", "__ sve_scvtf(z1, __ D, p0, z0, __ S);", "scvtf\tz1.d, p0/m, z0.s"],
15911591
["ucvtf", "__ sve_ucvtf(z3, __ D, p1, z2, __ S);", "ucvtf\tz3.d, p1/m, z2.s"],
15921592
["fcvt", "__ sve_fcvt(z5, __ D, p3, z4, __ S);", "fcvt\tz5.d, p3/m, z4.s"],
1593+
["fcvtzs", "__ sve_fcvtzs(z19, __ D, p2, z18, __ D);", "fcvtzs\tz19.d, p2/m, z18.d"],
1594+
["fcvtzu", "__ sve_fcvtzu(z19, __ D, p2, z18, __ D);", "fcvtzu\tz19.d, p2/m, z18.d"],
15931595
])
15941596

15951597
print "\n// FloatImmediateOp"

0 commit comments

Comments
 (0)