Skip to content
Permalink
Browse files
8263001: Add cast nodes from single precision float types to integer…
… types implementation for Arm SVE

Co-authored-by: Wang Huang <whuang@openjdk.org>
Co-authored-by: Ai Jiaming <aijiaming1@huawei.com>
Co-authored-by: He Xuejin <hexuejin2@huawei.com>
Reviewed-by: njian
  • Loading branch information
3 people authored and Ningsheng Jian committed Mar 10, 2021
1 parent c00a1dc commit 78176f7f1e5a50fed5dc2c620b0dfa5202ab755f
@@ -2801,6 +2801,19 @@ instruct vcvtLtoD(vReg dst, vReg src)
ins_pipe(pipe_slow);
%}

// Vector cast float -> int for SVE.
// Selected for VectorCastF2X nodes when SVE is enabled, the result vector is
// at least 16 bytes long and its element type is T_INT. Source and result
// lanes are both 32 bits wide, so a single FCVTZS over S lanes is emitted.
instruct vcvtFtoI(vReg dst, vReg src)
%{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
n->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (VectorCastF2X src));
ins_cost(SVE_COST);
format %{ "sve_fcvtzs $dst, S, $src, S\t# convert F to I vector" %}
ins_encode %{
// ptrue is passed as the governing predicate (presumably all lanes active
// -- confirm against the register definitions).
__ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
%}
ins_pipe(pipe_slow);
%}


instruct vcvtItoD(vReg dst, vReg src)
%{
@@ -2846,3 +2859,61 @@ instruct vcvtFtoD(vReg dst, vReg src)
%}
ins_pipe(pipe_slow);
%}


// Vector cast float -> short for SVE.
// Selected for VectorCastF2X nodes when SVE is enabled, the result vector is
// at least 16 bytes long and its element type is T_SHORT.
// Sequence:
//   1. FCVTZS converts every 32-bit float lane to a 32-bit integer in dst.
//   2. DUP fills tmp with zero halfwords.
//   3. UZP1 on H lanes keeps the even-numbered halfwords (the low half of
//      each 32-bit result) of dst, followed by those of tmp, which packs the
//      narrowed values into the low part of dst and zeroes the rest.
instruct vcvtFtoS(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorCastF2X src));
  effect(TEMP tmp);
  ins_cost(3 * SVE_COST);
  // The last operand must be spelled $tmp so the allocated register is
  // printed instead of the literal text "tmp".
  format %{ "sve_fcvtzs $dst, S, $src, S\n\t"
            "sve_dup $tmp, H, 0\n\t"
            "sve_uzp1 $dst, H, $dst, $tmp\t# convert F to S vector" %}
  ins_encode %{
    __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
    __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
    __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}



// Vector cast float -> byte for SVE.
// Selected for VectorCastF2X nodes when SVE is enabled, the result vector is
// at least 16 bytes long and its element type is T_BYTE.
// Sequence:
//   1. FCVTZS converts every 32-bit float lane to a 32-bit integer in dst.
//   2. DUP fills tmp with zero halfwords.
//   3. UZP1 on H lanes narrows the 32-bit results to 16 bits.
//   4. UZP1 on B lanes narrows the 16-bit results to 8 bits.
// In both UZP1 steps the upper part of dst is filled from the zeroed tmp.
instruct vcvtFtoB(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorCastF2X src));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(4 * SVE_COST);
  // Operands are spelled $tmp so the allocated register is printed, and the
  // trailing comment stays on the same line as the last instruction.
  format %{ "sve_fcvtzs $dst, S, $src, S\n\t"
            "sve_dup $tmp, H, 0\n\t"
            "sve_uzp1 $dst, H, $dst, $tmp\n\t"
            "sve_uzp1 $dst, B, $dst, $tmp\t# convert F to B vector" %}
  ins_encode %{
    __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
    __ sve_dup(as_FloatRegister($tmp$$reg), __ H, 0);
    __ sve_uzp1(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ sve_uzp1(as_FloatRegister($dst$$reg), __ B, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}



// Vector cast float -> long for SVE.
// Selected for VectorCastF2X nodes when SVE is enabled, the result vector is
// at least 16 bytes long and its element type is T_LONG.
//
// The floats are first widened into 64-bit containers and only then
// converted with the single-precision -> 64-bit form of FCVTZS. Converting
// to a 32-bit integer first and sign-extending afterwards would saturate
// out-of-range inputs to the int range instead of the long range, which does
// not match Java's float-to-long conversion.
instruct vcvtFtoL(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorCastF2X src));
  ins_cost(2 * SVE_COST);
  format %{ "sve_sunpklo $dst, D, $src\n\t"
            "sve_fcvtzs $dst, D, $dst, S\t# convert F to L vector" %}
  ins_encode %{
    // Place each float of the low half of src in the low 32 bits of a 64-bit
    // lane; the extension bits in the upper half are ignored by FCVTZS.
    __ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg));
    // Single-precision -> 64-bit signed integer, rounding toward zero.
    __ sve_fcvtzs(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ S);
  %}
  ins_pipe(pipe_slow);
%}
@@ -1654,7 +1654,7 @@ VECTOR_CAST_X2F_NARROW1(L, F, scvtf, S, D, dup, S, uzp1)
VECTOR_CAST_X2F_NARROW1(D, F, fcvt, S, D, dup, S, uzp1)

dnl
define(`VECTOR_CAST_I2F', `
define(`VECTOR_CAST_X2X', `
instruct vcvt$1to$2`'(vReg dst, vReg src)
%{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
@@ -1667,9 +1667,10 @@ instruct vcvt$1to$2`'(vReg dst, vReg src)
%}
ins_pipe(pipe_slow);
%}')dnl
dnl $1 $2 $3 $4
VECTOR_CAST_I2F(I, F, scvtf, S)
VECTOR_CAST_I2F(L, D, scvtf, D)
dnl $1 $2 $3 $4
VECTOR_CAST_X2X(I, F, scvtf, S)
VECTOR_CAST_X2X(L, D, scvtf, D)
VECTOR_CAST_X2X(F, I, fcvtzs, S)

dnl
define(`VECTOR_CAST_X2F_EXTEND1', `
@@ -1691,3 +1692,70 @@ dnl $1 $2 $3 $4 $5 $6
VECTOR_CAST_X2F_EXTEND1(I, D, sunpklo, D, scvtf, D)
VECTOR_CAST_X2F_EXTEND1(S, F, sunpklo, S, scvtf, S)
VECTOR_CAST_X2F_EXTEND1(F, D, sunpklo, D, fcvt, S)

dnl
dnl VECTOR_CAST_F2X_NARROW1 generates a cast rule that converts in place and
dnl then narrows the result by one element-size step.
dnl   $1 source type letter      $2 destination type letter
dnl   $3 convert instruction     $4 lane size used by the conversion
dnl   $5 instruction zeroing tmp $6 lane size of the narrowed result
dnl   $7 narrowing instruction
dnl The format string spells the temporary as $tmp so that the allocated
dnl register is printed rather than the literal text tmp.
define(`VECTOR_CAST_F2X_NARROW1', `
instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
  match(Set dst (VectorCast$1`'2X src));
  effect(TEMP tmp);
  ins_cost(3 * SVE_COST);
  format %{ "sve_$3 $dst, $4, $src, $4\n\t"
            "sve_$5 $tmp, $6, 0\n\t"
            "sve_$7 $dst, $6, $dst, $tmp\t# convert $1 to $2 vector" %}
  ins_encode %{
    __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4);
    __ sve_$5(as_FloatRegister($tmp$$reg), __ $6, 0);
    __ sve_$7(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl                     $1 $2 $3      $4 $5   $6 $7
VECTOR_CAST_F2X_NARROW1(F, S, fcvtzs, S, dup, H, uzp1)


dnl
dnl VECTOR_CAST_F2X_NARROW2 generates a cast rule that converts in place and
dnl then narrows the result by two element-size steps.
dnl   $1 source type letter      $2 destination type letter
dnl   $3 convert instruction     $4 lane size used by the conversion
dnl   $5 instruction zeroing tmp $6 lane size after the first narrowing
dnl   $7 narrowing instruction   $8 lane size after the second narrowing
dnl The format string spells the temporary as $tmp so that the allocated
dnl register is printed, and keeps the trailing comment on the last
dnl instruction line.
define(`VECTOR_CAST_F2X_NARROW2', `
instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
%{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
  match(Set dst (VectorCast$1`'2X src));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(4 * SVE_COST);
  format %{ "sve_$3 $dst, $4, $src, $4\n\t"
            "sve_$5 $tmp, $6, 0\n\t"
            "sve_$7 $dst, $6, $dst, $tmp\n\t"
            "sve_$7 $dst, $8, $dst, $tmp\t# convert $1 to $2 vector" %}
  ins_encode %{
    __ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4);
    __ sve_$5(as_FloatRegister($tmp$$reg), __ $6, 0);
    __ sve_$7(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ sve_$7(as_FloatRegister($dst$$reg), __ $8, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl                     $1 $2 $3      $4 $5   $6 $7    $8
VECTOR_CAST_F2X_NARROW2(F, B, fcvtzs, S, dup, H, uzp1, B)


dnl
dnl VECTOR_CAST_F2X_EXTEND1 generates a cast rule whose result lanes are one
dnl element-size step wider than the source lanes.
dnl   $1 source type letter      $2 destination type letter
dnl   $3 convert instruction     $4 lane size of the source elements
dnl   $5 unpack instruction      $6 lane size of the result elements
dnl The source is unpacked into the wider lanes first and converted second,
dnl using the narrow-source to wide-result form of the convert instruction.
dnl Converting at the narrow size and extending afterwards would saturate
dnl out-of-range inputs to the narrow integer range.
define(`VECTOR_CAST_F2X_EXTEND1', `
instruct vcvt$1to$2`'(vReg dst, vReg src)
%{
  predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
  match(Set dst (VectorCast$1`'2X src));
  ins_cost(2 * SVE_COST);
  format %{ "sve_$5 $dst, $6, $src\n\t"
            "sve_$3 $dst, $6, $dst, $4\t# convert $1 to $2 vector" %}
  ins_encode %{
    __ sve_$5(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($src$$reg));
    __ sve_$3(as_FloatRegister($dst$$reg), __ $6, ptrue, as_FloatRegister($dst$$reg), __ $4);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl                     $1 $2 $3      $4 $5       $6
VECTOR_CAST_F2X_EXTEND1(F, L, fcvtzs, S, sunpklo, D)
@@ -3452,6 +3452,56 @@ void mvnw(Register Rd, Register Rm,
pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
}

private:

// Computes the opc / opc2 encoding fields of the SVE FCVTZS / FCVTZU
// instructions from the destination and source element sizes.
//
//   T_dst  element size of the integer result
//   T_src  element size of the floating-point source
//   opc    [out] value for the opc field
//   opc2   [out] value for the opc2 field
//
// B and Q are rejected for either operand. A non-D source must not be wider
// than the destination, and a D source cannot produce an H result.
void encode_fcvtz_T(SIMD_RegVariant T_dst, SIMD_RegVariant T_src,
                    unsigned& opc, unsigned& opc2) {
  assert(T_src != B && T_dst != B &&
         T_src != Q && T_dst != Q, "invalid register variant");
  if (T_src == D) {
    assert(T_dst != H, "invalid register variant");
  } else {
    assert(T_src <= T_dst, "invalid register variant");
  }

  // For most size pairs the fields are simply opc = T_src, opc2 = T_dst.
  // Two pairs have dedicated encodings in Arm's architecture reference
  // manual:
  // +-----+------+---+-------------------------------------+
  // | opc | opc2 | U | Instruction Details                 |
  // +-----+------+---+-------------------------------------+
  // |  11 |   10 | 0 | FCVTZS — Single-precision to 64-bit |
  // |  11 |   10 | 1 | FCVTZU — Single-precision to 64-bit |
  // |  11 |   00 | 0 | FCVTZS — Double-precision to 32-bit |
  // |  11 |   00 | 1 | FCVTZU — Double-precision to 32-bit |
  // +-----+------+---+-------------------------------------+
  const bool single_to_64bit = (T_dst == D && T_src == S);
  const bool double_to_32bit = (T_dst == S && T_src == D);

  if (single_to_64bit) {
    opc  = D;      // 0b11
    opc2 = S;      // 0b10
  } else if (double_to_32bit) {
    opc  = D;      // 0b11
    opc2 = B;      // 0b00
  } else {
    opc  = T_src;
    opc2 = T_dst;
  }
}
public:

// SVE floating-point convert to integer (predicated)
//
// INSN(NAME, sign) defines an emitter
//   NAME(Zd, T_dst, Pg, Zn, T_src)
// that writes one instruction word built from these fields:
//   [31:24] 0b01100101   [23:22] opc    [21:19] 0b011
//   [18:17] opc2         [16]    sign   [15:13] 0b101
//   Pg starting at bit 10, Zn at bit 5, Zd at bit 0.
// opc and opc2 are derived from T_dst / T_src by encode_fcvtz_T().
// sign is 0 for sve_fcvtzs and 1 for sve_fcvtzu.
#define INSN(NAME, sign) \
void NAME(FloatRegister Zd, SIMD_RegVariant T_dst, PRegister Pg, \
FloatRegister Zn, SIMD_RegVariant T_src) { \
starti; \
unsigned opc, opc2; \
encode_fcvtz_T(T_dst, T_src, opc, opc2); \
f(0b01100101, 31, 24), f(opc, 23, 22), f(0b011, 21, 19); \
f(opc2, 18, 17), f(sign, 16), f(0b101, 15, 13); \
pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0); \
}

INSN(sve_fcvtzs, 0b0);
INSN(sve_fcvtzu, 0b1);
#undef INSN

// Constructs an Assembler by forwarding the given code buffer to the
// AbstractAssembler base-class constructor; no other initialization is done.
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
}

@@ -1590,6 +1590,8 @@ def generate(kind, names):
["scvtf", "__ sve_scvtf(z1, __ D, p0, z0, __ S);", "scvtf\tz1.d, p0/m, z0.s"],
["ucvtf", "__ sve_ucvtf(z3, __ D, p1, z2, __ S);", "ucvtf\tz3.d, p1/m, z2.s"],
["fcvt", "__ sve_fcvt(z5, __ D, p3, z4, __ S);", "fcvt\tz5.d, p3/m, z4.s"],
["fcvtzs", "__ sve_fcvtzs(z19, __ D, p2, z18, __ D);", "fcvtzs\tz19.d, p2/m, z18.d"],
["fcvtzu", "__ sve_fcvtzu(z19, __ D, p2, z18, __ D);", "fcvtzu\tz19.d, p2/m, z18.d"],
])

print "\n// FloatImmediateOp"
Loading

0 comments on commit 78176f7

Please sign in to comment.