Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8261108: Add cast nodes from integer types to float types implementation for Arm SVE #37

Closed
Closed
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
@@ -2684,3 +2684,134 @@ instruct vcvtLtoB(vReg dst, vReg src, vReg tmp)
ins_pipe(pipe_slow);
%}


// Vector cast byte -> float (SVE).
// Matches VectorCastB2X when UseSVE > 0, the node's vector type is at least
// 16 bytes long, and its element type is T_FLOAT.
// Emits three instructions: sve_sunpklo to H lanes, sve_sunpklo to S lanes,
// then sve_scvtf on S lanes.
instruct vcvtBtoF(vReg dst, vReg src)
%{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
match(Set dst (VectorCastB2X src));
ins_cost(3 * SVE_COST);
format %{ "sve_sunpklo $dst, H, $src\n\t"
"sve_sunpklo $dst, S, $dst\n\t"
"sve_scvtf $dst, S, $dst, S\t# convert B to F vector" %}
ins_encode %{
// Only the first unpack reads src; every later step works on dst in place.
__ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
__ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
// ptrue is passed as the predicate argument of the conversion.
__ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($dst$$reg), __ S);
%}
ins_pipe(pipe_slow);
%}

// Vector cast short -> double (SVE).
// Matches VectorCastS2X when UseSVE > 0, the node's vector type is at least
// 16 bytes long, and its element type is T_DOUBLE.
// Emits three instructions: sve_sunpklo to S lanes, sve_sunpklo to D lanes,
// then sve_scvtf on D lanes.
instruct vcvtStoD(vReg dst, vReg src)
%{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
match(Set dst (VectorCastS2X src));
ins_cost(3 * SVE_COST);
format %{ "sve_sunpklo $dst, S, $src\n\t"
"sve_sunpklo $dst, D, $dst\n\t"
"sve_scvtf $dst, D, $dst, D\t# convert S to D vector" %}
ins_encode %{
// Only the first unpack reads src; every later step works on dst in place.
__ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
__ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
// ptrue is passed as the predicate argument of the conversion.
__ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ D);
%}
ins_pipe(pipe_slow);
%}


// Vector cast byte -> double (SVE).
// Matches VectorCastB2X when UseSVE > 0, the node's vector type is at least
// 16 bytes long, and its element type is T_DOUBLE.
// Emits four instructions: sve_sunpklo to H, to S and to D lanes in turn,
// then sve_scvtf on D lanes.
instruct vcvtBtoD(vReg dst, vReg src)
%{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
match(Set dst (VectorCastB2X src));
ins_cost(4 * SVE_COST);
format %{ "sve_sunpklo $dst, H, $src\n\t"
"sve_sunpklo $dst, S, $dst\n\t"
"sve_sunpklo $dst, D, $dst\n\t"
"sve_scvtf $dst, D, $dst, D\t# convert B to D vector" %}
ins_encode %{
// Only the first unpack reads src; every later step works on dst in place.
__ sve_sunpklo(as_FloatRegister($dst$$reg), __ H, as_FloatRegister($src$$reg));
__ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg));
__ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($dst$$reg));
// ptrue is passed as the predicate argument of the conversion.
__ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ D);
%}
ins_pipe(pipe_slow);
%}


instruct vcvtLtoF(vReg dst, vReg src, vReg tmp)
%{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
match(Set dst (VectorCastL2X src));
effect(TEMP_DEF dst, TEMP tmp);
ins_cost(3 * SVE_COST);
format %{ "sve_scvtf $dst, S, $src, D\n\t"
"sve_dup $tmp, S, 0\n\t"
"sve_uzp1 $dst, S, $dst, $tmp\t# convert L to F vector" %}
ins_encode %{
__ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ D);
__ sve_dup(as_FloatRegister($tmp$$reg), __ S, 0);
__ sve_uzp1(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
This conversation was marked as resolved by Wanghuang-Huawei
Comment on lines +2754 to +2756

This comment has been minimized.

Loading
@XiaohongGong

XiaohongGong Feb 22, 2021
Collaborator

This looks like an issue to me. Judging from the encoding, "__ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ D);" already converts 64-bit integers directly to single precision. Why do you need the sve_uzp1 instruction that follows? I guess you want to convert "long" to "double" and then to "float"? If so, the first scvtf instruction should be "__ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D);", followed by an fcvt that converts "double" to "float". Please correct me if I'm wrong. Thanks!

%}
ins_pipe(pipe_slow);
%}


// Vector cast int -> float (SVE).
// Matches VectorCastI2X when UseSVE > 0, the node's vector type is at least
// 16 bytes long, and its element type is T_FLOAT.
// Source and destination both use S lanes, so a single sve_scvtf is emitted.
instruct vcvtItoF(vReg dst, vReg src)
%{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
match(Set dst (VectorCastI2X src));
ins_cost(SVE_COST);
format %{ "sve_scvtf $dst, S, $src, S\t# convert I to F vector" %}
ins_encode %{
// ptrue is passed as the predicate argument of the conversion.
__ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg), __ S);
%}
ins_pipe(pipe_slow);
%}

// Vector cast long -> double (SVE).
// Matches VectorCastL2X when UseSVE > 0, the node's vector type is at least
// 16 bytes long, and its element type is T_DOUBLE.
// Source and destination both use D lanes, so a single sve_scvtf is emitted.
instruct vcvtLtoD(vReg dst, vReg src)
%{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
match(Set dst (VectorCastL2X src));
ins_cost(SVE_COST);
format %{ "sve_scvtf $dst, D, $src, D\t# convert L to D vector" %}
ins_encode %{
// ptrue is passed as the predicate argument of the conversion.
__ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($src$$reg), __ D);
%}
ins_pipe(pipe_slow);
%}


// Vector cast int -> double (SVE).
// Matches VectorCastI2X when UseSVE > 0, the node's vector type is at least
// 16 bytes long, and its element type is T_DOUBLE.
// Emits two instructions: sve_sunpklo to D lanes, then sve_scvtf on D lanes.
instruct vcvtItoD(vReg dst, vReg src)
%{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
match(Set dst (VectorCastI2X src));
ins_cost(2 * SVE_COST);
format %{ "sve_sunpklo $dst, D, $src\n\t"
"sve_scvtf $dst, D, $dst, D\t# convert I to D vector" %}
ins_encode %{
// The unpack reads src; the conversion then works on dst in place.
__ sve_sunpklo(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg));
// ptrue is passed as the predicate argument of the conversion.
__ sve_scvtf(as_FloatRegister($dst$$reg), __ D, ptrue, as_FloatRegister($dst$$reg), __ D);
%}
ins_pipe(pipe_slow);
%}

// Vector cast short -> float (SVE).
// Matches VectorCastS2X when UseSVE > 0, the node's vector type is at least
// 16 bytes long, and its element type is T_FLOAT.
// Emits two instructions: sve_sunpklo to S lanes, then sve_scvtf on S lanes.
instruct vcvtStoF(vReg dst, vReg src)
%{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
match(Set dst (VectorCastS2X src));
ins_cost(2 * SVE_COST);
format %{ "sve_sunpklo $dst, S, $src\n\t"
"sve_scvtf $dst, S, $dst, S\t# convert S to F vector" %}
ins_encode %{
// The unpack reads src; the conversion then works on dst in place.
__ sve_sunpklo(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
// ptrue is passed as the predicate argument of the conversion.
__ sve_scvtf(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($dst$$reg), __ S);
%}
ins_pipe(pipe_slow);
%}
@@ -1587,3 +1587,107 @@ instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
dnl $1 $2 $3 $4 $5 $6 $7
VECTOR_CAST_NARROW3(L, B, dup, S, uzp1, H, B)

dnl
dnl VECTOR_CAST_I2F_EXTEND2 generates a vcvt rule that applies the unpack
dnl instruction twice and then the conversion instruction, three in total.
dnl   $1  source type letter: used in the rule name and the VectorCast node name
dnl   $2  destination type letter: used in the rule name and, through
dnl       TYPE2DATATYPE, in the element type checked by the predicate
dnl   $3  unpack instruction suffix, emitted as sve_$3
dnl   $4  lane size given to the first unpack, which reads src
dnl   $5  lane size given to the second unpack and to both sides of the conversion
dnl   $6  conversion instruction suffix, emitted as sve_$6 with ptrue
define(`VECTOR_CAST_I2F_EXTEND2', `
instruct vcvt$1to$2`'(vReg dst, vReg src)
%{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
match(Set dst (VectorCast$1`'2X src));
ins_cost(3 * SVE_COST);
format %{ "sve_$3 $dst, $4, $src\n\t"
"sve_$3 $dst, $5, $dst\n\t"
"sve_$6 $dst, $5, $dst, $5\t# convert $1 to $2 vector" %}
ins_encode %{
__ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg));
__ sve_$3(as_FloatRegister($dst$$reg), __ $5, as_FloatRegister($dst$$reg));
__ sve_$6(as_FloatRegister($dst$$reg), __ $5, ptrue, as_FloatRegister($dst$$reg), __ $5);
%}
ins_pipe(pipe_slow);
%}')dnl
dnl $1 $2 $3 $4 $5 $6
VECTOR_CAST_I2F_EXTEND2(B, F, sunpklo, H, S, scvtf)
VECTOR_CAST_I2F_EXTEND2(S, D, sunpklo, S, D, scvtf)

dnl
dnl VECTOR_CAST_I2F_EXTEND3 generates a vcvt rule that applies the unpack
dnl instruction three times and then the conversion instruction, four in total.
dnl   $1  source type letter: used in the rule name and the VectorCast node name
dnl   $2  destination type letter: used in the rule name and, through
dnl       TYPE2DATATYPE, in the element type checked by the predicate
dnl   $3  unpack instruction suffix, emitted as sve_$3
dnl   $4  lane size given to the first unpack, which reads src
dnl   $5  lane size given to the second unpack
dnl   $6  lane size given to the third unpack and to both sides of the conversion
dnl   $7  conversion instruction suffix, emitted as sve_$7 with ptrue
define(`VECTOR_CAST_I2F_EXTEND3', `
instruct vcvt$1to$2`'(vReg dst, vReg src)
%{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
match(Set dst (VectorCast$1`'2X src));
ins_cost(4 * SVE_COST);
format %{ "sve_$3 $dst, $4, $src\n\t"
"sve_$3 $dst, $5, $dst\n\t"
"sve_$3 $dst, $6, $dst\n\t"
"sve_$7 $dst, $6, $dst, $6\t# convert $1 to $2 vector" %}
ins_encode %{
__ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg));
__ sve_$3(as_FloatRegister($dst$$reg), __ $5, as_FloatRegister($dst$$reg));
__ sve_$3(as_FloatRegister($dst$$reg), __ $6, as_FloatRegister($dst$$reg));
__ sve_$7(as_FloatRegister($dst$$reg), __ $6, ptrue, as_FloatRegister($dst$$reg), __ $6);
%}
ins_pipe(pipe_slow);
%}')dnl
dnl $1 $2 $3 $4 $5 $6 $7
VECTOR_CAST_I2F_EXTEND3(B, D, sunpklo, H, S, D, scvtf)
This conversation was marked as resolved by Wanghuang-Huawei

This comment has been minimized.

Loading
@nsjian

nsjian Feb 18, 2021
Collaborator

Can you please align the comment $1...$7 to the arguments?

This comment has been minimized.

Loading
@Wanghuang-Huawei

Wanghuang-Huawei Feb 18, 2021
Author Collaborator

Thank you for your review. I will fix this.


dnl
dnl VECTOR_CAST_I2F_NARROW1 generates a vcvt rule whose conversion goes from a
dnl wider source lane size to a narrower destination lane size. The rule takes
dnl an extra vReg tmp operand and declares dst as TEMP_DEF and tmp as TEMP.
dnl   $1  source type letter: used in the rule name and the VectorCast node name
dnl   $2  destination type letter: used in the rule name and, through
dnl       TYPE2DATATYPE, in the element type checked by the predicate
dnl   $3  conversion instruction suffix, emitted as sve_$3 with ptrue
dnl   $4  destination lane size of the conversion
dnl   $5  source lane size of the conversion
dnl   $6  suffix of the instruction that fills tmp, called with immediate 0
dnl   $7  lane size used for tmp and for the final combining instruction
dnl   $8  suffix of the instruction that combines dst and tmp into dst
define(`VECTOR_CAST_I2F_NARROW1', `
instruct vcvt$1to$2`'(vReg dst, vReg src, vReg tmp)
%{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
match(Set dst (VectorCast$1`'2X src));
effect(TEMP_DEF dst, TEMP tmp);
ins_cost(3 * SVE_COST);
format %{ "sve_$3 $dst, $4, $src, $5\n\t"
"sve_$6 $tmp, $7, 0\n\t"
"sve_$8 $dst, $7, $dst, $tmp\t# convert $1 to $2 vector" %}
ins_encode %{
__ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $5);
__ sve_$6(as_FloatRegister($tmp$$reg), __ $7, 0);
__ sve_$8(as_FloatRegister($dst$$reg), __ $7, as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl $1 $2 $3 $4 $5 $6 $7 $8
VECTOR_CAST_I2F_NARROW1(L, F, scvtf, S, D, dup, S, uzp1)

dnl
dnl VECTOR_CAST_I2F generates a vcvt rule for the case where source and
dnl destination share one lane size, so a single conversion is emitted.
dnl   $1  source type letter: used in the rule name and the VectorCast node name
dnl   $2  destination type letter: used in the rule name and, through
dnl       TYPE2DATATYPE, in the element type checked by the predicate
dnl   $3  conversion instruction suffix, emitted as sve_$3 with ptrue
dnl   $4  lane size used for both sides of the conversion
define(`VECTOR_CAST_I2F', `
instruct vcvt$1to$2`'(vReg dst, vReg src)
%{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
match(Set dst (VectorCast$1`'2X src));
ins_cost(SVE_COST);
format %{ "sve_$3 $dst, $4, $src, $4\t# convert $1 to $2 vector" %}
ins_encode %{
__ sve_$3(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($src$$reg), __ $4);
%}
ins_pipe(pipe_slow);
%}')dnl
dnl $1 $2 $3 $4
VECTOR_CAST_I2F(I, F, scvtf, S)
VECTOR_CAST_I2F(L, D, scvtf, D)

dnl
dnl VECTOR_CAST_I2F_EXTEND1 generates a vcvt rule that applies the unpack
dnl instruction once and then the conversion instruction, two in total.
dnl   $1  source type letter: used in the rule name and the VectorCast node name
dnl   $2  destination type letter: used in the rule name and, through
dnl       TYPE2DATATYPE, in the element type checked by the predicate
dnl   $3  unpack instruction suffix, emitted as sve_$3
dnl   $4  lane size given to the unpack and to the destination of the conversion
dnl   $5  conversion instruction suffix, emitted as sve_$5 with ptrue
dnl   $6  lane size given to the source of the conversion
define(`VECTOR_CAST_I2F_EXTEND1', `
instruct vcvt$1to$2`'(vReg dst, vReg src)
%{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
match(Set dst (VectorCast$1`'2X src));
ins_cost(2 * SVE_COST);
format %{ "sve_$3 $dst, $4, $src\n\t"
"sve_$5 $dst, $4, $dst, $6\t# convert $1 to $2 vector" %}
ins_encode %{
__ sve_$3(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg));
__ sve_$5(as_FloatRegister($dst$$reg), __ $4, ptrue, as_FloatRegister($dst$$reg), __ $6);
%}
ins_pipe(pipe_slow);
%}')dnl
dnl $1 $2 $3 $4 $5 $6
VECTOR_CAST_I2F_EXTEND1(I, D, sunpklo, D, scvtf, D)
VECTOR_CAST_I2F_EXTEND1(S, F, sunpklo, S, scvtf, S)
@@ -3353,6 +3353,56 @@ void mvnw(Register Rd, Register Rm,
INSN(sve_whilelsw, 0b111, 0);
#undef INSN

private:

void encode_cvtf_T(SIMD_RegVariant T_dst, SIMD_RegVariant T_src,
int& opc, int& opc2) {
assert(T_src != B && T_dst != B &&
T_src != Q && T_dst != Q, "invalid register variant");
if (T_dst != D) {
assert(T_dst <= T_src, "invalid register variant");
} else {
assert(T_src != H, "invalid register variant");
}
// In most cases we can treat T_dst,T_src as opc,opc2
// except following four cases. These cases should be converted
// according to Arm's architecture reference manual:
// +-----+------+---+------------------------------------+
// | opc | opc2 | U | Instruction Details |
// +-----+------+---+------------------------------------+
// | 11 | 00 | 0 | SCVTF — 32-bit to double-precision |
// | 11 | 00 | 1 | UCVTF — 32-bit to double-precision |
// | 11 | 10 | 0 | SCVTF — 64-bit to single-precision |
// | 11 | 10 | 1 | UCVTF — 64-bit to single-precision |
// +-----+------+---+------------------------------------+
if (T_dst == S && T_src == D) { // 64-bit to single-precision
T_dst = D;
T_src = S;
} else if (T_dst == D && T_src == S) { // 32-bit to double-precision
T_dst = D;
T_src = B;
This conversation was marked as resolved by Wanghuang-Huawei

This comment has been minimized.

Loading
@nsjian

nsjian Feb 19, 2021
Collaborator

This looks confusing. I understand that it's just 0b00, but using B here is really confusing. Maybe use 0b00 and 0b11 directly to align with the opc/opc2 values in the Arm ARM?

This comment has been minimized.

Loading
@Wanghuang-Huawei

Wanghuang-Huawei Feb 19, 2021
Author Collaborator

Arm uses a confusing encoding. If we use 0b00 / 0b11, we cannot get a concise macro assembler here (for example, other developers cannot use this macro). In fact, gcc uses switch..case to convert the macro assembler into Arm's funny encoding.

This comment has been minimized.

Loading
@Wanghuang-Huawei

Wanghuang-Huawei Feb 19, 2021
Author Collaborator

If we use an encoding function (like the switch..case in gcc) instead of a conversion function (encode_cvtf_T), the source code here will be a little more complicated. :-)

This comment has been minimized.

Loading
@XiaohongGong

XiaohongGong Feb 20, 2021
Collaborator

I agree with @nsjian that swapping "T_dst" and "T_src" is confusing, although it generates the same final result. As I understand it, this function is used to generate the two opcs from the dst and src types, so I would prefer not to use the swap logic. Thanks!

}
opc = T_dst;
opc2 = T_src;
}
public:

// SVE integer to floating-point conversion, predicated form.
// Each generated emitter takes the destination register and its lane size,
// the predicate register, and the source register and its lane size.
// encode_cvtf_T turns the (T_dst, T_src) pair into the opc and opc2 field
// values; `sign` is the value written at bit position 16 and is 0b0 for
// sve_scvtf and 0b1 for sve_ucvtf.
#define INSN(NAME, sign)                                                 \
  void NAME(FloatRegister Zd, SIMD_RegVariant T_dst, PRegister Pg,       \
            FloatRegister Zn, SIMD_RegVariant T_src) {                   \
    starti;                                                              \
    int opc, opc2;                                                       \
    encode_cvtf_T(T_dst, T_src, opc, opc2);                              \
    /* Fixed bits and opcode fields, from the top of the word down. */   \
    f(0b01100101, 31, 24);                                               \
    f(opc, 23, 22);                                                      \
    f(0b010, 21, 19);                                                    \
    f(opc2, 18, 17);                                                     \
    f(sign, 16);                                                         \
    f(0b101, 15, 13);                                                    \
    /* Register operands. */                                             \
    pgrf(Pg, 10);                                                        \
    rf(Zn, 5);                                                           \
    rf(Zd, 0);                                                           \
  }

INSN(sve_scvtf, 0b0);
INSN(sve_ucvtf, 0b1);
This conversation was marked as resolved by Wanghuang-Huawei

This comment has been minimized.

Loading
@nsjian

nsjian Feb 18, 2021
Collaborator

Is there any usage for ucvtf?

This comment has been minimized.

Loading
@Wanghuang-Huawei

Wanghuang-Huawei Feb 19, 2021
Author Collaborator

Just like sve_uzp2. It seems that ucvtf and scvtf are in the same encoding group.

#undef INSN

// Builds an assembler over `code`. The buffer is forwarded to the
// AbstractAssembler constructor and the body itself does nothing further.
Assembler(CodeBuffer* code) : AbstractAssembler(code) {}

@@ -1580,6 +1580,8 @@ def generate(kind, names):
["whilelo", "__ sve_whilelow(p0, __ H, r3, r4);", "whilelo\tp0.h, w3, w4"],
["whilels", "__ sve_whilels(p1, __ S, r5, r6);", "whilels\tp1.s, x5, x6"],
["whilels", "__ sve_whilelsw(p2, __ D, r10, r11);", "whilels\tp2.d, w10, w11"],
["scvtf", "__ sve_scvtf(z1, __ D, p0, z0, __ S);", "scvtf\tz1.d, p0/m, z0.s"],
["ucvtf", "__ sve_ucvtf(z3, __ D, p1, z2, __ S);", "ucvtf\tz3.d, p1/m, z2.s"],
])

print "\n// FloatImmediateOp"
Loading