Skip to content

Commit

Permalink
8287835: Add support for additional float/double to integral conversi…
Browse files Browse the repository at this point in the history
…on for x86

Reviewed-by: kvn, jbhateja
  • Loading branch information
Sandhya Viswanathan committed Jun 10, 2022
1 parent 3ee1e60 commit 2cc40af
Show file tree
Hide file tree
Showing 7 changed files with 474 additions and 13 deletions.
26 changes: 26 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2102,6 +2102,14 @@ void Assembler::vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len) {
emit_int16(0x5B, (0xC0 | encode));
}

// Emits the register-to-register form of evcvttps2qq (vector float -> long
// conversion, per the declaration's group comment in assembler_x86.hpp).
//   dst        - destination vector register
//   src        - source vector register
//   vector_len - vector length encoding forwarded to InstructionAttr
// Only valid when UseAVX > 2 and the CPU reports AVX512DQ; the instruction is
// always EVEX encoded (66 prefix, 0F opcode map, W0) with opcode byte 0x7A.
void Assembler::evcvttps2qq(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
  InstructionAttr attrs(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  // No nds operand for this instruction, hence the literal 0.
  const int modrm_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(),
                                               VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  emit_int16(0x7A, (0xC0 | modrm_bits));
}

void Assembler::evcvtpd2qq(XMMRegister dst, XMMRegister src, int vector_len) {
assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
Expand Down Expand Up @@ -2182,6 +2190,14 @@ void Assembler::evpmovqw(XMMRegister dst, XMMRegister src, int vector_len) {
emit_int16(0x34, (0xC0 | encode));
}

// Emits the register-to-register form of evpmovsqd (an "Evex cast with signed
// saturation" per the header; quadword -> doubleword going by the mnemonic).
//   dst        - destination vector register
//   src        - source vector register
//   vector_len - vector length encoding forwarded to InstructionAttr
// Requires UseAVX > 2. Encoded as EVEX, F3 prefix, 0F38 opcode map, W0,
// opcode byte 0x25.
void Assembler::evpmovsqd(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "");
  InstructionAttr attrs(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  // Operand roles are reversed relative to most emitters in this file: src is
  // passed in the first (reg) position and dst in the last (r/m) position.
  const int modrm_bits = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(),
                                               VEX_SIMD_F3, VEX_OPCODE_0F_38, &attrs);
  emit_int16(0x25, (0xC0 | modrm_bits));
}

void Assembler::decl(Address dst) {
// Don't use it directly. Use MacroAssembler::decrement() instead.
InstructionMark im(this);
Expand Down Expand Up @@ -4293,6 +4309,16 @@ void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
emit_int16(0x29, (0xC0 | encode));
}

// Emits the masked, register-to-register form of evpcmpeqq: a quadword
// equality compare of nds against src whose per-lane result goes to kdst.
//   kdst       - opmask register receiving the comparison result
//   mask       - opmask register installed as the embedded opmask
//   nds, src   - the two vector operands being compared
//   vector_len - vector length encoding forwarded to InstructionAttr
// Requires EVEX support. Encoded with 66 prefix, 0F38 opcode map, W1,
// opcode byte 0x29.
void Assembler::evpcmpeqq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attrs(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  // NOTE(review): presumably selects merge- rather than zero-masking - confirm
  // against InstructionAttr.
  attrs.reset_is_clear_context();
  attrs.set_embedded_opmask_register_specifier(mask);
  const int modrm_bits = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(),
                                               VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int16(0x29, (0xC0 | modrm_bits));
}

void Assembler::vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
Expand Down
7 changes: 6 additions & 1 deletion src/hotspot/cpu/x86/assembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1169,9 +1169,10 @@ class Assembler : public AbstractAssembler {
void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len);
void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);

// Convert vector float and int
// Convert vector float to int/long
void vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len);
void vcvttps2dq(XMMRegister dst, XMMRegister src, int vector_len);
// EVEX-only float -> long conversion, register form; the definition asserts
// UseAVX > 2 and AVX512DQ support.
void evcvttps2qq(XMMRegister dst, XMMRegister src, int vector_len);

// Convert vector long to vector FP
void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len);
Expand All @@ -1189,6 +1190,9 @@ class Assembler : public AbstractAssembler {
void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len);
void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len);

// Evex casts with signed saturation
// Register form only; the definition asserts UseAVX > 2 and always emits an
// EVEX encoding.
void evpmovsqd(XMMRegister dst, XMMRegister src, int vector_len);

//Abs of packed Integer values
void pabsb(XMMRegister dst, XMMRegister src);
void pabsw(XMMRegister dst, XMMRegister src);
Expand Down Expand Up @@ -1786,6 +1790,7 @@ class Assembler : public AbstractAssembler {
void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);

void pcmpeqq(XMMRegister dst, XMMRegister src);
// Masked register form: compares nds with src, writes the result to kdst and
// uses `mask` as the embedded opmask. The definition asserts EVEX support.
void evpcmpeqq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);
void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
Expand Down
51 changes: 51 additions & 0 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4183,6 +4183,28 @@ void C2_MacroAssembler::vector_cast_float_special_cases_evex(XMMRegister dst, XM
bind(done);
}

// Post-processes the result of a float -> long vector conversion: every
// quadword lane of dst that equals the double_sign_flip constant is re-examined
// against the original float in src and patched in place.
//   dst              - in/out: conversion result, patched lane by lane
//   src              - the float vector that was converted (only read)
//   xtmp1, xtmp2     - vector temporaries, overwritten
//   ktmp1, ktmp2     - opmask temporaries, overwritten
//   scratch          - passed to evmovdquq when loading the double_sign_flip literal
//   double_sign_flip - per-lane constant compared against dst
//                      NOTE(review): presumably 0x8000000000000000, the value the
//                      conversion yields for NaN/out-of-range input - confirm
//   vec_enc          - vector length encoding used for every instruction emitted here
// Outcome per lane holding the constant: NaN source -> 0; non-NaN source that is
// not less than zero -> bitwise complement of the constant; otherwise unchanged.
void C2_MacroAssembler::vector_cast_float_to_long_special_cases_evex(
XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
Register scratch, AddressLiteral double_sign_flip,
int vec_enc) {
Label done;
// xtmp1 = double_sign_flip constant (k0, i.e. no masking).
evmovdquq(xtmp1, k0, double_sign_flip, false, vec_enc, scratch);
// ktmp1 = quadword lanes of dst that are equal to the constant.
Assembler::evpcmpeqq(ktmp1, k0, xtmp1, dst, vec_enc);
// Fast path: if no lane matched, dst needs no correction.
kortestwl(ktmp1, ktmp1);
jccb(Assembler::equal, done);

// xtmp2 = all zeroes.
vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
// ktmp2 = lanes where src compares unordered with itself (UNORD_Q), i.e. NaN.
evcmpps(ktmp2, k0, src, src, Assembler::UNORD_Q, vec_enc);
// Write zero into the NaN lanes of dst.
// NOTE(review): the `true` argument presumably requests merge-masking so the
// remaining lanes of dst are preserved - confirm against evmovdquq.
evmovdquq(dst, ktmp2, xtmp2, true, vec_enc);

// ktmp1 ^= ktmp2: take the NaN lanes just handled out of the candidate set.
kxorwl(ktmp1, ktmp1, ktmp2);
// With ktmp1 as the write mask, keep only candidates whose src is not less
// than zero (NLT_UQ against xtmp2, which is still zero here).
evcmpps(ktmp1, ktmp1, src, xtmp2, Assembler::NLT_UQ, vec_enc);
// xtmp2 = ~xtmp1: truth table 0x11 is NOT(B OR C) and both inputs are xtmp1.
vpternlogq(xtmp2, 0x11, xtmp1, xtmp1, vec_enc);
// Write the complemented constant into the selected (non-negative) lanes.
evmovdquq(dst, ktmp1, xtmp2, true, vec_enc);
bind(done);
}

/*
* The following routine handles special floating-point values (NaN/Inf/-Inf/Max/Min) for the casting operation.
* If src is NaN, the result is 0.
Expand Down Expand Up @@ -4243,6 +4265,35 @@ void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, XM
vector_cast_float_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, float_sign_flip, vec_enc);
}

// Float -> long vector cast using EVEX instructions.
//   dst, src         - destination (long lanes) and source (float lanes) vectors
//   xtmp1, xtmp2     - vector temporaries handed to the special-case fix-up
//   ktmp1, ktmp2     - opmask temporaries handed to the special-case fix-up
//   double_sign_flip - constant used by the fix-up to detect lanes to patch
//   scratch          - scratch register handed to the fix-up
//   vec_enc          - vector length encoding for the emitted instructions
void C2_MacroAssembler::vector_castF2L_evex(XMMRegister dst, XMMRegister src,
                                            XMMRegister xtmp1, XMMRegister xtmp2,
                                            KRegister ktmp1, KRegister ktmp2,
                                            AddressLiteral double_sign_flip,
                                            Register scratch, int vec_enc) {
  // Raw conversion first.
  evcvttps2qq(dst, src, vec_enc);
  // Then patch the special-case lanes. The helper takes scratch *before*
  // double_sign_flip, the reverse of this function's own parameter order.
  vector_cast_float_to_long_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2,
                                               scratch, double_sign_flip, vec_enc);
}

// Double -> integral vector cast using EVEX instructions.
//   to_elem_bt       - target element type: T_LONG, T_INT, T_SHORT or T_BYTE
//   dst, src         - destination and source vectors
//   xtmp1, xtmp2     - vector temporaries forwarded to vector_castD2L_evex
//   ktmp1, ktmp2     - opmask temporaries forwarded to vector_castD2L_evex
//   double_sign_flip - constant forwarded to vector_castD2L_evex
//   scratch          - scratch register forwarded to vector_castD2L_evex
//   vec_enc          - vector length encoding for every emitted instruction
// The value is always converted to long first. Narrower targets then go through
// a saturating long -> int step (evpmovsqd), followed for short/byte by a
// further int -> short/byte step (evpmovdw / evpmovdb).
void C2_MacroAssembler::vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src,
                                            XMMRegister xtmp1, XMMRegister xtmp2,
                                            KRegister ktmp1, KRegister ktmp2,
                                            AddressLiteral double_sign_flip,
                                            Register scratch, int vec_enc) {
  vector_castD2L_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, double_sign_flip, scratch, vec_enc);

  // A long target needs no narrowing.
  if (to_elem_bt == T_LONG) {
    return;
  }

  // Every supported narrower target starts with the same long -> int step.
  switch (to_elem_bt) {
    case T_INT:
    case T_SHORT:
    case T_BYTE:
      evpmovsqd(dst, dst, vec_enc);
      break;
    default:
      assert(false, "%s", type2name(to_elem_bt));
      return;
  }

  // Sub-int targets need one more narrowing step.
  if (to_elem_bt == T_SHORT) {
    evpmovdw(dst, dst, vec_enc);
  } else if (to_elem_bt == T_BYTE) {
    evpmovdb(dst, dst, vec_enc);
  }
}

#ifdef _LP64
void C2_MacroAssembler::vector_round_double_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
Expand Down
12 changes: 12 additions & 0 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,11 +310,18 @@
KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
Register scratch, int vec_enc);

// Float -> long vector cast: emits evcvttps2qq(dst, src) and then runs
// vector_cast_float_to_long_special_cases_evex on the result. The xtmp/ktmp
// registers, scratch and double_sign_flip are only used by that fix-up step.
void vector_castF2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
Register scratch, int vec_enc);

void vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
Register scratch, int vec_enc);

// Double -> integral vector cast. Always performs vector_castD2L_evex first;
// for to_elem_bt of T_INT, T_SHORT or T_BYTE the long result is then narrowed
// in place (evpmovsqd, plus evpmovdw / evpmovdb for short / byte). Any other
// non-T_LONG type trips an assert in the definition.
void vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
Register scratch, int vec_enc);

void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
BasicType from_elem_bt, BasicType to_elem_bt);

Expand All @@ -326,6 +333,11 @@
KRegister ktmp1, KRegister ktmp2, Register scratch, AddressLiteral float_sign_flip,
int vec_enc);

// Fix-up pass for a float -> long conversion result in dst: lanes equal to
// double_sign_flip are patched based on the original float in src (NaN -> 0,
// non-NaN and not less than zero -> complement of double_sign_flip).
// xtmp1/xtmp2 and ktmp1/ktmp2 are overwritten. Note that scratch comes before
// double_sign_flip in this parameter list.
void vector_cast_float_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
Register scratch, AddressLiteral double_sign_flip,
int vec_enc);

void vector_cast_float_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
Register scratch, AddressLiteral float_sign_flip,
Expand Down
74 changes: 62 additions & 12 deletions src/hotspot/cpu/x86/x86.ad
Original file line number Diff line number Diff line change
Expand Up @@ -1878,11 +1878,15 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
}
break;
case Op_VectorCastD2X:
if (is_subword_type(bt) || bt == T_INT) {
return false;
}
if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
return false;
// Conversion to integral type is only supported on AVX-512 platforms with avx512dq.
// Need avx512vl for size_in_bits < 512
if (is_integral_type(bt)) {
if (!VM_Version::supports_avx512dq()) {
return false;
}
if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
return false;
}
}
break;
case Op_RoundVD:
Expand All @@ -1891,8 +1895,20 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
}
break;
case Op_VectorCastF2X:
if (is_subword_type(bt) || bt == T_LONG) {
return false;
// F2I is supported on all AVX and above platforms
// For conversion to other integral types need AVX512:
// Conversion to long in addition needs avx512dq
// Need avx512vl for size_in_bits < 512
if (is_integral_type(bt) && (bt != T_INT)) {
if (UseAVX <= 2) {
return false;
}
if ((bt == T_LONG) && !VM_Version::supports_avx512dq()) {
return false;
}
if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
return false;
}
}
break;
case Op_MulReductionVI:
Expand Down Expand Up @@ -7325,6 +7341,8 @@ instruct vcastFtoD_reg(vec dst, vec src) %{


instruct castFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{
// F2I conversion for < 64 byte vector using AVX instructions
// AVX512 platforms that don't support avx512vl also use AVX instructions to support F2I
predicate(!VM_Version::supports_avx512vl() &&
Matcher::vector_length_in_bytes(n) < 64 &&
Matcher::vector_element_basic_type(n) == T_INT);
Expand Down Expand Up @@ -7356,6 +7374,37 @@ instruct castFtoI_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, k
ins_pipe( pipe_slow );
%}

instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
// F2X conversion to an integral element type other than T_INT (i.e. long,
// short or byte) using AVX512 instructions.
// Platforms that don't support avx512vl can only support 64-byte vectors.
predicate(is_integral_type(Matcher::vector_element_basic_type(n)) &&
Matcher::vector_element_basic_type(n) != T_INT);
match(Set dst (VectorCastF2X src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
ins_encode %{
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
if (to_elem_bt == T_LONG) {
// Float -> long: the length encoding is taken from this node itself.
int vlen_enc = vector_length_encoding(this);
__ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
$xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
ExternalAddress(vector_double_signflip()), $scratch$$Register, vlen_enc);
} else {
// Float -> short/byte: the length encoding is taken from the $src operand;
// convert to int first, then narrow the int lanes in place.
int vlen_enc = vector_length_encoding(this, $src);
__ vector_castF2I_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
$xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
ExternalAddress(vector_float_signflip()), $scratch$$Register, vlen_enc);
if (to_elem_bt == T_SHORT) {
__ evpmovdw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
} else {
// The predicate excludes T_INT and T_LONG is handled above, so only T_BYTE
// is expected here.
assert(to_elem_bt == T_BYTE, "required");
__ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
}
}
%}
ins_pipe( pipe_slow );
%}

instruct vcastDtoF_reg(vec dst, vec src) %{
predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
match(Set dst (VectorCastD2X src));
Expand All @@ -7367,14 +7416,15 @@ instruct vcastDtoF_reg(vec dst, vec src) %{
ins_pipe( pipe_slow );
%}

instruct castDtoL_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
predicate(Matcher::vector_element_basic_type(n) == T_LONG);
instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
predicate(is_integral_type(Matcher::vector_element_basic_type(n)));
match(Set dst (VectorCastD2X src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
format %{ "vector_cast_d2l $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
__ vector_castD2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
int vlen_enc = vector_length_encoding(this, $src);
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
__ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
$xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
ExternalAddress(vector_double_signflip()), $scratch$$Register, vlen_enc);
%}
Expand Down

1 comment on commit 2cc40af

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.