Skip to content

Commit 2cc40af

Browse files
author
Sandhya Viswanathan
committed
8287835: Add support for additional float/double to integral conversion for x86
Reviewed-by: kvn, jbhateja
1 parent 3ee1e60 commit 2cc40af

File tree

7 files changed

+474
-13
lines changed

7 files changed

+474
-13
lines changed

src/hotspot/cpu/x86/assembler_x86.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2102,6 +2102,14 @@ void Assembler::vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len) {
21022102
emit_int16(0x5B, (0xC0 | encode));
21032103
}
21042104

2105+
// Emits the register-to-register form of evcvttps2qq (dst <- src) for the given vector_len.
// Only the XMMRegister/XMMRegister form is provided here; no opmask is applied.
void Assembler::evcvttps2qq(XMMRegister dst, XMMRegister src, int vector_len) {
2106+
// Callers must be on an AVX-512 configuration (UseAVX > 2) that also reports AVX512DQ.
assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
2107+
// rex_w is false, masking is disabled (no_mask_reg) and the encoding honours vector_len (uses_vl).
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2108+
// Force the EVEX prefix form for this instruction.
attributes.set_is_evex_instruction();
2109+
// dst goes in the first encoding slot, src in the third; the middle (nds) slot is unused (0).
// Prefix selection: 66 / 0F opcode map.
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2110+
// Opcode byte 0x7A followed by the register-direct operand byte (0xC0 | encode).
emit_int16(0x7A, (0xC0 | encode));
2111+
}
2112+
21052113
void Assembler::evcvtpd2qq(XMMRegister dst, XMMRegister src, int vector_len) {
21062114
assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
21072115
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -2182,6 +2190,14 @@ void Assembler::evpmovqw(XMMRegister dst, XMMRegister src, int vector_len) {
21822190
emit_int16(0x34, (0xC0 | encode));
21832191
}
21842192

2193+
// Emits the register-to-register form of evpmovsqd (dst <- src) for the given vector_len.
// The header groups this instruction under "Evex casts with signed saturation".
void Assembler::evpmovsqd(XMMRegister dst, XMMRegister src, int vector_len) {
2194+
// Only the AVX-512 level is checked here; no per-feature (e.g. avx512vl) check is made in this method.
assert(UseAVX > 2, "");
2195+
// rex_w is false, masking is disabled (no_mask_reg) and the encoding honours vector_len (uses_vl).
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2196+
// Force the EVEX prefix form for this instruction.
attributes.set_is_evex_instruction();
2197+
// Note the operand order: src->encoding() is passed first and dst->encoding() third.
// NOTE(review): presumably because this down-convert form encodes its destination in the
// r/m field and its source in the reg field - confirm against the Intel SDM.
// Prefix selection: F3 / 0F 38 opcode map.
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
2198+
// Opcode byte 0x25 followed by the register-direct operand byte (0xC0 | encode).
emit_int16(0x25, (0xC0 | encode));
2199+
}
2200+
21852201
void Assembler::decl(Address dst) {
21862202
// Don't use it directly. Use MacroAssembler::decrement() instead.
21872203
InstructionMark im(this);
@@ -4293,6 +4309,16 @@ void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
42934309
emit_int16(0x29, (0xC0 | encode));
42944310
}
42954311

4312+
// Emits the register form of evpcmpeqq that writes its comparison result to the opmask
// register kdst, comparing nds against src under the control of the opmask register `mask`.
void Assembler::evpcmpeqq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len) {
4313+
// Requires EVEX support; no further feature check is made in this method.
assert(VM_Version::supports_evex(), "");
4314+
// rex_w is true (64-bit elements); no_mask_reg is false because an opmask is supplied below.
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4315+
// Force the EVEX prefix form for this instruction.
attributes.set_is_evex_instruction();
4316+
// NOTE(review): presumably switches the prefix from zeroing to merge-masking - confirm
// against InstructionAttr::reset_is_clear_context().
attributes.reset_is_clear_context();
4317+
// Embed `mask` as the opmask register of the instruction.
attributes.set_embedded_opmask_register_specifier(mask);
4318+
// kdst is the destination slot, nds the second source slot and src the r/m slot.
// Prefix selection: 66 / 0F 38 opcode map.
int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4319+
// Opcode byte 0x29 followed by the register-direct operand byte (0xC0 | encode).
emit_int16(0x29, (0xC0 | encode));
4320+
}
4321+
42964322
void Assembler::vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len) {
42974323
assert(VM_Version::supports_avx(), "");
42984324
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);

src/hotspot/cpu/x86/assembler_x86.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1169,9 +1169,10 @@ class Assembler : public AbstractAssembler {
11691169
void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len);
11701170
void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);
11711171

1172-
// Convert vector float and int
1172+
// Convert vector float to int/long
11731173
void vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len);
11741174
void vcvttps2dq(XMMRegister dst, XMMRegister src, int vector_len);
1175+
void evcvttps2qq(XMMRegister dst, XMMRegister src, int vector_len);
11751176

11761177
// Convert vector long to vector FP
11771178
void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len);
@@ -1189,6 +1190,9 @@ class Assembler : public AbstractAssembler {
11891190
void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len);
11901191
void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len);
11911192

1193+
// Evex casts with signed saturation
1194+
void evpmovsqd(XMMRegister dst, XMMRegister src, int vector_len);
1195+
11921196
//Abs of packed Integer values
11931197
void pabsb(XMMRegister dst, XMMRegister src);
11941198
void pabsw(XMMRegister dst, XMMRegister src);
@@ -1786,6 +1790,7 @@ class Assembler : public AbstractAssembler {
17861790
void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
17871791

17881792
void pcmpeqq(XMMRegister dst, XMMRegister src);
1793+
void evpcmpeqq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);
17891794
void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
17901795
void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
17911796
void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);

src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4183,6 +4183,28 @@ void C2_MacroAssembler::vector_cast_float_special_cases_evex(XMMRegister dst, XM
41834183
bind(done);
41844184
}
41854185

4186+
// Post-processes the result of a float -> long vector conversion held in dst.
// Lanes of dst that equal the double_sign_flip constant are treated as "special" and patched:
//   - lanes whose src element is NaN (unordered with itself) are overwritten with 0;
//   - remaining special lanes whose src element is not less than 0 are overwritten with the
//     bitwise complement of the double_sign_flip constant.
// Other special lanes keep the value already in dst. If no lane is special the whole fix-up
// is skipped.
// Clobbers xtmp1, xtmp2, ktmp1, ktmp2; `scratch` is handed to the constant load.
// NOTE(review): the code assumes double_sign_flip holds the value the conversion produces for
// out-of-range inputs (presumably 0x8000000000000000 per lane) - confirm against the stub.
// NOTE(review): evcmpps yields one mask bit per 32-bit lane while dst is accessed as 64-bit
// lanes; presumably only the low mask bits, which line up with the source floats, matter to
// the quadword moves - confirm.
void C2_MacroAssembler::vector_cast_float_to_long_special_cases_evex(
4187+
XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
4188+
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
4189+
Register scratch, AddressLiteral double_sign_flip,
4190+
int vec_enc) {
4191+
Label done;
4192+
// xtmp1 <- double_sign_flip constant (unmasked load via k0).
evmovdquq(xtmp1, k0, double_sign_flip, false, vec_enc, scratch);
4193+
// ktmp1 <- lanes of dst equal to the constant, i.e. the lanes that need patching.
Assembler::evpcmpeqq(ktmp1, k0, xtmp1, dst, vec_enc);
4194+
// Fast path: branch to done when ktmp1 tests as zero (no special lane found).
kortestwl(ktmp1, ktmp1);
4195+
jccb(Assembler::equal, done);
4196+
4197+
// xtmp2 <- all zeros.
vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
4198+
// ktmp2 <- lanes where src is unordered with itself (NaN).
evcmpps(ktmp2, k0, src, src, Assembler::UNORD_Q, vec_enc);
4199+
// Write 0 into dst for the NaN lanes selected by ktmp2.
evmovdquq(dst, ktmp2, xtmp2, true, vec_enc);
4200+
4201+
// Drop the NaN lanes from the special-lane mask.
kxorwl(ktmp1, ktmp1, ktmp2);
4202+
// Among the remaining special lanes, keep those where src is not less than 0 (xtmp2 is zero).
evcmpps(ktmp1, ktmp1, src, xtmp2, Assembler::NLT_UQ, vec_enc);
4203+
// Truth table 0x11 with both sources equal to xtmp1 yields ~xtmp1: xtmp2 <- ~double_sign_flip.
vpternlogq(xtmp2, 0x11, xtmp1, xtmp1, vec_enc);
4204+
// Write the complemented constant into dst for the lanes selected by ktmp1.
evmovdquq(dst, ktmp1, xtmp2, true, vec_enc);
4205+
bind(done);
4206+
}
4207+
41864208
/*
41874209
* Following routine handles special floating point values(NaN/Inf/-Inf/Max/Min) for casting operation.
41884210
* If src is NaN, the result is 0.
@@ -4243,6 +4265,35 @@ void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, XM
42434265
vector_cast_float_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, float_sign_flip, vec_enc);
42444266
}
42454267

4268+
// Vector float -> long cast: performs the raw conversion with evcvttps2qq and then runs the
// special-case fix-up on the result.
// xtmp1/xtmp2/ktmp1/ktmp2/scratch are temporaries forwarded to the fix-up helper.
void C2_MacroAssembler::vector_castF2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
4269+
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
4270+
Register scratch, int vec_enc) {
4271+
// Raw conversion of src into dst.
evcvttps2qq(dst, src, vec_enc);
4272+
// Note the helper takes scratch before double_sign_flip, the reverse of this method's order.
vector_cast_float_to_long_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, double_sign_flip, vec_enc);
4273+
}
4274+
4275+
// Vector double -> integral cast. The value is always converted to long first via
// vector_castD2L_evex; for T_INT, T_SHORT and T_BYTE the long result is then narrowed in place.
//   to_elem_bt - target element type; T_LONG, T_INT, T_SHORT and T_BYTE are handled,
//                any other type trips the assert in the default case.
//   the remaining arguments are forwarded unchanged to vector_castD2L_evex.
void C2_MacroAssembler::vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
4276+
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
4277+
AddressLiteral double_sign_flip, Register scratch, int vec_enc) {
4278+
// Step 1: double -> long into dst.
vector_castD2L_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, double_sign_flip, scratch, vec_enc);
4279+
// Step 2: narrow in place unless long is already the requested type.
if (to_elem_bt != T_LONG) {
4280+
switch(to_elem_bt) {
4281+
case T_INT:
4282+
// long -> int using evpmovsqd (declared under "Evex casts with signed saturation").
evpmovsqd(dst, dst, vec_enc);
4283+
break;
4284+
case T_SHORT:
4285+
// long -> int with evpmovsqd, then int -> short with evpmovdw.
evpmovsqd(dst, dst, vec_enc);
4286+
evpmovdw(dst, dst, vec_enc);
4287+
break;
4288+
case T_BYTE:
4289+
// long -> int with evpmovsqd, then int -> byte with evpmovdb.
evpmovsqd(dst, dst, vec_enc);
4290+
evpmovdb(dst, dst, vec_enc);
4291+
break;
4292+
// Any other target type is a caller error; the assert message names the offending type.
default: assert(false, "%s", type2name(to_elem_bt));
4293+
}
4294+
}
4295+
}
4296+
42464297
#ifdef _LP64
42474298
void C2_MacroAssembler::vector_round_double_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
42484299
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,

src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,11 +310,18 @@
310310
KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
311311
Register scratch, int vec_enc);
312312

313+
void vector_castF2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
314+
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
315+
Register scratch, int vec_enc);
313316

314317
void vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
315318
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
316319
Register scratch, int vec_enc);
317320

321+
void vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
322+
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
323+
Register scratch, int vec_enc);
324+
318325
void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
319326
BasicType from_elem_bt, BasicType to_elem_bt);
320327

@@ -326,6 +333,11 @@
326333
KRegister ktmp1, KRegister ktmp2, Register scratch, AddressLiteral float_sign_flip,
327334
int vec_enc);
328335

336+
void vector_cast_float_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
337+
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2,
338+
Register scratch, AddressLiteral double_sign_flip,
339+
int vec_enc);
340+
329341
void vector_cast_float_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
330342
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
331343
Register scratch, AddressLiteral float_sign_flip,

src/hotspot/cpu/x86/x86.ad

Lines changed: 62 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1878,11 +1878,15 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
18781878
}
18791879
break;
18801880
case Op_VectorCastD2X:
1881-
if (is_subword_type(bt) || bt == T_INT) {
1882-
return false;
1883-
}
1884-
if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
1885-
return false;
1881+
// Conversion to integral type is only supported on AVX-512 platforms with avx512dq.
1882+
// Need avx512vl for size_in_bits < 512
1883+
if (is_integral_type(bt)) {
1884+
if (!VM_Version::supports_avx512dq()) {
1885+
return false;
1886+
}
1887+
if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
1888+
return false;
1889+
}
18861890
}
18871891
break;
18881892
case Op_RoundVD:
@@ -1891,8 +1895,20 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
18911895
}
18921896
break;
18931897
case Op_VectorCastF2X:
1894-
if (is_subword_type(bt) || bt == T_LONG) {
1895-
return false;
1898+
// F2I is supported on all AVX and above platforms
1899+
// For conversion to other integral types need AVX512:
1900+
// Conversion to long in addition needs avx512dq
1901+
// Need avx512vl for size_in_bits < 512
1902+
if (is_integral_type(bt) && (bt != T_INT)) {
1903+
if (UseAVX <= 2) {
1904+
return false;
1905+
}
1906+
if ((bt == T_LONG) && !VM_Version::supports_avx512dq()) {
1907+
return false;
1908+
}
1909+
if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
1910+
return false;
1911+
}
18961912
}
18971913
break;
18981914
case Op_MulReductionVI:
@@ -7325,6 +7341,8 @@ instruct vcastFtoD_reg(vec dst, vec src) %{
73257341

73267342

73277343
instruct castFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{
7344+
// F2I conversion for < 64 byte vector using AVX instructions
7345+
// AVX512 platforms that don't support avx512vl also use AVX instructions to support F2I
73287346
predicate(!VM_Version::supports_avx512vl() &&
73297347
Matcher::vector_length_in_bytes(n) < 64 &&
73307348
Matcher::vector_element_basic_type(n) == T_INT);
@@ -7356,6 +7374,37 @@ instruct castFtoI_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, k
73567374
ins_pipe( pipe_slow );
73577375
%}
73587376

7377+
// Matches VectorCastF2X when the result element type is integral but not T_INT.
// T_LONG goes through vector_castF2L_evex; the other types are converted to int with
// vector_castF2I_evex and then narrowed with evpmovdw (T_SHORT) or evpmovdb (T_BYTE).
instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
7378+
// F2X conversion for integral non T_INT target using AVX512 instructions
7379+
// Platforms that don't support avx512vl can only support 64 byte vectors
7380+
predicate(is_integral_type(Matcher::vector_element_basic_type(n)) &&
7381+
Matcher::vector_element_basic_type(n) != T_INT);
7382+
match(Set dst (VectorCastF2X src));
7383+
// dst and all temporaries are declared TEMP; the flags register is declared killed.
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
7384+
format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
7385+
ins_encode %{
7386+
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7387+
if (to_elem_bt == T_LONG) {
7388+
// The long path takes its vector length encoding from this node (the result).
int vlen_enc = vector_length_encoding(this);
7389+
__ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7390+
$xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
7391+
ExternalAddress(vector_double_signflip()), $scratch$$Register, vlen_enc);
7392+
} else {
7393+
// The sub-int path takes its vector length encoding from the $src operand instead.
// NOTE(review): presumably because the int intermediate has the same width as the float
// source while the final short/byte result is narrower - confirm.
int vlen_enc = vector_length_encoding(this, $src);
7394+
// float -> int first, using the float sign-flip constant.
__ vector_castF2I_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7395+
$xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
7396+
ExternalAddress(vector_float_signflip()), $scratch$$Register, vlen_enc);
7397+
// Then narrow the int result in place.
if (to_elem_bt == T_SHORT) {
7398+
__ evpmovdw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7399+
} else {
7400+
// The predicate leaves T_BYTE as the only remaining type.
assert(to_elem_bt == T_BYTE, "required");
7401+
__ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7402+
}
7403+
}
7404+
%}
7405+
ins_pipe( pipe_slow );
7406+
%}
7407+
73597408
instruct vcastDtoF_reg(vec dst, vec src) %{
73607409
predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
73617410
match(Set dst (VectorCastD2X src));
@@ -7367,14 +7416,15 @@ instruct vcastDtoF_reg(vec dst, vec src) %{
73677416
ins_pipe( pipe_slow );
73687417
%}
73697418

7370-
instruct castDtoL_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
7371-
predicate(Matcher::vector_element_basic_type(n) == T_LONG);
7419+
instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{
7420+
predicate(is_integral_type(Matcher::vector_element_basic_type(n)));
73727421
match(Set dst (VectorCastD2X src));
73737422
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr);
7374-
format %{ "vector_cast_d2l $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
7423+
format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %}
73757424
ins_encode %{
7376-
int vlen_enc = vector_length_encoding(this);
7377-
__ vector_castD2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7425+
int vlen_enc = vector_length_encoding(this, $src);
7426+
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7427+
__ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
73787428
$xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
73797429
ExternalAddress(vector_double_signflip()), $scratch$$Register, vlen_enc);
73807430
%}

0 commit comments

Comments
 (0)