Skip to content

Commit 073897c

Browse files
Smita Kamath authored and Jatin Bhateja committed
8294588: Auto vectorize half precision floating point conversion APIs
Reviewed-by: sviswanathan, kvn, jbhateja, fgao, xgong
1 parent 46cd457 commit 073897c

File tree

12 files changed

+231
-10
lines changed

12 files changed

+231
-10
lines changed

src/hotspot/cpu/x86/assembler_x86.cpp

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1931,14 +1931,14 @@ void Assembler::vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len) {
19311931
}
19321932

19331933
// Register-to-register form of vcvtps2ph: emits opcode 0x1D from the 0F 3A
// opcode map with the 66 SIMD prefix, then the byte (0xC0 | encode), then the
// caller-supplied imm8.
//
// This block is a rendered diff. The assert following the "1934-" marker is the
// removed line (required AVX512VL or F16C); the assert following "1934+" is its
// replacement (requires EVEX or F16C). The bare number lines are diff line
// markers, not code.
//
// Note that src->encoding() is passed as the first encoding argument and
// dst->encoding() as the third, the reverse of the order used by the
// register-to-register vcvtph2ps in this file.
void Assembler::vcvtps2ph(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
1934-
assert(VM_Version::supports_avx512vl() || VM_Version::supports_f16c(), "");
1934+
assert(VM_Version::supports_evex() || VM_Version::supports_f16c(), "");
19351935
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /*uses_vl */ true);
19361936
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
19371937
// Single 3-byte emit: opcode, (0xC0 | encode), immediate.
emit_int24(0x1D, (0xC0 | encode), imm8);
19381938
}
19391939

19401940
void Assembler::evcvtps2ph(Address dst, KRegister mask, XMMRegister src, int imm8, int vector_len) {
1941-
assert(VM_Version::supports_avx512vl(), "");
1941+
assert(VM_Version::supports_evex(), "");
19421942
InstructionMark im(this);
19431943
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /*uses_vl */ true);
19441944
attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_64bit);
@@ -1951,13 +1951,34 @@ void Assembler::evcvtps2ph(Address dst, KRegister mask, XMMRegister src, int imm
19511951
emit_int8(imm8);
19521952
}
19531953

1954+
// Memory-destination overload of vcvtps2ph, added by this commit (every line
// carries a "+" diff marker). Emits opcode 0x1D from the 0F 3A map with the 66
// SIMD prefix, the operand bytes for (src, dst), and finally imm8.
//
// Requires EVEX or F16C support (asserted). Unlike evcvtps2ph, this overload
// takes no KRegister mask and constructs its attributes with no_mask_reg = true.
void Assembler::vcvtps2ph(Address dst, XMMRegister src, int imm8, int vector_len) {
1955+
assert(VM_Version::supports_evex() || VM_Version::supports_f16c(), "");
1956+
InstructionMark im(this);
1957+
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /*uses_vl */ true);
1958+
// Address attributes: half-vector-memory tuple type, no explicit input size.
attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
1959+
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
1960+
emit_int8(0x1D);
1961+
// NOTE(review): the trailing 1 presumably accounts for the one immediate byte
// emitted after the operand - confirm against emit_operand's signature.
emit_operand(src, dst, 1);
1962+
emit_int8(imm8);
1963+
}
1964+
19541965
// Register-to-register form of vcvtph2ps: emits opcode 0x13 from the 0F 38
// opcode map with the 66 SIMD prefix, followed by the byte (0xC0 | encode).
//
// This block is a rendered diff. The assert following the "1955-" marker is the
// removed line (required AVX512VL or F16C); the assert following "1966+" is its
// replacement (requires EVEX or F16C). The bare number lines are diff line
// markers, not code.
void Assembler::vcvtph2ps(XMMRegister dst, XMMRegister src, int vector_len) {
1955-
assert(VM_Version::supports_avx512vl() || VM_Version::supports_f16c(), "");
1966+
assert(VM_Version::supports_evex() || VM_Version::supports_f16c(), "");
19561967
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */false, /* no_mask_reg */ true, /* uses_vl */ true);
19571968
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
19581969
emit_int16(0x13, (0xC0 | encode));
19591970
}
19601971

1972+
// Memory-source overload of vcvtph2ps, added by this commit (every line carries
// a "+" diff marker). Emits opcode 0x13 from the 0F 38 map with the 66 SIMD
// prefix, followed by the operand bytes for (dst, src). No immediate follows,
// so emit_operand is called with a trailing 0.
//
// Requires EVEX or F16C support (asserted).
//
// NOTE(review): no_mask_reg is false here, whereas the memory-destination
// vcvtps2ph(Address, XMMRegister, int, int) overload and both register forms
// pass true, and this function takes no mask register - confirm this is
// intentional.
void Assembler::vcvtph2ps(XMMRegister dst, Address src, int vector_len) {
1973+
assert(VM_Version::supports_evex() || VM_Version::supports_f16c(), "");
1974+
InstructionMark im(this);
1975+
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /*uses_vl */ true);
1976+
// Address attributes: half-vector-memory tuple type, no explicit input size.
attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
1977+
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1978+
emit_int8(0x13);
1979+
emit_operand(dst, src, 0);
1980+
}
1981+
19611982
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
19621983
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
19631984
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);

src/hotspot/cpu/x86/assembler_x86.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1160,6 +1160,8 @@ class Assembler : public AbstractAssembler {
11601160
void vcvtps2ph(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
11611161
void vcvtph2ps(XMMRegister dst, XMMRegister src, int vector_len);
11621162
void evcvtps2ph(Address dst, KRegister mask, XMMRegister src, int imm8, int vector_len);
1163+
void vcvtps2ph(Address dst, XMMRegister src, int imm8, int vector_len);
1164+
void vcvtph2ps(XMMRegister dst, Address src, int vector_len);
11631165

11641166
// Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
11651167
void cvtdq2ps(XMMRegister dst, XMMRegister src);

src/hotspot/cpu/x86/vm_version_x86.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -956,6 +956,7 @@ void VM_Version::get_processor_features() {
956956
if (UseAVX < 1) {
957957
_features &= ~CPU_AVX;
958958
_features &= ~CPU_VZEROUPPER;
959+
_features &= ~CPU_F16C;
959960
}
960961

961962
if (logical_processors_per_package() == 1) {

src/hotspot/cpu/x86/x86.ad

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1687,6 +1687,12 @@ const bool Matcher::match_rule_supported(int opcode) {
16871687
return false;
16881688
}
16891689
break;
1690+
case Op_VectorCastF2HF:
1691+
case Op_VectorCastHF2F:
1692+
if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
1693+
return false;
1694+
}
1695+
break;
16901696
}
16911697
return true; // Match rules are supported by default.
16921698
}
@@ -1901,6 +1907,14 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
19011907
return false;
19021908
}
19031909
break;
1910+
case Op_VectorCastF2HF:
1911+
case Op_VectorCastHF2F:
1912+
if (!VM_Version::supports_f16c() &&
1913+
((!VM_Version::supports_evex() ||
1914+
((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
1915+
return false;
1916+
}
1917+
break;
19041918
case Op_RoundVD:
19051919
if (!VM_Version::supports_avx512dq()) {
19061920
return false;
@@ -3673,6 +3687,26 @@ instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
36733687
ins_pipe( pipe_slow );
36743688
%}
36753689

3690+
// Match rule for VectorCastF2HF with vector register source and destination.
// Encodes as a single vcvtps2ph with immediate 0x04; the vector length encoding
// is derived from the source operand ($src), not from the result.
// NOTE(review): 0x04 presumably selects the MXCSR-controlled rounding mode -
// confirm against the instruction reference.
// NOTE(review): the format string has no comma between $dst and $src, unlike
// the other conversion rules added in this commit - confirm whether intended.
instruct vconvF2HF(vec dst, vec src) %{
3691+
match(Set dst (VectorCastF2HF src));
3692+
format %{ "vector_conv_F2HF $dst $src" %}
3693+
ins_encode %{
3694+
int vlen_enc = vector_length_encoding(this, $src);
3695+
__ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
3696+
%}
3697+
ins_pipe( pipe_slow );
3698+
%}
3699+
3700+
// Match rule that folds a StoreVector of a VectorCastF2HF result into a single
// vcvtps2ph with a memory destination. Uses immediate 0x04 and takes the vector
// length encoding from the source operand ($src).
// NOTE(review): 0x04 presumably selects the MXCSR-controlled rounding mode -
// confirm against the instruction reference.
instruct vconvF2HF_mem_reg(memory mem, vec src) %{
3701+
match(Set mem (StoreVector mem (VectorCastF2HF src)));
3702+
format %{ "vcvtps2ph $mem,$src" %}
3703+
ins_encode %{
3704+
int vlen_enc = vector_length_encoding(this, $src);
3705+
__ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
3706+
%}
3707+
ins_pipe( pipe_slow );
3708+
%}
3709+
36763710
instruct convHF2F_reg_reg(regF dst, rRegI src) %{
36773711
match(Set dst (ConvHF2F src));
36783712
format %{ "vcvtph2ps $dst,$src" %}
@@ -3683,6 +3717,27 @@ instruct convHF2F_reg_reg(regF dst, rRegI src) %{
36833717
ins_pipe( pipe_slow );
36843718
%}
36853719

3720+
// Match rule that folds a LoadVector feeding VectorCastHF2F into a single
// vcvtph2ps with a memory source. The vector length encoding is taken from
// this node (the result), not from the loaded input.
instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
3721+
match(Set dst (VectorCastHF2F (LoadVector mem)));
3722+
format %{ "vcvtph2ps $dst,$mem" %}
3723+
ins_encode %{
3724+
int vlen_enc = vector_length_encoding(this);
3725+
__ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
3726+
%}
3727+
ins_pipe( pipe_slow );
3728+
%}
3729+
3730+
// Match rule for VectorCastHF2F with vector register source and destination.
// Encodes as a single register-to-register vcvtph2ps; the vector length
// encoding is taken from this node (the result).
// NOTE(review): ins_cost(125) is set only on this rule, presumably so the
// matcher prefers the load-folding vconvHF2F_reg_mem variant when it applies -
// confirm.
instruct vconvHF2F(vec dst, vec src) %{
3731+
match(Set dst (VectorCastHF2F src));
3732+
ins_cost(125);
3733+
format %{ "vector_conv_HF2F $dst,$src" %}
3734+
ins_encode %{
3735+
int vlen_enc = vector_length_encoding(this);
3736+
__ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
3737+
%}
3738+
ins_pipe( pipe_slow );
3739+
%}
3740+
36863741
// ---------------------------------------- VectorReinterpret ------------------------------------
36873742
instruct reinterpret_mask(kReg dst) %{
36883743
predicate(n->bottom_type()->isa_vectmask() &&

src/hotspot/share/adlc/formssel.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4223,7 +4223,7 @@ bool MatchRule::is_vector() const {
42234223
"VectorTest", "VectorLoadMask", "VectorStoreMask", "VectorBlend", "VectorInsert",
42244224
"VectorRearrange","VectorLoadShuffle", "VectorLoadConst",
42254225
"VectorCastB2X", "VectorCastS2X", "VectorCastI2X",
4226-
"VectorCastL2X", "VectorCastF2X", "VectorCastD2X",
4226+
"VectorCastL2X", "VectorCastF2X", "VectorCastD2X", "VectorCastF2HF", "VectorCastHF2F",
42274227
"VectorUCastB2X", "VectorUCastS2X", "VectorUCastI2X",
42284228
"VectorMaskWrapper","VectorMaskCmp","VectorReinterpret","LoadVectorMasked","StoreVectorMasked",
42294229
"FmaVD","FmaVF","PopCountVI","PopCountVL","PopulateIndex","VectorLongToMask",

src/hotspot/share/opto/classes.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,8 @@ macro(VectorCastI2X)
506506
macro(VectorCastL2X)
507507
macro(VectorCastF2X)
508508
macro(VectorCastD2X)
509+
macro(VectorCastF2HF)
510+
macro(VectorCastHF2F)
509511
macro(VectorUCastB2X)
510512
macro(VectorUCastS2X)
511513
macro(VectorUCastI2X)

src/hotspot/share/opto/superword.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2712,7 +2712,7 @@ bool SuperWord::output() {
27122712
assert(n->req() == 2, "only one input expected");
27132713
BasicType bt = velt_basic_type(n);
27142714
Node* in = vector_opd(p, 1);
2715-
int vopc = VectorCastNode::opcode(in->bottom_type()->is_vect()->element_basic_type());
2715+
int vopc = VectorCastNode::opcode(opc, in->bottom_type()->is_vect()->element_basic_type());
27162716
vn = VectorCastNode::make(vopc, in, bt, vlen);
27172717
vlen_in_bytes = vn->as_Vector()->length_in_bytes();
27182718
} else if (is_cmov_pack(p)) {

src/hotspot/share/opto/vectorIntrinsics.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -775,7 +775,7 @@ bool LibraryCallKit::inline_vector_shuffle_to_vector() {
775775
return false;
776776
}
777777

778-
int cast_vopc = VectorCastNode::opcode(T_BYTE); // from shuffle of type T_BYTE
778+
int cast_vopc = VectorCastNode::opcode(-1, T_BYTE); // from shuffle of type T_BYTE
779779
// Make sure that cast is implemented to particular type/size combination.
780780
if (!arch_supports_vector(cast_vopc, num_elem, elem_bt, VecMaskNotUsed)) {
781781
if (C->print_intrinsics()) {
@@ -2489,7 +2489,7 @@ bool LibraryCallKit::inline_vector_convert() {
24892489
Node* op = opd1;
24902490
if (is_cast) {
24912491
assert(!is_mask || num_elem_from == num_elem_to, "vector mask cast needs the same elem num");
2492-
int cast_vopc = VectorCastNode::opcode(elem_bt_from, !is_ucast);
2492+
int cast_vopc = VectorCastNode::opcode(-1, elem_bt_from, !is_ucast);
24932493

24942494
// Make sure that vector cast is implemented to particular type/size combination if it is
24952495
// not a mask casting.

src/hotspot/share/opto/vectornode.cpp

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,8 @@ bool VectorNode::is_convert_opcode(int opc) {
467467
case Op_ConvD2F:
468468
case Op_ConvF2D:
469469
case Op_ConvD2I:
470+
case Op_ConvF2HF:
471+
case Op_ConvHF2F:
470472
return true;
471473
default:
472474
return false;
@@ -1328,14 +1330,31 @@ VectorCastNode* VectorCastNode::make(int vopc, Node* n1, BasicType bt, uint vlen
13281330
case Op_VectorUCastB2X: return new VectorUCastB2XNode(n1, vt);
13291331
case Op_VectorUCastS2X: return new VectorUCastS2XNode(n1, vt);
13301332
case Op_VectorUCastI2X: return new VectorUCastI2XNode(n1, vt);
1333+
case Op_VectorCastHF2F: return new VectorCastHF2FNode(n1, vt);
1334+
case Op_VectorCastF2HF: return new VectorCastF2HFNode(n1, vt);
13311335
default:
13321336
assert(false, "unknown node: %s", NodeClassNames[vopc]);
13331337
return NULL;
13341338
}
13351339
}
13361340

1337-
int VectorCastNode::opcode(BasicType bt, bool is_signed) {
1341+
int VectorCastNode::opcode(int sopc, BasicType bt, bool is_signed) {
13381342
assert((is_integral_type(bt) && bt != T_LONG) || is_signed, "");
1343+
1344+
// Handle special case for to/from Half Float conversions
1345+
switch (sopc) {
1346+
case Op_ConvHF2F:
1347+
assert(bt == T_SHORT, "");
1348+
return Op_VectorCastHF2F;
1349+
case Op_ConvF2HF:
1350+
assert(bt == T_FLOAT, "");
1351+
return Op_VectorCastF2HF;
1352+
default:
1353+
// Handled normally below
1354+
break;
1355+
}
1356+
1357+
// Handle normal conversions
13391358
switch (bt) {
13401359
case T_BYTE: return is_signed ? Op_VectorCastB2X : Op_VectorUCastB2X;
13411360
case T_SHORT: return is_signed ? Op_VectorCastS2X : Op_VectorUCastS2X;
@@ -1354,7 +1373,7 @@ bool VectorCastNode::implemented(int opc, uint vlen, BasicType src_type, BasicTy
13541373
is_java_primitive(src_type) &&
13551374
(vlen > 1) && is_power_of_2(vlen) &&
13561375
VectorNode::vector_size_supported(dst_type, vlen)) {
1357-
int vopc = VectorCastNode::opcode(src_type);
1376+
int vopc = VectorCastNode::opcode(opc, src_type);
13581377
return vopc > 0 && Matcher::match_rule_supported_superword(vopc, vlen, dst_type);
13591378
}
13601379
return false;

src/hotspot/share/opto/vectornode.hpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1542,7 +1542,7 @@ class VectorCastNode : public VectorNode {
15421542
virtual int Opcode() const;
15431543

15441544
static VectorCastNode* make(int vopc, Node* n1, BasicType bt, uint vlen);
1545-
static int opcode(BasicType bt, bool is_signed = true);
1545+
static int opcode(int opc, BasicType bt, bool is_signed = true);
15461546
static bool implemented(int opc, uint vlen, BasicType src_type, BasicType dst_type);
15471547

15481548
virtual Node* Identity(PhaseGVN* phase);
@@ -1628,6 +1628,22 @@ class VectorUCastS2XNode : public VectorCastNode {
16281628
virtual int Opcode() const;
16291629
};
16301630

1631+
// Vector cast node for the half-float to float conversion (VectorCastHF2F).
// The constructor forwards to VectorCastNode and asserts that the input
// vector's element type is T_SHORT. Opcode() is only declared here; its
// definition is outside this view.
class VectorCastHF2FNode : public VectorCastNode {
1632+
public:
1633+
VectorCastHF2FNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
1634+
assert(in->bottom_type()->is_vect()->element_basic_type() == T_SHORT, "must be short");
1635+
}
1636+
virtual int Opcode() const;
1637+
};
1638+
1639+
// Vector cast node for the float to half-float conversion (VectorCastF2HF).
// The constructor forwards to VectorCastNode and asserts that the input
// vector's element type is T_FLOAT. Opcode() is only declared here; its
// definition is outside this view.
class VectorCastF2HFNode : public VectorCastNode {
1640+
public:
1641+
VectorCastF2HFNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
1642+
assert(in->bottom_type()->is_vect()->element_basic_type() == T_FLOAT, "must be float");
1643+
}
1644+
virtual int Opcode() const;
1645+
};
1646+
16311647
class VectorUCastI2XNode : public VectorCastNode {
16321648
public:
16331649
VectorUCastI2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {

0 commit comments

Comments
 (0)