Skip to content

Commit 07946aa

Browse files
Smita KamathSandhya Viswanathan
Smita Kamath
authored and
Sandhya Viswanathan
committed
8289552: Make intrinsic conversions between bit representations of half precision values and floats
Reviewed-by: kvn, sviswanathan, jbhateja
1 parent 2586b1a commit 07946aa

File tree

19 files changed

+345
-11
lines changed

19 files changed

+345
-11
lines changed

src/hotspot/cpu/x86/assembler_x86.cpp

+28
Original file line numberDiff line numberDiff line change
@@ -1930,6 +1930,34 @@ void Assembler::vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len) {
19301930
emit_int16((unsigned char)0xE6, (0xC0 | encode));
19311931
}
19321932

1933+
// Emits the register-to-register form of VCVTPS2PH (opcode 0x1D in the
// 0F 3A map, 66 prefix), followed by the caller-supplied imm8.
// Requires AVX512VL or F16C support.
void Assembler::vcvtps2ph(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
  assert(VM_Version::supports_avx512vl() || VM_Version::supports_f16c(), "");
  InstructionAttr attrs(vector_len,
                        /* rex_w */ false,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  // Operand order handed to the prefix encoder is src first, dst last
  // (the reverse of vcvtph2ps).
  const int reg_bits = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F_3A, &attrs);
  emit_int24(0x1D, (0xC0 | reg_bits), imm8);
}
1939+
1940+
// Emits the EVEX-encoded, memory-destination form of VCVTPS2PH
// (opcode 0x1D in the 0F 3A map, 66 prefix) with `mask` as the embedded
// opmask register. Requires AVX512VL support.
void Assembler::evcvtps2ph(Address dst, KRegister mask, XMMRegister src, int imm8, int vector_len) {
  assert(VM_Version::supports_avx512vl(), "");
  InstructionMark mark(this);

  InstructionAttr attrs(vector_len,
                        /* rex_w */ false,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ false,
                        /* uses_vl */ true);
  attrs.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_64bit);
  attrs.reset_is_clear_context();
  attrs.set_embedded_opmask_register_specifier(mask);
  attrs.set_is_evex_instruction();

  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attrs);
  emit_int8(0x1D);
  // The trailing 1 is passed because one more byte (imm8) is emitted after
  // the memory operand.
  emit_operand(src, dst, 1);
  emit_int8(imm8);
}
1953+
1954+
// Emits the register-to-register form of VCVTPH2PS (opcode 0x13 in the
// 0F 38 map, 66 prefix). Requires AVX512VL or F16C support.
void Assembler::vcvtph2ps(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512vl() || VM_Version::supports_f16c(), "");
  InstructionAttr attrs(vector_len,
                        /* rex_w */ false,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int16(0x13, (0xC0 | reg_bits));
}
1960+
19331961
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
19341962
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
19351963
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);

src/hotspot/cpu/x86/assembler_x86.hpp

+5
Original file line numberDiff line numberDiff line change
@@ -1156,6 +1156,11 @@ class Assembler : public AbstractAssembler {
11561156
void cvtdq2pd(XMMRegister dst, XMMRegister src);
11571157
void vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len);
11581158

1159+
// Conversions between Halffloat (half-precision) and Single Precision Floating-Point values
1160+
// Single precision -> Halffloat, register to register; imm8 is emitted as the trailing immediate byte.
void vcvtps2ph(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1161+
// Halffloat -> single precision, register to register.
void vcvtph2ps(XMMRegister dst, XMMRegister src, int vector_len);
1162+
// Single precision -> Halffloat, EVEX form with a memory destination and an opmask register.
void evcvtps2ph(Address dst, KRegister mask, XMMRegister src, int imm8, int vector_len);
1163+
11591164
// Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
11601165
void cvtdq2ps(XMMRegister dst, XMMRegister src);
11611166
void vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len);

src/hotspot/cpu/x86/vm_version_x86.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -2883,6 +2883,8 @@ uint64_t VM_Version::feature_flags() {
28832883
_cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
28842884
result |= CPU_AVX;
28852885
result |= CPU_VZEROUPPER;
2886+
if (_cpuid_info.std_cpuid1_ecx.bits.f16c != 0)
2887+
result |= CPU_F16C;
28862888
if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
28872889
result |= CPU_AVX2;
28882890
if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 &&

src/hotspot/cpu/x86/vm_version_x86.hpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,8 @@ class VM_Version : public Abstract_VM_Version {
8989
: 1,
9090
osxsave : 1,
9191
avx : 1,
92-
: 2,
92+
f16c : 1,
93+
: 1,
9394
hv : 1;
9495
} bits;
9596
};
@@ -374,7 +375,8 @@ class VM_Version : public Abstract_VM_Version {
374375
decl(RDPID, "rdpid", 49) /* RDPID instruction */ \
375376
decl(FSRM, "fsrm", 50) /* Fast Short REP MOV */ \
376377
decl(GFNI, "gfni", 51) /* Vector GFNI instructions */ \
377-
decl(AVX512_BITALG, "avx512_bitalg", 52) /* Vector sub-word popcount and bit gather instructions */
378+
decl(AVX512_BITALG, "avx512_bitalg", 52) /* Vector sub-word popcount and bit gather instructions */\
379+
decl(F16C, "f16c", 53) /* Half-precision and single precision FP conversion instructions*/
378380

379381
#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit),
380382
CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
@@ -681,6 +683,7 @@ class VM_Version : public Abstract_VM_Version {
681683
static bool supports_avx512_vbmi2() { return (_features & CPU_AVX512_VBMI2) != 0; }
682684
static bool supports_hv() { return (_features & CPU_HV) != 0; }
683685
static bool supports_serialize() { return (_features & CPU_SERIALIZE) != 0; }
686+
// Reports whether the CPU_F16C bit is set in _features.
static bool supports_f16c() { return (_features & CPU_F16C) != 0; }
684687

685688
// Intel features
686689
static bool is_intel_family_core() { return is_intel() &&

src/hotspot/cpu/x86/x86.ad

+41
Original file line numberDiff line numberDiff line change
@@ -1678,6 +1678,12 @@ const bool Matcher::match_rule_supported(int opcode) {
16781678
// Together with common x86 rules, this handles all UseSSE cases.
16791679
#endif
16801680
break;
1681+
case Op_ConvF2HF:
1682+
case Op_ConvHF2F:
1683+
if (!VM_Version::supports_f16c() && !VM_Version::supports_avx512vl()) {
1684+
return false;
1685+
}
1686+
break;
16811687
}
16821688
return true; // Match rules are supported by default.
16831689
}
@@ -3652,6 +3658,41 @@ instruct sqrtD_reg(regD dst) %{
36523658
ins_pipe(pipe_slow);
36533659
%}
36543660

3661+
// Float -> half-float conversion (ConvF2HF) from an XMM register into a
// general-purpose register. The converted value is produced in the XMM
// temporary, copied to $dst, and then extended with movswl.
instruct convF2HF_reg_reg(rRegI dst, regF src, regF tmp) %{
3662+
effect(TEMP tmp);
3663+
match(Set dst (ConvF2HF src));
3664+
ins_cost(125);
3665+
format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
3666+
ins_encode %{
3667+
// NOTE(review): imm8 0x04 presumably selects the MXCSR rounding mode -- confirm against the Intel SDM.
__ vcvtps2ph($tmp$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
3668+
__ movdl($dst$$Register, $tmp$$XMMRegister);
3669+
// Extend the 16-bit result in place so $dst holds it as a 32-bit value.
__ movswl($dst$$Register, $dst$$Register);
3670+
%}
3671+
ins_pipe( pipe_slow );
3672+
%}
3673+
3674+
// Float -> half-float conversion fused with the StoreC of its result:
// the converted value is written to memory by evcvtps2ph.
// Only selected when UseAVX > 2 and AVX512VL is available.
instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
3675+
predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
3676+
effect(TEMP ktmp, TEMP rtmp);
3677+
match(Set mem (StoreC mem (ConvF2HF src)));
3678+
format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
3679+
ins_encode %{
3680+
// Load the constant 0x1 into the opmask register via the integer temp.
// NOTE(review): a mask of 0x1 presumably limits the store to the first
// 16-bit element -- confirm.
__ movl($rtmp$$Register, 0x1);
3681+
__ kmovwl($ktmp$$KRegister, $rtmp$$Register);
3682+
__ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
3683+
%}
3684+
ins_pipe( pipe_slow );
3685+
%}
3686+
3687+
// Half-float -> float conversion (ConvHF2F) from a general-purpose register
// into an XMM register. The source is first copied into $dst, which is then
// converted in place.
instruct convHF2F_reg_reg(regF dst, rRegI src) %{
3688+
match(Set dst (ConvHF2F src));
3689+
format %{ "vcvtph2ps $dst,$src" %}
3690+
ins_encode %{
3691+
__ movdl($dst$$XMMRegister, $src$$Register);
3692+
__ vcvtph2ps($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
3693+
%}
3694+
ins_pipe( pipe_slow );
3695+
%}
36553696

36563697
// ---------------------------------------- VectorReinterpret ------------------------------------
36573698
instruct reinterpret_mask(kReg dst) %{

src/hotspot/share/classfile/vmIntrinsics.hpp

+6
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,12 @@ class methodHandle;
224224
do_name( doubleToLongBits_name, "doubleToLongBits") \
225225
do_intrinsic(_longBitsToDouble, java_lang_Double, longBitsToDouble_name, long_double_signature, F_SN)\
226226
do_name( longBitsToDouble_name, "longBitsToDouble") \
227+
do_intrinsic(_float16ToFloat, java_lang_Float, float16ToFloat_name, f16_float_signature, F_S) \
228+
do_name( float16ToFloat_name, "float16ToFloat") \
229+
do_signature(f16_float_signature, "(S)F") \
230+
do_intrinsic(_floatToFloat16, java_lang_Float, floatToFloat16_name, float_f16_signature, F_S) \
231+
do_name( floatToFloat16_name, "floatToFloat16") \
232+
do_signature(float_f16_signature, "(F)S") \
227233
\
228234
do_intrinsic(_compareUnsigned_i, java_lang_Integer, compareUnsigned_name, int2_int_signature, F_S) \
229235
do_intrinsic(_compareUnsigned_l, java_lang_Long, compareUnsigned_name, long2_int_signature, F_S) \

src/hotspot/share/opto/c2compiler.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,12 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
300300
case vmIntrinsics::_remainderUnsigned_l:
301301
if (!Matcher::match_rule_supported(Op_UModL)) return false;
302302
break;
303+
case vmIntrinsics::_float16ToFloat:
304+
if (!Matcher::match_rule_supported(Op_ConvHF2F)) return false;
305+
break;
306+
case vmIntrinsics::_floatToFloat16:
307+
if (!Matcher::match_rule_supported(Op_ConvF2HF)) return false;
308+
break;
303309

304310
/* CompareAndSet, Object: */
305311
case vmIntrinsics::_compareAndSetReference:

src/hotspot/share/opto/classes.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,8 @@ macro(ConvI2L)
149149
macro(ConvL2D)
150150
macro(ConvL2F)
151151
macro(ConvL2I)
152+
macro(ConvF2HF)
153+
macro(ConvHF2F)
152154
macro(CountedLoop)
153155
macro(CountedLoopEnd)
154156
macro(OuterStripMinedLoop)

src/hotspot/share/opto/convertnode.cpp

+27
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,21 @@ const Type* ConvF2DNode::Value(PhaseGVN* phase) const {
161161
return TypeD::make( (double)tf->getf() );
162162
}
163163

164+
//=============================================================================
165+
//------------------------------Value------------------------------------------
166+
// Computes the type of the conversion result:
//   TOP input        -> TOP
//   arbitrary float  -> TypeInt::SHORT
//   float constant   -> the constant folded through SharedRuntime::f2hf
const Type* ConvF2HFNode::Value(PhaseGVN* phase) const {
  const Type* in_type = phase->type(in(1));
  if (in_type == Type::TOP) {
    return Type::TOP;
  }
  if (in_type == Type::FLOAT) {
    return TypeInt::SHORT;
  }
  const TypeF* float_con = in_type->is_float_constant();
  return TypeInt::make(SharedRuntime::f2hf(float_con->getf()));
}
173+
174+
//------------------------------Identity---------------------------------------
175+
// ConvF2HF(ConvHF2F(x)) collapses to x; any other input leaves this node
// as it is.
Node* ConvF2HFNode::Identity(PhaseGVN* phase) {
  Node* producer = in(1);
  if (producer->Opcode() == Op_ConvHF2F) {
    return producer->in(1);
  }
  return this;
}
178+
164179
//=============================================================================
165180
//------------------------------Value------------------------------------------
166181
const Type* ConvF2INode::Value(PhaseGVN* phase) const {
@@ -219,6 +234,18 @@ Node *ConvF2LNode::Ideal(PhaseGVN *phase, bool can_reshape) {
219234
return NULL;
220235
}
221236

237+
//=============================================================================
238+
//------------------------------Value------------------------------------------
239+
// Computes the type of the conversion result:
//   TOP input        -> TOP
//   TypeInt::SHORT   -> Type::FLOAT
//   int constant     -> the constant folded through SharedRuntime::hf2f
//   anything else    -> bottom_type()
const Type* ConvHF2FNode::Value(PhaseGVN* phase) const {
  const Type* in_type = phase->type(in(1));
  if (in_type == Type::TOP) {
    return Type::TOP;
  }
  if (in_type == TypeInt::SHORT) {
    return Type::FLOAT;
  }
  const TypeInt* int_type = in_type->is_int();
  if (!int_type->is_con()) {
    return bottom_type();
  }
  return TypeF::make(SharedRuntime::hf2f(int_type->get_con()));
}
248+
222249
//=============================================================================
223250
//------------------------------Value------------------------------------------
224251
const Type* ConvI2DNode::Value(PhaseGVN* phase) const {

src/hotspot/share/opto/convertnode.hpp

+23
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,18 @@ class ConvF2DNode : public Node {
100100
virtual uint ideal_reg() const { return Op_RegD; }
101101
};
102102

103+
//------------------------------ConvF2HFNode------------------------------------
104+
// Convert Float to Halffloat
105+
class ConvF2HFNode : public Node {
106+
public:
107+
ConvF2HFNode( Node *in1 ) : Node(0,in1) {}
108+
virtual int Opcode() const;
109+
virtual const Type *bottom_type() const { return TypeInt::SHORT; }
110+
virtual const Type* Value(PhaseGVN* phase) const;
111+
virtual Node* Identity(PhaseGVN* phase);
112+
virtual uint ideal_reg() const { return Op_RegI; }
113+
};
114+
103115
//------------------------------ConvF2INode------------------------------------
104116
// Convert float to integer
105117
class ConvF2INode : public Node {
@@ -127,6 +139,17 @@ class ConvF2LNode : public Node {
127139
virtual uint ideal_reg() const { return Op_RegL; }
128140
};
129141

142+
//------------------------------ConvHF2FNode------------------------------------
143+
// Convert Halffloat to float
144+
class ConvHF2FNode : public Node {
145+
public:
146+
ConvHF2FNode( Node *in1 ) : Node(0,in1) {}
147+
virtual int Opcode() const;
148+
virtual const Type *bottom_type() const { return Type::FLOAT; }
149+
virtual const Type* Value(PhaseGVN* phase) const;
150+
virtual uint ideal_reg() const { return Op_RegF; }
151+
};
152+
130153
//------------------------------ConvI2DNode------------------------------------
131154
// Convert Integer to Double
132155
class ConvI2DNode : public Node {

src/hotspot/share/opto/library_call.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -514,7 +514,9 @@ bool LibraryCallKit::try_to_inline(int predicate) {
514514
case vmIntrinsics::_intBitsToFloat:
515515
case vmIntrinsics::_doubleToRawLongBits:
516516
case vmIntrinsics::_doubleToLongBits:
517-
case vmIntrinsics::_longBitsToDouble: return inline_fp_conversions(intrinsic_id());
517+
case vmIntrinsics::_longBitsToDouble:
518+
case vmIntrinsics::_floatToFloat16:
519+
case vmIntrinsics::_float16ToFloat: return inline_fp_conversions(intrinsic_id());
518520

519521
case vmIntrinsics::_floatIsFinite:
520522
case vmIntrinsics::_floatIsInfinite:
@@ -4440,6 +4442,8 @@ bool LibraryCallKit::inline_fp_conversions(vmIntrinsics::ID id) {
44404442
case vmIntrinsics::_intBitsToFloat: result = new MoveI2FNode(arg); break;
44414443
case vmIntrinsics::_doubleToRawLongBits: result = new MoveD2LNode(arg); break;
44424444
case vmIntrinsics::_longBitsToDouble: result = new MoveL2DNode(arg); break;
4445+
case vmIntrinsics::_floatToFloat16: result = new ConvF2HFNode(arg); break;
4446+
case vmIntrinsics::_float16ToFloat: result = new ConvHF2FNode(arg); break;
44434447

44444448
case vmIntrinsics::_doubleToLongBits: {
44454449
// two paths (plus control) merge in a wood

src/hotspot/share/runtime/sharedRuntime.cpp

+84-4
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,10 @@ JRT_LEAF(jdouble, SharedRuntime::drem(jdouble x, jdouble y))
271271
#endif
272272
JRT_END
273273

274+
JRT_LEAF(jfloat, SharedRuntime::i2f(jint x))
275+
return (jfloat)x;
276+
JRT_END
277+
274278
#ifdef __SOFTFP__
275279
JRT_LEAF(jfloat, SharedRuntime::fadd(jfloat x, jfloat y))
276280
return x + y;
@@ -304,10 +308,6 @@ JRT_LEAF(jdouble, SharedRuntime::ddiv(jdouble x, jdouble y))
304308
return x / y;
305309
JRT_END
306310

307-
JRT_LEAF(jfloat, SharedRuntime::i2f(jint x))
308-
return (jfloat)x;
309-
JRT_END
310-
311311
JRT_LEAF(jdouble, SharedRuntime::i2d(jint x))
312312
return (jdouble)x;
313313
JRT_END
@@ -448,6 +448,86 @@ JRT_LEAF(jdouble, SharedRuntime::l2d(jlong x))
448448
return (jdouble)x;
449449
JRT_END
450450

451+
// Reference implementation at src/java.base/share/classes/java/lang/Float.java:floatToFloat16
452+
JRT_LEAF(jshort, SharedRuntime::f2hf(jfloat x))
453+
jint doppel = SharedRuntime::f2i(x);
454+
jshort sign_bit = (jshort) ((doppel & 0x80000000) >> 16);
455+
if (g_isnan(x))
456+
return (jshort)(sign_bit | 0x7c00 | (doppel & 0x007fe000) >> 13 | (doppel & 0x00001ff0) >> 4 | (doppel & 0x0000000f));
457+
458+
jfloat abs_f = (x >= 0.0f) ? x : (x * -1.0f);
459+
460+
// Overflow threshold is halffloat max value + 1/2 ulp
461+
if (abs_f >= (65504.0f + 16.0f)) {
462+
return (jshort)(sign_bit | 0x7c00); // Positive or negative infinity
463+
}
464+
465+
// Smallest magnitude of Halffloat is 0x1.0p-24, half-way or smaller rounds to zero
466+
if (abs_f <= (pow(2, -24) * 0.5f)) { // Covers float zeros and subnormals.
467+
return sign_bit; // Positive or negative zero
468+
}
469+
470+
jint exp = 0x7f800000 & doppel;
471+
472+
// For binary16 subnormals, beside forcing exp to -15, retain
473+
// the difference exp_delta = E_min - exp. This is the excess
474+
// shift value, in addition to 13, to be used in the
475+
// computations below. Further the (hidden) msb with value 1
476+
// in f must be involved as well
477+
jint exp_delta = 0;
478+
jint msb = 0x00000000;
479+
if (exp < -14) {
480+
exp_delta = -14 - exp;
481+
exp = -15;
482+
msb = 0x00800000;
483+
}
484+
jint f_signif_bits = ((doppel & 0x007fffff) | msb);
485+
486+
// Significand bits as if using rounding to zero
487+
jshort signif_bits = (jshort)(f_signif_bits >> (13 + exp_delta));
488+
489+
jint lsb = f_signif_bits & (1 << (13 + exp_delta));
490+
jint round = f_signif_bits & (1 << (12 + exp_delta));
491+
jint sticky = f_signif_bits & ((1 << (12 + exp_delta)) - 1);
492+
493+
if (round != 0 && ((lsb | sticky) != 0 )) {
494+
signif_bits++;
495+
}
496+
497+
return (jshort)(sign_bit | ( ((exp + 15) << 10) + signif_bits ) );
498+
JRT_END
499+
500+
// Reference implementation at src/java.base/share/classes/java/lang/Float.java:float16ToFloat
501+
JRT_LEAF(jfloat, SharedRuntime::hf2f(jshort x))
502+
// Halffloat format has 1 signbit, 5 exponent bits and
503+
// 10 significand bits
504+
jint hf_arg = (jint)x;
505+
jint hf_sign_bit = 0x8000 & hf_arg;
506+
jint hf_exp_bits = 0x7c00 & hf_arg;
507+
jint hf_significand_bits = 0x03ff & hf_arg;
508+
509+
jint significand_shift = 13; //difference between float and halffloat precision
510+
511+
jfloat sign = (hf_sign_bit != 0) ? -1.0f : 1.0f;
512+
513+
// Extract halffloat exponent, remove its bias
514+
jint hf_exp = (hf_exp_bits >> 10) - 15;
515+
516+
if (hf_exp == -15) {
517+
// For subnormal values, return 2^-24 * significand bits
518+
return (sign * (pow(2,-24)) * hf_significand_bits);
519+
}else if (hf_exp == 16) {
520+
return (hf_significand_bits == 0) ? sign * float_infinity : (SharedRuntime::i2f((hf_sign_bit << 16) | 0x7f800000 |
521+
(hf_significand_bits << significand_shift)));
522+
}
523+
524+
// Add the bias of float exponent and shift
525+
int float_exp_bits = (hf_exp + 127) << (24 - 1);
526+
527+
// Combine sign, exponent and significand bits
528+
return SharedRuntime::i2f((hf_sign_bit << 16) | float_exp_bits | (hf_significand_bits << significand_shift));
529+
JRT_END
530+
451531
// Exception handling across interpreter/compiler boundaries
452532
//
453533
// exception_handler_for_return_address(...) returns the continuation address.

src/hotspot/share/runtime/sharedRuntime.hpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -129,9 +129,11 @@ class SharedRuntime: AllStatic {
129129
static jfloat d2f (jdouble x);
130130
static jfloat l2f (jlong x);
131131
static jdouble l2d (jlong x);
132+
// Halffloat bit pattern (as a jshort) -> float.
static jfloat hf2f(jshort x);
133+
// float -> Halffloat bit pattern (as a jshort).
static jshort f2hf(jfloat x);
134+
// Numeric int -> float conversion of the value of x.
static jfloat i2f (jint x);
132135

133136
#ifdef __SOFTFP__
134-
static jfloat i2f (jint x);
135137
static jdouble i2d (jint x);
136138
static jdouble f2d (jfloat x);
137139
#endif // __SOFTFP__

0 commit comments

Comments
 (0)