Skip to content

Commit 8cfd74f

Browse files
author
Vladimir Kozlov
committed
8302976: C2 intrinsification of Float.floatToFloat16 and Float.float16ToFloat yields different result than the interpreter
Reviewed-by: sviswanathan, jbhateja, vlivanov
1 parent 02875e7 commit 8cfd74f

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

47 files changed

+1254
-195
lines changed

src/hotspot/cpu/aarch64/aarch64.ad

+2-4
Original file line number | Diff line number | Diff line change
@@ -15043,8 +15043,7 @@ instruct convF2HF_reg_reg(iRegINoSp dst, vRegF src, vRegF tmp) %{
1504315043
%}
1504415044
effect(TEMP tmp);
1504515045
ins_encode %{
15046-
__ fcvtsh($tmp$$FloatRegister, $src$$FloatRegister);
15047-
__ smov($dst$$Register, $tmp$$FloatRegister, __ H, 0);
15046+
__ flt_to_flt16($dst$$Register, $src$$FloatRegister, $tmp$$FloatRegister);
1504815047
%}
1504915048
ins_pipe(pipe_slow);
1505015049
%}
@@ -15056,8 +15055,7 @@ instruct convHF2F_reg_reg(vRegF dst, iRegINoSp src, vRegF tmp) %{
1505615055
%}
1505715056
effect(TEMP tmp);
1505815057
ins_encode %{
15059-
__ mov($tmp$$FloatRegister, __ H, 0, $src$$Register);
15060-
__ fcvths($dst$$FloatRegister, $tmp$$FloatRegister);
15058+
__ flt16_to_flt($dst$$FloatRegister, $src$$Register, $tmp$$FloatRegister);
1506115059
%}
1506215060
ins_pipe(pipe_slow);
1506315061
%}

src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp

+4-2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved.
33
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
44
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
55
*
@@ -1814,10 +1814,12 @@ void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr
18141814
void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack) { Unimplemented(); }
18151815

18161816

1817-
void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) {
1817+
void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr tmp, LIR_Opr dest, LIR_Op* op) {
18181818
switch(code) {
18191819
case lir_abs : __ fabsd(dest->as_double_reg(), value->as_double_reg()); break;
18201820
case lir_sqrt: __ fsqrtd(dest->as_double_reg(), value->as_double_reg()); break;
1821+
case lir_f2hf: __ flt_to_flt16(dest->as_register(), value->as_float_reg(), tmp->as_float_reg()); break;
1822+
case lir_hf2f: __ flt16_to_flt(dest->as_float_reg(), value->as_register(), tmp->as_float_reg()); break;
18211823
default : ShouldNotReachHere();
18221824
}
18231825
}

src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp

+19-4
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2005, 2023, Oracle and/or its affiliates. All rights reserved.
33
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
44
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
55
*
@@ -752,20 +752,35 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
752752
switch (x->id()) {
753753
case vmIntrinsics::_dabs:
754754
case vmIntrinsics::_dsqrt:
755-
case vmIntrinsics::_dsqrt_strict: {
755+
case vmIntrinsics::_dsqrt_strict:
756+
case vmIntrinsics::_floatToFloat16:
757+
case vmIntrinsics::_float16ToFloat: {
756758
assert(x->number_of_arguments() == 1, "wrong type");
757759
LIRItem value(x->argument_at(0), this);
758760
value.load_item();
761+
LIR_Opr src = value.result();
759762
LIR_Opr dst = rlock_result(x);
760763

761764
switch (x->id()) {
762765
case vmIntrinsics::_dsqrt:
763766
case vmIntrinsics::_dsqrt_strict: {
764-
__ sqrt(value.result(), dst, LIR_OprFact::illegalOpr);
767+
__ sqrt(src, dst, LIR_OprFact::illegalOpr);
765768
break;
766769
}
767770
case vmIntrinsics::_dabs: {
768-
__ abs(value.result(), dst, LIR_OprFact::illegalOpr);
771+
__ abs(src, dst, LIR_OprFact::illegalOpr);
772+
break;
773+
}
774+
case vmIntrinsics::_floatToFloat16: {
775+
LIR_Opr tmp = new_register(T_FLOAT);
776+
__ move(LIR_OprFact::floatConst(-0.0), tmp);
777+
__ f2hf(src, dst, tmp);
778+
break;
779+
}
780+
case vmIntrinsics::_float16ToFloat: {
781+
LIR_Opr tmp = new_register(T_FLOAT);
782+
__ move(LIR_OprFact::floatConst(-0.0), tmp);
783+
__ hf2f(src, dst, tmp);
769784
break;
770785
}
771786
default:

src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp

+8-1
Original file line number | Diff line number | Diff line change
@@ -513,8 +513,15 @@ class MacroAssembler: public Assembler {
513513
orr(Vd, T, Vn, Vn);
514514
}
515515

516+
void flt_to_flt16(Register dst, FloatRegister src, FloatRegister tmp) {
517+
fcvtsh(tmp, src);
518+
smov(dst, tmp, H, 0);
519+
}
516520

517-
public:
521+
void flt16_to_flt(FloatRegister dst, Register src, FloatRegister tmp) {
522+
mov(tmp, H, 0, src);
523+
fcvths(dst, tmp);
524+
}
518525

519526
// Generalized Test Bit And Branch, including a "far" variety which
520527
// spans more than 32KiB.

src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp

+45
Original file line number | Diff line number | Diff line change
@@ -300,6 +300,50 @@ void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpr
300300
__ blr(rscratch1);
301301
}
302302

303+
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() {
304+
// vmIntrinsics checks InlineIntrinsics flag, no need to check it here.
305+
if (!VM_Version::supports_float16() ||
306+
vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_float16ToFloat) ||
307+
vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_floatToFloat16)) {
308+
return nullptr;
309+
}
310+
// r19_sender_sp: sender sp
311+
// stack:
312+
// [ arg ] <-- esp
313+
// [ arg ]
314+
// retaddr in lr
315+
// result in v0
316+
317+
address entry_point = __ pc();
318+
__ ldrw(c_rarg0, Address(esp));
319+
__ flt16_to_flt(v0, c_rarg0, v1);
320+
__ mov(sp, r19_sender_sp); // Restore caller's SP
321+
__ br(lr);
322+
return entry_point;
323+
}
324+
325+
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() {
326+
// vmIntrinsics checks InlineIntrinsics flag, no need to check it here.
327+
if (!VM_Version::supports_float16() ||
328+
vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_float16ToFloat) ||
329+
vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_floatToFloat16)) {
330+
return nullptr;
331+
}
332+
// r19_sender_sp: sender sp
333+
// stack:
334+
// [ arg ] <-- esp
335+
// [ arg ]
336+
// retaddr in lr
337+
// result in c_rarg0
338+
339+
address entry_point = __ pc();
340+
__ ldrs(v0, Address(esp));
341+
__ flt_to_flt16(c_rarg0, v0, v1);
342+
__ mov(sp, r19_sender_sp); // Restore caller's SP
343+
__ br(lr);
344+
return entry_point;
345+
}
346+
303347
// Abstract method entry
304348
// Attempt to execute abstract method. Throw exception
305349
address TemplateInterpreterGenerator::generate_abstract_entry(void) {
@@ -1698,6 +1742,7 @@ address TemplateInterpreterGenerator::generate_currentThread() {
16981742
return entry_point;
16991743
}
17001744

1745+
17011746
//-----------------------------------------------------------------------------
17021747
// Exceptions
17031748

src/hotspot/cpu/aarch64/vm_version_aarch64.hpp

+2
Original file line number | Diff line number | Diff line change
@@ -170,6 +170,8 @@ enum Ampere_CPU_Model {
170170

171171
static bool supports_on_spin_wait() { return _spin_wait.inst() != SpinWait::NONE; }
172172

173+
static bool supports_float16() { return true; }
174+
173175
#ifdef __APPLE__
174176
// Is the CPU running emulated (for example macOS Rosetta running x86_64 code on M1 ARM (aarch64)
175177
static bool is_cpu_emulated();

src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp

+2
Original file line number | Diff line number | Diff line change
@@ -783,6 +783,8 @@ address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {
783783
address TemplateInterpreterGenerator::generate_CRC32_update_entry() { return nullptr; }
784784
address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return nullptr; }
785785
address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return nullptr; }
786+
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; }
787+
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; }
786788

787789
//
788790
// Interpreter stub for calling a native method. (asm interpreter)

src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp

+4
Original file line number | Diff line number | Diff line change
@@ -1933,6 +1933,10 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract
19331933
return NULL;
19341934
}
19351935

1936+
// Not supported
1937+
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; }
1938+
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; }
1939+
19361940
// =============================================================================
19371941
// Exceptions
19381942

src/hotspot/cpu/riscv/assembler_riscv.hpp

-4
Original file line number | Diff line number | Diff line change
@@ -798,8 +798,6 @@ enum operand_size { int8, int16, int32, uint32, int64 };
798798
INSN(fsqrt_d, 0b1010011, 0b00000, 0b0101101);
799799
INSN(fcvt_s_d, 0b1010011, 0b00001, 0b0100000);
800800
INSN(fcvt_d_s, 0b1010011, 0b00000, 0b0100001);
801-
INSN(fcvt_s_h, 0b1010011, 0b00010, 0b0100000);
802-
INSN(fcvt_h_s, 0b1010011, 0b00000, 0b0100010);
803801
#undef INSN
804802

805803
// Immediate Instruction
@@ -1056,7 +1054,6 @@ enum operand_size { int8, int16, int32, uint32, int64 };
10561054

10571055
INSN(fmv_w_x, 0b1010011, 0b000, 0b00000, 0b1111000);
10581056
INSN(fmv_d_x, 0b1010011, 0b000, 0b00000, 0b1111001);
1059-
INSN(fmv_h_x, 0b1010011, 0b000, 0b00000, 0b1111010);
10601057

10611058
#undef INSN
10621059

@@ -1077,7 +1074,6 @@ enum operand_size { int8, int16, int32, uint32, int64 };
10771074
INSN(fclass_d, 0b1010011, 0b001, 0b00000, 0b1110001);
10781075
INSN(fmv_x_w, 0b1010011, 0b000, 0b00000, 0b1110000);
10791076
INSN(fmv_x_d, 0b1010011, 0b000, 0b00000, 0b1110001);
1080-
INSN(fmv_x_h, 0b1010011, 0b000, 0b00000, 0b1110010);
10811077

10821078
#undef INSN
10831079

src/hotspot/cpu/riscv/globals_riscv.hpp

-1
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,6 @@ define_pd_global(intx, InlineSmallCode, 1000);
103103
product(bool, UseZba, false, EXPERIMENTAL, "Use Zba instructions") \
104104
product(bool, UseZbb, false, EXPERIMENTAL, "Use Zbb instructions") \
105105
product(bool, UseZbs, false, EXPERIMENTAL, "Use Zbs instructions") \
106-
product(bool, UseZfhmin, false, EXPERIMENTAL, "Use Zfhmin instructions") \
107106
product(bool, UseZic64b, false, EXPERIMENTAL, "Use Zic64b instructions") \
108107
product(bool, UseZicbom, false, EXPERIMENTAL, "Use Zicbom instructions") \
109108
product(bool, UseZicbop, false, EXPERIMENTAL, "Use Zicbop instructions") \

src/hotspot/cpu/riscv/riscv.ad

-42
Original file line number | Diff line number | Diff line change
@@ -1845,10 +1845,6 @@ const bool Matcher::match_rule_supported(int opcode) {
18451845
case Op_CountTrailingZerosI:
18461846
case Op_CountTrailingZerosL:
18471847
return UseZbb;
1848-
1849-
case Op_ConvF2HF:
1850-
case Op_ConvHF2F:
1851-
return UseZfhmin;
18521848
}
18531849

18541850
return true; // Per default match rules are supported.
@@ -8180,44 +8176,6 @@ instruct convL2F_reg_reg(fRegF dst, iRegL src) %{
81808176
ins_pipe(fp_l2f);
81818177
%}
81828178

8183-
// float <-> half float
8184-
8185-
instruct convHF2F_reg_reg(fRegF dst, iRegINoSp src, fRegF tmp) %{
8186-
predicate(UseZfhmin);
8187-
match(Set dst (ConvHF2F src));
8188-
effect(TEMP tmp);
8189-
8190-
ins_cost(XFER_COST);
8191-
format %{ "fmv.h.x $tmp, $src\t#@convHF2F_reg_reg\n\t"
8192-
"fcvt.s.h $dst, $tmp\t#@convHF2F_reg_reg"
8193-
%}
8194-
8195-
ins_encode %{
8196-
__ fmv_h_x($tmp$$FloatRegister, $src$$Register);
8197-
__ fcvt_s_h($dst$$FloatRegister, $tmp$$FloatRegister);
8198-
%}
8199-
8200-
ins_pipe(fp_i2f);
8201-
%}
8202-
8203-
instruct convF2HF_reg_reg(iRegINoSp dst, fRegF src, fRegF tmp) %{
8204-
predicate(UseZfhmin);
8205-
match(Set dst (ConvF2HF src));
8206-
effect(TEMP tmp);
8207-
8208-
ins_cost(XFER_COST);
8209-
format %{ "fcvt.h.s $tmp, $src\t#@convF2HF_reg_reg\n\t"
8210-
"fmv.x.h $dst, $tmp\t#@convF2HF_reg_reg"
8211-
%}
8212-
8213-
ins_encode %{
8214-
__ fcvt_h_s($tmp$$FloatRegister, $src$$FloatRegister);
8215-
__ fmv_x_h($dst$$Register, $tmp$$FloatRegister);
8216-
%}
8217-
8218-
ins_pipe(fp_f2i);
8219-
%}
8220-
82218179
// double <-> int
82228180

82238181
instruct convD2I_reg_reg(iRegINoSp dst, fRegD src) %{

src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp

+4
Original file line number | Diff line number | Diff line change
@@ -301,6 +301,10 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
301301
return entry_point;
302302
}
303303

304+
// Not supported
305+
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; }
306+
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; }
307+
304308
// Abstract method entry
305309
// Attempt to execute abstract method. Throw exception
306310
address TemplateInterpreterGenerator::generate_abstract_entry(void) {

src/hotspot/cpu/riscv/vm_version_riscv.cpp

-3
Original file line number | Diff line number | Diff line change
@@ -76,9 +76,6 @@ void VM_Version::initialize() {
7676
if (FLAG_IS_DEFAULT(UseZicboz)) {
7777
FLAG_SET_DEFAULT(UseZicboz, true);
7878
}
79-
if (FLAG_IS_DEFAULT(UseZfhmin)) {
80-
FLAG_SET_DEFAULT(UseZfhmin, true);
81-
}
8279
if (FLAG_IS_DEFAULT(UseZihintpause)) {
8380
FLAG_SET_DEFAULT(UseZihintpause, true);
8481
}

src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp

+4
Original file line number | Diff line number | Diff line change
@@ -2003,6 +2003,10 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract
20032003
return NULL;
20042004
}
20052005

2006+
// Not supported
2007+
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; }
2008+
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; }
2009+
20062010
void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
20072011
// Quick & dirty stack overflow checking: bang the stack & handle trap.
20082012
// Note that we do the banging after the frame is setup, since the exception

src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp

+5-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -2454,6 +2454,10 @@ void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr tmp, LIR_
24542454
default : ShouldNotReachHere();
24552455
}
24562456
#endif // !_LP64
2457+
} else if (code == lir_f2hf) {
2458+
__ flt_to_flt16(dest->as_register(), value->as_xmm_float_reg(), tmp->as_xmm_float_reg());
2459+
} else if (code == lir_hf2f) {
2460+
__ flt16_to_flt(dest->as_xmm_float_reg(), value->as_register());
24572461
} else {
24582462
Unimplemented();
24592463
}

src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp

+11-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2005, 2023, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -832,6 +832,10 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
832832
__ move(LIR_OprFact::doubleConst(-0.0), tmp);
833833
}
834834
#endif
835+
if (x->id() == vmIntrinsics::_floatToFloat16) {
836+
tmp = new_register(T_FLOAT);
837+
__ move(LIR_OprFact::floatConst(-0.0), tmp);
838+
}
835839

836840
switch(x->id()) {
837841
case vmIntrinsics::_dabs:
@@ -841,6 +845,12 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
841845
case vmIntrinsics::_dsqrt_strict:
842846
__ sqrt(calc_input, calc_result, LIR_OprFact::illegalOpr);
843847
break;
848+
case vmIntrinsics::_floatToFloat16:
849+
__ f2hf(calc_input, calc_result, tmp);
850+
break;
851+
case vmIntrinsics::_float16ToFloat:
852+
__ hf2f(calc_input, calc_result, LIR_OprFact::illegalOpr);
853+
break;
844854
default:
845855
ShouldNotReachHere();
846856
}

src/hotspot/cpu/x86/macroAssembler_x86.hpp

+16-3
Original file line number | Diff line number | Diff line change
@@ -162,6 +162,11 @@ class MacroAssembler: public Assembler {
162162
void incrementq(Register reg, int value = 1);
163163
void incrementq(Address dst, int value = 1);
164164

165+
void incrementl(AddressLiteral dst, Register rscratch = noreg);
166+
void incrementl(ArrayAddress dst, Register rscratch);
167+
168+
void incrementq(AddressLiteral dst, Register rscratch = noreg);
169+
165170
// Support optimal SSE move instructions.
166171
void movflt(XMMRegister dst, XMMRegister src) {
167172
if (dst-> encoding() == src->encoding()) return;
@@ -189,10 +194,18 @@ class MacroAssembler: public Assembler {
189194
}
190195
void movdbl(Address dst, XMMRegister src) { movsd(dst, src); }
191196

192-
void incrementl(AddressLiteral dst, Register rscratch = noreg);
193-
void incrementl(ArrayAddress dst, Register rscratch);
197+
void flt_to_flt16(Register dst, XMMRegister src, XMMRegister tmp) {
198+
// Use separate tmp XMM register because caller may
199+
// require src XMM register to be unchanged (as in x86.ad).
200+
vcvtps2ph(tmp, src, 0x04, Assembler::AVX_128bit);
201+
movdl(dst, tmp);
202+
movswl(dst, dst);
203+
}
194204

195-
void incrementq(AddressLiteral dst, Register rscratch = noreg);
205+
void flt16_to_flt(XMMRegister dst, Register src) {
206+
movdl(dst, src);
207+
vcvtph2ps(dst, dst, Assembler::AVX_128bit);
208+
}
196209

197210
// Alignment
198211
void align32();

0 commit comments

Comments
 (0)