Skip to content
This repository was archived by the owner on Sep 19, 2023. It is now read-only.

Commit 59a3f4f

Browse files
author
Vladimir Kozlov
committed
8302976: C2 intrinsification of Float.floatToFloat16 and Float.float16ToFloat yields different result than the interpreter
Reviewed-by: vlivanov, thartmann
Backport-of: 8cfd74f76afc9e5d50c52104fef9974784718dd4
1 parent cc4b7e6 commit 59a3f4f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+1254
-145
lines changed

src/hotspot/cpu/aarch64/aarch64.ad

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -14590,8 +14590,7 @@ instruct convF2HF_reg_reg(iRegINoSp dst, vRegF src, vRegF tmp) %{
1459014590
%}
1459114591
effect(TEMP tmp);
1459214592
ins_encode %{
14593-
__ fcvtsh($tmp$$FloatRegister, $src$$FloatRegister);
14594-
__ smov($dst$$Register, $tmp$$FloatRegister, __ H, 0);
14593+
__ flt_to_flt16($dst$$Register, $src$$FloatRegister, $tmp$$FloatRegister);
1459514594
%}
1459614595
ins_pipe(pipe_slow);
1459714596
%}
@@ -14603,8 +14602,7 @@ instruct convHF2F_reg_reg(vRegF dst, iRegINoSp src, vRegF tmp) %{
1460314602
%}
1460414603
effect(TEMP tmp);
1460514604
ins_encode %{
14606-
__ mov($tmp$$FloatRegister, __ H, 0, $src$$Register);
14607-
__ fcvths($dst$$FloatRegister, $tmp$$FloatRegister);
14605+
__ flt16_to_flt($dst$$FloatRegister, $src$$Register, $tmp$$FloatRegister);
1460814606
%}
1460914607
ins_pipe(pipe_slow);
1461014608
%}

src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved.
33
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
44
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
55
*
@@ -1814,10 +1814,12 @@ void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr
18141814
void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack) { Unimplemented(); }
18151815

18161816

1817-
void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) {
1817+
void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr tmp, LIR_Opr dest, LIR_Op* op) {
18181818
switch(code) {
18191819
case lir_abs : __ fabsd(dest->as_double_reg(), value->as_double_reg()); break;
18201820
case lir_sqrt: __ fsqrtd(dest->as_double_reg(), value->as_double_reg()); break;
1821+
case lir_f2hf: __ flt_to_flt16(dest->as_register(), value->as_float_reg(), tmp->as_float_reg()); break;
1822+
case lir_hf2f: __ flt16_to_flt(dest->as_float_reg(), value->as_register(), tmp->as_float_reg()); break;
18211823
default : ShouldNotReachHere();
18221824
}
18231825
}

src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp

Lines changed: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2005, 2023, Oracle and/or its affiliates. All rights reserved.
33
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
44
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
55
*
@@ -752,20 +752,35 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
752752
switch (x->id()) {
753753
case vmIntrinsics::_dabs:
754754
case vmIntrinsics::_dsqrt:
755-
case vmIntrinsics::_dsqrt_strict: {
755+
case vmIntrinsics::_dsqrt_strict:
756+
case vmIntrinsics::_floatToFloat16:
757+
case vmIntrinsics::_float16ToFloat: {
756758
assert(x->number_of_arguments() == 1, "wrong type");
757759
LIRItem value(x->argument_at(0), this);
758760
value.load_item();
761+
LIR_Opr src = value.result();
759762
LIR_Opr dst = rlock_result(x);
760763

761764
switch (x->id()) {
762765
case vmIntrinsics::_dsqrt:
763766
case vmIntrinsics::_dsqrt_strict: {
764-
__ sqrt(value.result(), dst, LIR_OprFact::illegalOpr);
767+
__ sqrt(src, dst, LIR_OprFact::illegalOpr);
765768
break;
766769
}
767770
case vmIntrinsics::_dabs: {
768-
__ abs(value.result(), dst, LIR_OprFact::illegalOpr);
771+
__ abs(src, dst, LIR_OprFact::illegalOpr);
772+
break;
773+
}
774+
case vmIntrinsics::_floatToFloat16: {
775+
LIR_Opr tmp = new_register(T_FLOAT);
776+
__ move(LIR_OprFact::floatConst(-0.0), tmp);
777+
__ f2hf(src, dst, tmp);
778+
break;
779+
}
780+
case vmIntrinsics::_float16ToFloat: {
781+
LIR_Opr tmp = new_register(T_FLOAT);
782+
__ move(LIR_OprFact::floatConst(-0.0), tmp);
783+
__ hf2f(src, dst, tmp);
769784
break;
770785
}
771786
default:

src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -513,8 +513,15 @@ class MacroAssembler: public Assembler {
513513
orr(Vd, T, Vn, Vn);
514514
}
515515

516+
void flt_to_flt16(Register dst, FloatRegister src, FloatRegister tmp) {
517+
fcvtsh(tmp, src);
518+
smov(dst, tmp, H, 0);
519+
}
516520

517-
public:
521+
void flt16_to_flt(FloatRegister dst, Register src, FloatRegister tmp) {
522+
mov(tmp, H, 0, src);
523+
fcvths(dst, tmp);
524+
}
518525

519526
// Generalized Test Bit And Branch, including a "far" variety which
520527
// spans more than 32KiB.

src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -300,6 +300,50 @@ void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpr
300300
__ blr(rscratch1);
301301
}
302302

303+
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() {
304+
// vmIntrinsics checks InlineIntrinsics flag, no need to check it here.
305+
if (!VM_Version::supports_float16() ||
306+
vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_float16ToFloat) ||
307+
vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_floatToFloat16)) {
308+
return nullptr;
309+
}
310+
// r19_sender_sp: sender sp
311+
// stack:
312+
// [ arg ] <-- esp
313+
// [ arg ]
314+
// retaddr in lr
315+
// result in v0
316+
317+
address entry_point = __ pc();
318+
__ ldrw(c_rarg0, Address(esp));
319+
__ flt16_to_flt(v0, c_rarg0, v1);
320+
__ mov(sp, r19_sender_sp); // Restore caller's SP
321+
__ br(lr);
322+
return entry_point;
323+
}
324+
325+
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() {
326+
// vmIntrinsics checks InlineIntrinsics flag, no need to check it here.
327+
if (!VM_Version::supports_float16() ||
328+
vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_float16ToFloat) ||
329+
vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_floatToFloat16)) {
330+
return nullptr;
331+
}
332+
// r19_sender_sp: sender sp
333+
// stack:
334+
// [ arg ] <-- esp
335+
// [ arg ]
336+
// retaddr in lr
337+
// result in c_rarg0
338+
339+
address entry_point = __ pc();
340+
__ ldrs(v0, Address(esp));
341+
__ flt_to_flt16(c_rarg0, v0, v1);
342+
__ mov(sp, r19_sender_sp); // Restore caller's SP
343+
__ br(lr);
344+
return entry_point;
345+
}
346+
303347
// Abstract method entry
304348
// Attempt to execute abstract method. Throw exception
305349
address TemplateInterpreterGenerator::generate_abstract_entry(void) {
@@ -1695,6 +1739,7 @@ address TemplateInterpreterGenerator::generate_currentThread() {
16951739
return entry_point;
16961740
}
16971741

1742+
16981743
//-----------------------------------------------------------------------------
16991744
// Exceptions
17001745

src/hotspot/cpu/aarch64/vm_version_aarch64.hpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -159,6 +159,8 @@ class VM_Version : public Abstract_VM_Version {
159159

160160
static bool supports_on_spin_wait() { return _spin_wait.inst() != SpinWait::NONE; }
161161

162+
static bool supports_float16() { return true; }
163+
162164
#ifdef __APPLE__
163165
// Is the CPU running emulated (for example macOS Rosetta running x86_64 code on M1 ARM (aarch64)
164166
static bool is_cpu_emulated();

src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -781,6 +781,8 @@ address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {
781781
address TemplateInterpreterGenerator::generate_CRC32_update_entry() { return NULL; }
782782
address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
783783
address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
784+
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return NULL; }
785+
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return NULL; }
784786

785787
//
786788
// Interpreter stub for calling a native method. (asm interpreter)

src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1932,6 +1932,10 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract
19321932
return NULL;
19331933
}
19341934

1935+
// Not supported
1936+
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; }
1937+
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; }
1938+
19351939
// =============================================================================
19361940
// Exceptions
19371941

src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -301,6 +301,10 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
301301
return entry_point;
302302
}
303303

304+
// Not supported
305+
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; }
306+
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; }
307+
304308
// Abstract method entry
305309
// Attempt to execute abstract method. Throw exception
306310
address TemplateInterpreterGenerator::generate_abstract_entry(void) {

src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2008,6 +2008,10 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract
20082008
return NULL;
20092009
}
20102010

2011+
// Not supported
2012+
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; }
2013+
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; }
2014+
20112015
void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
20122016
// Quick & dirty stack overflow checking: bang the stack & handle trap.
20132017
// Note that we do the banging after the frame is setup, since the exception

0 commit comments

Comments
 (0)