Skip to content
This repository has been archived by the owner on Sep 19, 2023. It is now read-only.
/ jdk20u Public archive

Commit

Permalink
8302976: C2 intrinsification of Float.floatToFloat16 and Float.float1…
Browse files Browse the repository at this point in the history
…6ToFloat yields different result than the interpreter

Reviewed-by: vlivanov, thartmann
Backport-of: 8cfd74f76afc9e5d50c52104fef9974784718dd4
  • Loading branch information
Vladimir Kozlov committed Mar 22, 2023
1 parent cc4b7e6 commit 59a3f4f
Show file tree
Hide file tree
Showing 43 changed files with 1,254 additions and 145 deletions.
6 changes: 2 additions & 4 deletions src/hotspot/cpu/aarch64/aarch64.ad
Original file line number Diff line number Diff line change
Expand Up @@ -14590,8 +14590,7 @@ instruct convF2HF_reg_reg(iRegINoSp dst, vRegF src, vRegF tmp) %{
%}
effect(TEMP tmp);
ins_encode %{
__ fcvtsh($tmp$$FloatRegister, $src$$FloatRegister);
__ smov($dst$$Register, $tmp$$FloatRegister, __ H, 0);
__ flt_to_flt16($dst$$Register, $src$$FloatRegister, $tmp$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
Expand All @@ -14603,8 +14602,7 @@ instruct convHF2F_reg_reg(vRegF dst, iRegINoSp src, vRegF tmp) %{
%}
effect(TEMP tmp);
ins_encode %{
__ mov($tmp$$FloatRegister, __ H, 0, $src$$Register);
__ fcvths($dst$$FloatRegister, $tmp$$FloatRegister);
__ flt16_to_flt($dst$$FloatRegister, $src$$Register, $tmp$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
Expand Down
6 changes: 4 additions & 2 deletions src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
Expand Down Expand Up @@ -1814,10 +1814,12 @@ void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr
void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack) { Unimplemented(); }


void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) {
void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr tmp, LIR_Opr dest, LIR_Op* op) {
switch(code) {
case lir_abs : __ fabsd(dest->as_double_reg(), value->as_double_reg()); break;
case lir_sqrt: __ fsqrtd(dest->as_double_reg(), value->as_double_reg()); break;
case lir_f2hf: __ flt_to_flt16(dest->as_register(), value->as_float_reg(), tmp->as_float_reg()); break;
case lir_hf2f: __ flt16_to_flt(dest->as_float_reg(), value->as_register(), tmp->as_float_reg()); break;
default : ShouldNotReachHere();
}
}
Expand Down
23 changes: 19 additions & 4 deletions src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
Expand Down Expand Up @@ -752,20 +752,35 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
switch (x->id()) {
case vmIntrinsics::_dabs:
case vmIntrinsics::_dsqrt:
case vmIntrinsics::_dsqrt_strict: {
case vmIntrinsics::_dsqrt_strict:
case vmIntrinsics::_floatToFloat16:
case vmIntrinsics::_float16ToFloat: {
assert(x->number_of_arguments() == 1, "wrong type");
LIRItem value(x->argument_at(0), this);
value.load_item();
LIR_Opr src = value.result();
LIR_Opr dst = rlock_result(x);

switch (x->id()) {
case vmIntrinsics::_dsqrt:
case vmIntrinsics::_dsqrt_strict: {
__ sqrt(value.result(), dst, LIR_OprFact::illegalOpr);
__ sqrt(src, dst, LIR_OprFact::illegalOpr);
break;
}
case vmIntrinsics::_dabs: {
__ abs(value.result(), dst, LIR_OprFact::illegalOpr);
__ abs(src, dst, LIR_OprFact::illegalOpr);
break;
}
case vmIntrinsics::_floatToFloat16: {
LIR_Opr tmp = new_register(T_FLOAT);
__ move(LIR_OprFact::floatConst(-0.0), tmp);
__ f2hf(src, dst, tmp);
break;
}
case vmIntrinsics::_float16ToFloat: {
LIR_Opr tmp = new_register(T_FLOAT);
__ move(LIR_OprFact::floatConst(-0.0), tmp);
__ hf2f(src, dst, tmp);
break;
}
default:
Expand Down
9 changes: 8 additions & 1 deletion src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -513,8 +513,15 @@ class MacroAssembler: public Assembler {
orr(Vd, T, Vn, Vn);
}

void flt_to_flt16(Register dst, FloatRegister src, FloatRegister tmp) {
fcvtsh(tmp, src);
smov(dst, tmp, H, 0);
}

public:
void flt16_to_flt(FloatRegister dst, Register src, FloatRegister tmp) {
mov(tmp, H, 0, src);
fcvths(dst, tmp);
}

// Generalized Test Bit And Branch, including a "far" variety which
// spans more than 32KiB.
Expand Down
45 changes: 45 additions & 0 deletions src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,50 @@ void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpr
__ blr(rscratch1);
}

address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() {
// vmIntrinsics checks InlineIntrinsics flag, no need to check it here.
if (!VM_Version::supports_float16() ||
vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_float16ToFloat) ||
vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_floatToFloat16)) {
return nullptr;
}
// r19_sender_sp: sender sp
// stack:
// [ arg ] <-- esp
// [ arg ]
// retaddr in lr
// result in v0

address entry_point = __ pc();
__ ldrw(c_rarg0, Address(esp));
__ flt16_to_flt(v0, c_rarg0, v1);
__ mov(sp, r19_sender_sp); // Restore caller's SP
__ br(lr);
return entry_point;
}

address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() {
// vmIntrinsics checks InlineIntrinsics flag, no need to check it here.
if (!VM_Version::supports_float16() ||
vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_float16ToFloat) ||
vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_floatToFloat16)) {
return nullptr;
}
// r19_sender_sp: sender sp
// stack:
// [ arg ] <-- esp
// [ arg ]
// retaddr in lr
// result in c_rarg0

address entry_point = __ pc();
__ ldrs(v0, Address(esp));
__ flt_to_flt16(c_rarg0, v0, v1);
__ mov(sp, r19_sender_sp); // Restore caller's SP
__ br(lr);
return entry_point;
}

// Abstract method entry
// Attempt to execute abstract method. Throw exception
address TemplateInterpreterGenerator::generate_abstract_entry(void) {
Expand Down Expand Up @@ -1695,6 +1739,7 @@ address TemplateInterpreterGenerator::generate_currentThread() {
return entry_point;
}


//-----------------------------------------------------------------------------
// Exceptions

Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ class VM_Version : public Abstract_VM_Version {

static bool supports_on_spin_wait() { return _spin_wait.inst() != SpinWait::NONE; }

static bool supports_float16() { return true; }

#ifdef __APPLE__
// Is the CPU running emulated (for example macOS Rosetta running x86_64 code on M1 ARM (aarch64)
static bool is_cpu_emulated();
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,8 @@ address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {
address TemplateInterpreterGenerator::generate_CRC32_update_entry() { return NULL; }
address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return NULL; }
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return NULL; }

//
// Interpreter stub for calling a native method. (asm interpreter)
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1932,6 +1932,10 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract
return NULL;
}

// Not supported
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; }
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; }

// =============================================================================
// Exceptions

Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,10 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
return entry_point;
}

// Not supported
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; }
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; }

// Abstract method entry
// Attempt to execute abstract method. Throw exception
address TemplateInterpreterGenerator::generate_abstract_entry(void) {
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2008,6 +2008,10 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract
return NULL;
}

// Not supported
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; }
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; }

void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
// Quick & dirty stack overflow checking: bang the stack & handle trap.
// Note that we do the banging after the frame is setup, since the exception
Expand Down
6 changes: 5 additions & 1 deletion src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -2454,6 +2454,10 @@ void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr tmp, LIR_
default : ShouldNotReachHere();
}
#endif // !_LP64
} else if (code == lir_f2hf) {
__ flt_to_flt16(dest->as_register(), value->as_xmm_float_reg(), tmp->as_xmm_float_reg());
} else if (code == lir_hf2f) {
__ flt16_to_flt(dest->as_xmm_float_reg(), value->as_register());
} else {
Unimplemented();
}
Expand Down
12 changes: 11 additions & 1 deletion src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -832,6 +832,10 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
__ move(LIR_OprFact::doubleConst(-0.0), tmp);
}
#endif
if (x->id() == vmIntrinsics::_floatToFloat16) {
tmp = new_register(T_FLOAT);
__ move(LIR_OprFact::floatConst(-0.0), tmp);
}

switch(x->id()) {
case vmIntrinsics::_dabs:
Expand All @@ -841,6 +845,12 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
case vmIntrinsics::_dsqrt_strict:
__ sqrt(calc_input, calc_result, LIR_OprFact::illegalOpr);
break;
case vmIntrinsics::_floatToFloat16:
__ f2hf(calc_input, calc_result, tmp);
break;
case vmIntrinsics::_float16ToFloat:
__ hf2f(calc_input, calc_result, LIR_OprFact::illegalOpr);
break;
default:
ShouldNotReachHere();
}
Expand Down
19 changes: 16 additions & 3 deletions src/hotspot/cpu/x86/macroAssembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,11 @@ class MacroAssembler: public Assembler {
void incrementq(Register reg, int value = 1);
void incrementq(Address dst, int value = 1);

void incrementl(AddressLiteral dst, Register rscratch = noreg);
void incrementl(ArrayAddress dst, Register rscratch);

void incrementq(AddressLiteral dst, Register rscratch = noreg);

// Support optimal SSE move instructions.
void movflt(XMMRegister dst, XMMRegister src) {
if (dst-> encoding() == src->encoding()) return;
Expand Down Expand Up @@ -189,10 +194,18 @@ class MacroAssembler: public Assembler {
}
void movdbl(Address dst, XMMRegister src) { movsd(dst, src); }

void incrementl(AddressLiteral dst, Register rscratch = noreg);
void incrementl(ArrayAddress dst, Register rscratch);
void flt_to_flt16(Register dst, XMMRegister src, XMMRegister tmp) {
// Use separate tmp XMM register because caller may
// requires src XMM register to be unchanged (as in x86.ad).
vcvtps2ph(tmp, src, 0x04, Assembler::AVX_128bit);
movdl(dst, tmp);
movswl(dst, dst);
}

void incrementq(AddressLiteral dst, Register rscratch = noreg);
void flt16_to_flt(XMMRegister dst, Register src) {
movdl(dst, src);
vcvtph2ps(dst, dst, Assembler::AVX_128bit);
}

// Alignment
void align32();
Expand Down
59 changes: 59 additions & 0 deletions src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3352,6 +3352,55 @@ void StubGenerator::generate_libm_stubs() {
}
}

/**
* Arguments:
*
* Input:
* c_rarg0 - float16 jshort
*
* Output:
* xmm0 - float
*/
address StubGenerator::generate_float16ToFloat() {
StubCodeMark mark(this, "StubRoutines", "float16ToFloat");

address start = __ pc();

BLOCK_COMMENT("Entry:");
// No need for RuntimeStub frame since it is called only during JIT compilation

// Load value into xmm0 and convert
__ flt16_to_flt(xmm0, c_rarg0);

__ ret(0);

return start;
}

/**
* Arguments:
*
* Input:
* xmm0 - float
*
* Output:
* rax - float16 jshort
*/
address StubGenerator::generate_floatToFloat16() {
StubCodeMark mark(this, "StubRoutines", "floatToFloat16");

address start = __ pc();

BLOCK_COMMENT("Entry:");
// No need for RuntimeStub frame since it is called only during JIT compilation

// Convert and put result into rax
__ flt_to_flt16(rax, xmm0, xmm1);

__ ret(0);

return start;
}

address StubGenerator::generate_cont_thaw(const char* label, Continuation::thaw_kind kind) {
if (!Continuations::enabled()) return nullptr;
Expand Down Expand Up @@ -3724,6 +3773,16 @@ void StubGenerator::generate_initial() {
StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
}

if (VM_Version::supports_float16()) {
// For results consistency both intrinsics should be enabled.
// vmIntrinsics checks InlineIntrinsics flag, no need to check it here.
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_float16ToFloat) &&
vmIntrinsics::is_intrinsic_available(vmIntrinsics::_floatToFloat16)) {
StubRoutines::_hf2f = generate_float16ToFloat();
StubRoutines::_f2hf = generate_floatToFloat16();
}
}

generate_libm_stubs();
}

Expand Down
4 changes: 3 additions & 1 deletion src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -468,6 +468,8 @@ class StubGenerator: public StubCodeGenerator {
address generate_bigIntegerRightShift();
address generate_bigIntegerLeftShift();

address generate_float16ToFloat();
address generate_floatToFloat16();

// Libm trigonometric stubs

Expand Down
Loading

1 comment on commit 59a3f4f

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.