Skip to content

Commit

Permalink
8302976: C2 intrinsification of Float.floatToFloat16 and Float.float1…
Browse files Browse the repository at this point in the history
…6ToFloat yields different result than the interpreter

Reviewed-by: sviswanathan, jbhateja, vlivanov
  • Loading branch information
Vladimir Kozlov committed Mar 9, 2023
1 parent 02875e7 commit 8cfd74f
Show file tree
Hide file tree
Showing 47 changed files with 1,254 additions and 195 deletions.
6 changes: 2 additions & 4 deletions src/hotspot/cpu/aarch64/aarch64.ad
Original file line number Diff line number Diff line change
Expand Up @@ -15043,8 +15043,7 @@ instruct convF2HF_reg_reg(iRegINoSp dst, vRegF src, vRegF tmp) %{
%}
effect(TEMP tmp);
ins_encode %{
__ fcvtsh($tmp$$FloatRegister, $src$$FloatRegister);
__ smov($dst$$Register, $tmp$$FloatRegister, __ H, 0);
__ flt_to_flt16($dst$$Register, $src$$FloatRegister, $tmp$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
Expand All @@ -15056,8 +15055,7 @@ instruct convHF2F_reg_reg(vRegF dst, iRegINoSp src, vRegF tmp) %{
%}
effect(TEMP tmp);
ins_encode %{
__ mov($tmp$$FloatRegister, __ H, 0, $src$$Register);
__ fcvths($dst$$FloatRegister, $tmp$$FloatRegister);
__ flt16_to_flt($dst$$FloatRegister, $src$$Register, $tmp$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
Expand Down
6 changes: 4 additions & 2 deletions src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
Expand Down Expand Up @@ -1814,10 +1814,12 @@ void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr
void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack) { Unimplemented(); }


void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) {
void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr tmp, LIR_Opr dest, LIR_Op* op) {
switch(code) {
case lir_abs : __ fabsd(dest->as_double_reg(), value->as_double_reg()); break;
case lir_sqrt: __ fsqrtd(dest->as_double_reg(), value->as_double_reg()); break;
case lir_f2hf: __ flt_to_flt16(dest->as_register(), value->as_float_reg(), tmp->as_float_reg()); break;
case lir_hf2f: __ flt16_to_flt(dest->as_float_reg(), value->as_register(), tmp->as_float_reg()); break;
default : ShouldNotReachHere();
}
}
Expand Down
23 changes: 19 additions & 4 deletions src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
Expand Down Expand Up @@ -752,20 +752,35 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
switch (x->id()) {
case vmIntrinsics::_dabs:
case vmIntrinsics::_dsqrt:
case vmIntrinsics::_dsqrt_strict: {
case vmIntrinsics::_dsqrt_strict:
case vmIntrinsics::_floatToFloat16:
case vmIntrinsics::_float16ToFloat: {
assert(x->number_of_arguments() == 1, "wrong type");
LIRItem value(x->argument_at(0), this);
value.load_item();
LIR_Opr src = value.result();
LIR_Opr dst = rlock_result(x);

switch (x->id()) {
case vmIntrinsics::_dsqrt:
case vmIntrinsics::_dsqrt_strict: {
__ sqrt(value.result(), dst, LIR_OprFact::illegalOpr);
__ sqrt(src, dst, LIR_OprFact::illegalOpr);
break;
}
case vmIntrinsics::_dabs: {
__ abs(value.result(), dst, LIR_OprFact::illegalOpr);
__ abs(src, dst, LIR_OprFact::illegalOpr);
break;
}
case vmIntrinsics::_floatToFloat16: {
LIR_Opr tmp = new_register(T_FLOAT);
__ move(LIR_OprFact::floatConst(-0.0), tmp);
__ f2hf(src, dst, tmp);
break;
}
case vmIntrinsics::_float16ToFloat: {
LIR_Opr tmp = new_register(T_FLOAT);
__ move(LIR_OprFact::floatConst(-0.0), tmp);
__ hf2f(src, dst, tmp);
break;
}
default:
Expand Down
9 changes: 8 additions & 1 deletion src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -513,8 +513,15 @@ class MacroAssembler: public Assembler {
orr(Vd, T, Vn, Vn);
}

void flt_to_flt16(Register dst, FloatRegister src, FloatRegister tmp) {
fcvtsh(tmp, src);
smov(dst, tmp, H, 0);
}

public:
void flt16_to_flt(FloatRegister dst, Register src, FloatRegister tmp) {
mov(tmp, H, 0, src);
fcvths(dst, tmp);
}

// Generalized Test Bit And Branch, including a "far" variety which
// spans more than 32KiB.
Expand Down
45 changes: 45 additions & 0 deletions src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,50 @@ void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpr
__ blr(rscratch1);
}

address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() {
// vmIntrinsics checks InlineIntrinsics flag, no need to check it here.
if (!VM_Version::supports_float16() ||
vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_float16ToFloat) ||
vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_floatToFloat16)) {
return nullptr;
}
// r19_sender_sp: sender sp
// stack:
// [ arg ] <-- esp
// [ arg ]
// retaddr in lr
// result in v0

address entry_point = __ pc();
__ ldrw(c_rarg0, Address(esp));
__ flt16_to_flt(v0, c_rarg0, v1);
__ mov(sp, r19_sender_sp); // Restore caller's SP
__ br(lr);
return entry_point;
}

address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() {
// vmIntrinsics checks InlineIntrinsics flag, no need to check it here.
if (!VM_Version::supports_float16() ||

This comment has been minimized.

Copy link
@theRealAph

theRealAph Feb 5, 2024

Contributor

Do we need this interpreter intrinsic at all?

vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_float16ToFloat) ||
vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_floatToFloat16)) {
return nullptr;
}
// r19_sender_sp: sender sp
// stack:
// [ arg ] <-- esp
// [ arg ]
// retaddr in lr
// result in c_rarg0

address entry_point = __ pc();
__ ldrs(v0, Address(esp));
__ flt_to_flt16(c_rarg0, v0, v1);
__ mov(sp, r19_sender_sp); // Restore caller's SP
__ br(lr);
return entry_point;
}

// Abstract method entry
// Attempt to execute abstract method. Throw exception
address TemplateInterpreterGenerator::generate_abstract_entry(void) {
Expand Down Expand Up @@ -1698,6 +1742,7 @@ address TemplateInterpreterGenerator::generate_currentThread() {
return entry_point;
}


//-----------------------------------------------------------------------------
// Exceptions

Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ enum Ampere_CPU_Model {

static bool supports_on_spin_wait() { return _spin_wait.inst() != SpinWait::NONE; }

static bool supports_float16() { return true; }

#ifdef __APPLE__
// Is the CPU running emulated (for example macOS Rosetta running x86_64 code on M1 ARM (aarch64)
static bool is_cpu_emulated();
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,8 @@ address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {
address TemplateInterpreterGenerator::generate_CRC32_update_entry() { return nullptr; }
address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return nullptr; }
address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return nullptr; }
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; }
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; }

//
// Interpreter stub for calling a native method. (asm interpreter)
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1933,6 +1933,10 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract
return NULL;
}

// Not supported
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; }
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; }

// =============================================================================
// Exceptions

Expand Down
4 changes: 0 additions & 4 deletions src/hotspot/cpu/riscv/assembler_riscv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -798,8 +798,6 @@ enum operand_size { int8, int16, int32, uint32, int64 };
INSN(fsqrt_d, 0b1010011, 0b00000, 0b0101101);
INSN(fcvt_s_d, 0b1010011, 0b00001, 0b0100000);
INSN(fcvt_d_s, 0b1010011, 0b00000, 0b0100001);
INSN(fcvt_s_h, 0b1010011, 0b00010, 0b0100000);
INSN(fcvt_h_s, 0b1010011, 0b00000, 0b0100010);
#undef INSN

// Immediate Instruction
Expand Down Expand Up @@ -1056,7 +1054,6 @@ enum operand_size { int8, int16, int32, uint32, int64 };

INSN(fmv_w_x, 0b1010011, 0b000, 0b00000, 0b1111000);
INSN(fmv_d_x, 0b1010011, 0b000, 0b00000, 0b1111001);
INSN(fmv_h_x, 0b1010011, 0b000, 0b00000, 0b1111010);

#undef INSN

Expand All @@ -1077,7 +1074,6 @@ enum operand_size { int8, int16, int32, uint32, int64 };
INSN(fclass_d, 0b1010011, 0b001, 0b00000, 0b1110001);
INSN(fmv_x_w, 0b1010011, 0b000, 0b00000, 0b1110000);
INSN(fmv_x_d, 0b1010011, 0b000, 0b00000, 0b1110001);
INSN(fmv_x_h, 0b1010011, 0b000, 0b00000, 0b1110010);

#undef INSN

Expand Down
1 change: 0 additions & 1 deletion src/hotspot/cpu/riscv/globals_riscv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ define_pd_global(intx, InlineSmallCode, 1000);
product(bool, UseZba, false, EXPERIMENTAL, "Use Zba instructions") \
product(bool, UseZbb, false, EXPERIMENTAL, "Use Zbb instructions") \
product(bool, UseZbs, false, EXPERIMENTAL, "Use Zbs instructions") \
product(bool, UseZfhmin, false, EXPERIMENTAL, "Use Zfhmin instructions") \
product(bool, UseZic64b, false, EXPERIMENTAL, "Use Zic64b instructions") \
product(bool, UseZicbom, false, EXPERIMENTAL, "Use Zicbom instructions") \
product(bool, UseZicbop, false, EXPERIMENTAL, "Use Zicbop instructions") \
Expand Down
42 changes: 0 additions & 42 deletions src/hotspot/cpu/riscv/riscv.ad
Original file line number Diff line number Diff line change
Expand Up @@ -1845,10 +1845,6 @@ const bool Matcher::match_rule_supported(int opcode) {
case Op_CountTrailingZerosI:
case Op_CountTrailingZerosL:
return UseZbb;

case Op_ConvF2HF:
case Op_ConvHF2F:
return UseZfhmin;
}

return true; // Per default match rules are supported.
Expand Down Expand Up @@ -8180,44 +8176,6 @@ instruct convL2F_reg_reg(fRegF dst, iRegL src) %{
ins_pipe(fp_l2f);
%}

// float <-> half float

instruct convHF2F_reg_reg(fRegF dst, iRegINoSp src, fRegF tmp) %{
predicate(UseZfhmin);
match(Set dst (ConvHF2F src));
effect(TEMP tmp);

ins_cost(XFER_COST);
format %{ "fmv.h.x $tmp, $src\t#@convHF2F_reg_reg\n\t"
"fcvt.s.h $dst, $tmp\t#@convHF2F_reg_reg"
%}

ins_encode %{
__ fmv_h_x($tmp$$FloatRegister, $src$$Register);
__ fcvt_s_h($dst$$FloatRegister, $tmp$$FloatRegister);
%}

ins_pipe(fp_i2f);
%}

instruct convF2HF_reg_reg(iRegINoSp dst, fRegF src, fRegF tmp) %{
predicate(UseZfhmin);
match(Set dst (ConvF2HF src));
effect(TEMP tmp);

ins_cost(XFER_COST);
format %{ "fcvt.h.s $tmp, $src\t#@convF2HF_reg_reg\n\t"
"fmv.x.h $dst, $tmp\t#@convF2HF_reg_reg"
%}

ins_encode %{
__ fcvt_h_s($tmp$$FloatRegister, $src$$FloatRegister);
__ fmv_x_h($dst$$Register, $tmp$$FloatRegister);
%}

ins_pipe(fp_f2i);
%}

// double <-> int

instruct convD2I_reg_reg(iRegINoSp dst, fRegD src) %{
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,10 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
return entry_point;
}

// Not supported
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; }
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; }

// Abstract method entry
// Attempt to execute abstract method. Throw exception
address TemplateInterpreterGenerator::generate_abstract_entry(void) {
Expand Down
3 changes: 0 additions & 3 deletions src/hotspot/cpu/riscv/vm_version_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,6 @@ void VM_Version::initialize() {
if (FLAG_IS_DEFAULT(UseZicboz)) {
FLAG_SET_DEFAULT(UseZicboz, true);
}
if (FLAG_IS_DEFAULT(UseZfhmin)) {
FLAG_SET_DEFAULT(UseZfhmin, true);
}
if (FLAG_IS_DEFAULT(UseZihintpause)) {
FLAG_SET_DEFAULT(UseZihintpause, true);
}
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2003,6 +2003,10 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract
return NULL;
}

// Not supported
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; }
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; }

void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
// Quick & dirty stack overflow checking: bang the stack & handle trap.
// Note that we do the banging after the frame is setup, since the exception
Expand Down
6 changes: 5 additions & 1 deletion src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -2454,6 +2454,10 @@ void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr tmp, LIR_
default : ShouldNotReachHere();
}
#endif // !_LP64
} else if (code == lir_f2hf) {
__ flt_to_flt16(dest->as_register(), value->as_xmm_float_reg(), tmp->as_xmm_float_reg());
} else if (code == lir_hf2f) {
__ flt16_to_flt(dest->as_xmm_float_reg(), value->as_register());
} else {
Unimplemented();
}
Expand Down
12 changes: 11 additions & 1 deletion src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -832,6 +832,10 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
__ move(LIR_OprFact::doubleConst(-0.0), tmp);
}
#endif
if (x->id() == vmIntrinsics::_floatToFloat16) {
tmp = new_register(T_FLOAT);
__ move(LIR_OprFact::floatConst(-0.0), tmp);
}

switch(x->id()) {
case vmIntrinsics::_dabs:
Expand All @@ -841,6 +845,12 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
case vmIntrinsics::_dsqrt_strict:
__ sqrt(calc_input, calc_result, LIR_OprFact::illegalOpr);
break;
case vmIntrinsics::_floatToFloat16:
__ f2hf(calc_input, calc_result, tmp);
break;
case vmIntrinsics::_float16ToFloat:
__ hf2f(calc_input, calc_result, LIR_OprFact::illegalOpr);
break;
default:
ShouldNotReachHere();
}
Expand Down
19 changes: 16 additions & 3 deletions src/hotspot/cpu/x86/macroAssembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,11 @@ class MacroAssembler: public Assembler {
void incrementq(Register reg, int value = 1);
void incrementq(Address dst, int value = 1);

void incrementl(AddressLiteral dst, Register rscratch = noreg);
void incrementl(ArrayAddress dst, Register rscratch);

void incrementq(AddressLiteral dst, Register rscratch = noreg);

// Support optimal SSE move instructions.
void movflt(XMMRegister dst, XMMRegister src) {
if (dst-> encoding() == src->encoding()) return;
Expand Down Expand Up @@ -189,10 +194,18 @@ class MacroAssembler: public Assembler {
}
void movdbl(Address dst, XMMRegister src) { movsd(dst, src); }

void incrementl(AddressLiteral dst, Register rscratch = noreg);
void incrementl(ArrayAddress dst, Register rscratch);
void flt_to_flt16(Register dst, XMMRegister src, XMMRegister tmp) {
// Use separate tmp XMM register because caller may
// requires src XMM register to be unchanged (as in x86.ad).
vcvtps2ph(tmp, src, 0x04, Assembler::AVX_128bit);
movdl(dst, tmp);
movswl(dst, dst);
}

void incrementq(AddressLiteral dst, Register rscratch = noreg);
void flt16_to_flt(XMMRegister dst, Register src) {
movdl(dst, src);
vcvtph2ps(dst, dst, Assembler::AVX_128bit);
}

// Alignment
void align32();
Expand Down
Loading

3 comments on commit 8cfd74f

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vnkozlov
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/backport jdk20u

@openjdk
Copy link

@openjdk openjdk bot commented on 8cfd74f Mar 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vnkozlov Could not automatically backport 8cfd74f7 to openjdk/jdk20u due to conflicts in the following files:

  • src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp
  • src/hotspot/cpu/riscv/vm_version_riscv.cpp
  • src/hotspot/share/runtime/stubRoutines.cpp

Please fetch the appropriate branch/commit and manually resolve these conflicts by using the following commands in your personal fork of openjdk/jdk20u. Note: these commands are just some suggestions and you can use other equivalent commands you know.

# Fetch the up-to-date version of the target branch
$ git fetch --no-tags https://git.openjdk.org/jdk20u.git master:master

# Check out the target branch and create your own branch to backport
$ git checkout master
$ git checkout -b vnkozlov-backport-8cfd74f7

# Fetch the commit you want to backport
$ git fetch --no-tags https://git.openjdk.org/jdk.git 8cfd74f76afc9e5d50c52104fef9974784718dd4

# Backport the commit
$ git cherry-pick --no-commit 8cfd74f76afc9e5d50c52104fef9974784718dd4
# Resolve conflicts now

# Commit the files you have modified
$ git add files/with/resolved/conflicts
$ git commit -m 'Backport 8cfd74f76afc9e5d50c52104fef9974784718dd4'

Once you have resolved the conflicts as explained above continue with creating a pull request towards the openjdk/jdk20u with the title Backport 8cfd74f76afc9e5d50c52104fef9974784718dd4.

Please sign in to comment.