Skip to content
Permalink
Browse files

8136414: Large performance penalty declaring a method strictfp on strict-only platforms

Reviewed-by: thartmann
  • Loading branch information
Vladimir Ivanov
Vladimir Ivanov committed Feb 11, 2020
1 parent c160403 commit 590f5996c65ec230f1fd375f78478c540dcfe1ee
@@ -2257,8 +2257,7 @@ void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
@@ -61,7 +61,6 @@ define_pd_global(uint64_t,MaxRAM, 1ULL*G);
define_pd_global(bool, CICompileOSR, true );
#endif // !TIERED
define_pd_global(bool, UseTypeProfile, false);
define_pd_global(bool, RoundFPResults, true );

define_pd_global(bool, LIRFillDelaySlots, false);
define_pd_global(bool, OptimizeSinglePrecision, true );
@@ -1140,8 +1140,7 @@ const bool Matcher::misaligned_doubles_ok = false;
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during deoptimization?
@@ -62,8 +62,6 @@ define_pd_global(uint64_t, MaxRAM, 1ULL*G);
define_pd_global(bool, CICompileOSR, true );
#endif // COMPILER2
define_pd_global(bool, UseTypeProfile, false);
define_pd_global(bool, RoundFPResults, false);


define_pd_global(bool, LIRFillDelaySlots, false);
define_pd_global(bool, OptimizeSinglePrecision, true);
@@ -62,7 +62,6 @@ define_pd_global(uintx, InitialCodeCacheSize, 160*K);
#endif // !TIERED

define_pd_global(bool, UseTypeProfile, false);
define_pd_global(bool, RoundFPResults, false);

define_pd_global(bool, LIRFillDelaySlots, false);
define_pd_global(bool, OptimizeSinglePrecision, false);
@@ -2501,8 +2501,7 @@ void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Do floats take an entire double register or just half?
@@ -63,7 +63,6 @@ define_pd_global(uintx, InitialCodeCacheSize, 160*K);
#endif // !TIERED

define_pd_global(bool, UseTypeProfile, false);
define_pd_global(bool, RoundFPResults, false);

define_pd_global(bool, LIRFillDelaySlots, false);
define_pd_global(bool, OptimizeSinglePrecision, false);
@@ -1710,8 +1710,7 @@ const bool Matcher::rematerialize_float_constants = false;
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Do floats take an entire double register or just half?
@@ -61,7 +61,6 @@ define_pd_global(uintx, InitialCodeCacheSize, 160*K);
#endif // !TIERED

define_pd_global(bool, UseTypeProfile, false);
define_pd_global(bool, RoundFPResults, false);

define_pd_global(bool, LIRFillDelaySlots, true );
define_pd_global(bool, OptimizeSinglePrecision, false);
@@ -1873,8 +1873,7 @@ const bool Matcher::misaligned_doubles_ok = true;
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during deoptimization?
@@ -33,7 +33,7 @@ enum {

// explicit rounding operations are required to implement the strictFP mode
enum {
pd_strict_fp_requires_explicit_rounding = true
pd_strict_fp_requires_explicit_rounding = LP64_ONLY( false ) NOT_LP64 ( true )
};


@@ -60,7 +60,6 @@ define_pd_global(uint64_t, MaxRAM, 1ULL*G);
define_pd_global(bool, CICompileOSR, true );
#endif // !TIERED
define_pd_global(bool, UseTypeProfile, false);
define_pd_global(bool, RoundFPResults, true );

define_pd_global(bool, LIRFillDelaySlots, false);
define_pd_global(bool, OptimizeSinglePrecision, true );
@@ -1516,8 +1516,7 @@ void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
node->_opnds[opcnt] = new_memory;
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
@@ -1700,9 +1700,8 @@ const bool Matcher::misaligned_doubles_ok = true;
// No-op on amd64
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during deoptimization?
// On x64 it is stored without conversion so we can use normal access.
@@ -10521,24 +10520,6 @@ instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{

//----------Arithmetic Conversion Instructions---------------------------------

// Matcher rule for a RoundFloat node whose input and result are the same
// regF operand (dst appears on both sides of the match).
// The rule has zero cost, an empty encoding and the `empty` pipeline class.
instruct roundFloat_nop(regF dst)
%{
match(Set dst (RoundFloat dst));

ins_cost(0);
// No encoding is listed for this rule.
ins_encode();
ins_pipe(empty);
%}

// Matcher rule for a RoundDouble node whose input and result are the same
// regD operand (dst appears on both sides of the match).
// The rule has zero cost, an empty encoding and the `empty` pipeline class.
instruct roundDouble_nop(regD dst)
%{
match(Set dst (RoundDouble dst));

ins_cost(0);
// No encoding is listed for this rule.
ins_encode();
ins_pipe(empty);
%}

instruct convF2D_reg_reg(regD dst, regF src)
%{
match(Set dst (ConvF2D src));
@@ -607,9 +607,15 @@ class MemoryBuffer: public CompilationResourceObj {
return load;
}

if (RoundFPResults && UseSSE < 2 && load->type()->is_float_kind()) {
// can't skip load since value might get rounded as a side effect
return load;
if (strict_fp_requires_explicit_rounding && load->type()->is_float_kind()) {
#ifdef IA32
if (UseSSE < 2) {
// can't skip load since value might get rounded as a side effect
return load;
}
#else
Unimplemented();
#endif // IA32
}

ciField* field = load->field();
@@ -2272,17 +2278,23 @@ void GraphBuilder::throw_op(int bci) {


Value GraphBuilder::round_fp(Value fp_value) {
// no rounding needed if SSE2 is used
if (RoundFPResults && UseSSE < 2) {
// Must currently insert rounding node for doubleword values that
// are results of expressions (i.e., not loads from memory or
// constants)
if (fp_value->type()->tag() == doubleTag &&
fp_value->as_Constant() == NULL &&
fp_value->as_Local() == NULL && // method parameters need no rounding
fp_value->as_RoundFP() == NULL) {
return append(new RoundFP(fp_value));
if (strict_fp_requires_explicit_rounding) {
#ifdef IA32
// no rounding needed if SSE2 is used
if (UseSSE < 2) {
// Must currently insert rounding node for doubleword values that
// are results of expressions (i.e., not loads from memory or
// constants)
if (fp_value->type()->tag() == doubleTag &&
fp_value->as_Constant() == NULL &&
fp_value->as_Local() == NULL && // method parameters need no rounding
fp_value->as_RoundFP() == NULL) {
return append(new RoundFP(fp_value));
}
}
#else
Unimplemented();
#endif // IA32
}
return fp_value;
}
@@ -3766,11 +3778,17 @@ bool GraphBuilder::try_inline_full(ciMethod* callee, bool holder_known, bool ign
// Proper inlining of methods with jsrs requires a little more work.
if (callee->has_jsrs() ) INLINE_BAILOUT("jsrs not handled properly by inliner yet");

// When SSE2 is used on intel, then no special handling is needed
// for strictfp because the enum-constant is fixed at compile time,
// the check for UseSSE2 is needed here
if (strict_fp_requires_explicit_rounding && UseSSE < 2 && method()->is_strict() != callee->is_strict()) {
INLINE_BAILOUT("caller and callee have different strict fp requirements");
if (strict_fp_requires_explicit_rounding &&
method()->is_strict() != callee->is_strict()) {
#ifdef IA32
// If explicit rounding is required, do not inline strict code into non-strict code (or the reverse).
// When SSE2 is present, no special handling is needed.
if (UseSSE < 2) {
INLINE_BAILOUT("caller and callee have different strict fp requirements");
}
#else
Unimplemented();
#endif // IA32
}

if (is_profiling() && !callee->ensure_method_data()) {
@@ -778,6 +778,7 @@ void LIR_Assembler::build_frame() {


void LIR_Assembler::roundfp_op(LIR_Opr src, LIR_Opr tmp, LIR_Opr dest, bool pop_fpu_stack) {
assert(strict_fp_requires_explicit_rounding, "not required");
assert((src->is_single_fpu() && dest->is_single_stack()) ||
(src->is_double_fpu() && dest->is_double_stack()),
"round_fp: rounds register -> stack location");
@@ -899,13 +899,19 @@ void LIRGenerator::arraycopy_helper(Intrinsic* x, int* flagsp, ciArrayKlass** ex
LIR_Opr LIRGenerator::round_item(LIR_Opr opr) {
assert(opr->is_register(), "why spill if item is not register?");

if (RoundFPResults && UseSSE < 1 && opr->is_single_fpu()) {
LIR_Opr result = new_register(T_FLOAT);
set_vreg_flag(result, must_start_in_memory);
assert(opr->is_register(), "only a register can be spilled");
assert(opr->value_type()->is_float(), "rounding only for floats available");
__ roundfp(opr, LIR_OprFact::illegalOpr, result);
return result;
if (strict_fp_requires_explicit_rounding) {
#ifdef IA32
if (UseSSE < 1 && opr->is_single_fpu()) {
LIR_Opr result = new_register(T_FLOAT);
set_vreg_flag(result, must_start_in_memory);
assert(opr->is_register(), "only a register can be spilled");
assert(opr->value_type()->is_float(), "rounding only for floats available");
__ roundfp(opr, LIR_OprFact::illegalOpr, result);
return result;
}
#else
Unimplemented();
#endif // IA32
}
return opr;
}
@@ -1951,6 +1957,8 @@ void LIRGenerator::do_Throw(Throw* x) {


void LIRGenerator::do_RoundFP(RoundFP* x) {
assert(strict_fp_requires_explicit_rounding, "not required");

LIRItem input(x->input(), this);
input.load_item();
LIR_Opr input_opr = input.result();
@@ -170,9 +170,6 @@
develop(bool, UseTableRanges, true, \
"Faster versions of lookup table using ranges") \
\
develop_pd(bool, RoundFPResults, \
"Indicates whether rounding is needed for floating point results")\
\
develop(intx, NestedInliningSizeRatio, 90, \
"Percentage of prev. allowed inline size in recursive inlining") \
range(0, 100) \
@@ -150,8 +150,9 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
return cg;
}

// Do not inline strict fp into non-strict code, or the reverse
if (caller->is_strict() ^ callee->is_strict()) {
// If explicit rounding is required, do not inline strict into non-strict code (or the reverse).
if (Matcher::strict_fp_requires_explicit_rounding &&
caller->is_strict() != callee->is_strict()) {
allow_inline = false;
}

0 comments on commit 590f599

Please sign in to comment.