From fde23eb9ca03fb427a9145c8ee651f1ed016aeaa Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Mon, 10 Feb 2025 16:10:29 +0000 Subject: [PATCH 01/48] Start --- src/hotspot/cpu/x86/c1_FrameMap_x86.cpp | 2 ++ src/hotspot/cpu/x86/c1_FrameMap_x86.hpp | 3 +++ src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp | 8 ++++++++ src/hotspot/share/runtime/globals.hpp | 4 ++++ src/hotspot/share/runtime/javaThread.hpp | 4 ++++ 5 files changed, 21 insertions(+) diff --git a/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp b/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp index cff2be393bc9e..0427a144f4685 100644 --- a/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp +++ b/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp @@ -176,6 +176,8 @@ void FrameMap::initialize() { map_register( 7, r9); r9_opr = LIR_OprFact::single_cpu(7); map_register( 8, r11); r11_opr = LIR_OprFact::single_cpu(8); map_register( 9, r13); r13_opr = LIR_OprFact::single_cpu(9); + // r14 is allocated conditionally. It is used to hold the random + // generator for profile counters. map_register(10, r14); r14_opr = LIR_OprFact::single_cpu(10); // r12 is allocated conditionally. With compressed oops it holds // the heapbase value and is not visible to the allocator. 
diff --git a/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp b/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp index f2bedcb1d273b..dc0601cb0c682 100644 --- a/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp +++ b/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp @@ -147,7 +147,10 @@ } static int adjust_reg_range(int range) { + // Reduce the number of available regs (to free r12) in case of compressed oops + // Reduce the number of available regs (to free r14) for RNG + if (ProfileCaptureRatio < 1.0) return range - 2; if (UseCompressedOops) return range - 1; return range; } diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index e3c8792decd2b..cdaa22c420ba2 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -338,6 +338,11 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by empty_FPU_stack(); } #endif // !_LP64 && COMPILER2 + + if (ProfileCaptureRatio < 1.0) { + movl(r14, Address(r15_thread, JavaThread::profile_rng_offset())); + } + decrement(rsp, frame_size_in_bytes); // does not emit code for frame_size == 0 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); @@ -347,6 +352,9 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by void C1_MacroAssembler::remove_frame(int frame_size_in_bytes) { + if (ProfileCaptureRatio < 1.0) { + movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14); + } increment(rsp, frame_size_in_bytes); // Does not emit code for frame_size == 0 pop(rbp); } diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp index 11c13fe3c9e19..b6354765934c6 100644 --- a/src/hotspot/share/runtime/globals.hpp +++ b/src/hotspot/share/runtime/globals.hpp @@ -1993,6 +1993,10 @@ const int ObjectAlignmentInBytes = 8; product(bool, UseThreadsLockThrottleLock, true, DIAGNOSTIC, \ "Use an extra lock during Thread start and exit to alleviate" \ 
"contention on Threads_lock.") \ + \ + product(double, ProfileCaptureRatio, 1.0, \ + "Reduce profile captures") \ + range(0.0, 1.0) // end of RUNTIME_FLAGS diff --git a/src/hotspot/share/runtime/javaThread.hpp b/src/hotspot/share/runtime/javaThread.hpp index 9ae8ae151e7a8..4c74302d047dc 100644 --- a/src/hotspot/share/runtime/javaThread.hpp +++ b/src/hotspot/share/runtime/javaThread.hpp @@ -168,6 +168,9 @@ class JavaThread: public Thread { // attached thread cases where this field can have a temporary value. int64_t _monitor_owner_id; + // Random value for randomized profile counters. + uint32_t _profile_rng; + public: void set_monitor_owner_id(int64_t id) { assert(id >= ThreadIdentifier::initial() && id < ThreadIdentifier::current(), ""); @@ -898,6 +901,7 @@ class JavaThread: public Thread { static ByteSize preemption_cancelled_offset() { return byte_offset_of(JavaThread, _preemption_cancelled); } static ByteSize preempt_alternate_return_offset() { return byte_offset_of(JavaThread, _preempt_alternate_return); } static ByteSize unlocked_inflated_monitor_offset() { return byte_offset_of(JavaThread, _unlocked_inflated_monitor); } + static ByteSize profile_rng_offset() { return byte_offset_of(JavaThread, _profile_rng); } #if INCLUDE_JVMTI static ByteSize is_in_VTMS_transition_offset() { return byte_offset_of(JavaThread, _is_in_VTMS_transition); } From 993c05429c319854f210810ddd9141a3a2827b98 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Mon, 10 Feb 2025 17:44:35 +0000 Subject: [PATCH 02/48] So far, so good --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 6 ++++++ src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp | 3 +++ src/hotspot/share/c1/c1_LIR.cpp | 13 +++++++++++++ src/hotspot/share/c1/c1_LIR.hpp | 5 +++++ src/hotspot/share/c1/c1_LIRAssembler.cpp | 5 ++++- src/hotspot/share/c1/c1_LIRAssembler.hpp | 2 ++ src/hotspot/share/c1/c1_LIRGenerator.cpp | 1 + src/hotspot/share/c1/c1_LIRGenerator.hpp | 1 + 8 files changed, 35 insertions(+), 1 deletion(-) diff --git 
a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index de1fa1a9cc635..69689ee5abd1e 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -3495,6 +3495,12 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { __ load_klass(result, obj, rscratch1); } +void LIR_Assembler::inc_profile_ctr(LIR_Opr incr, LIR_Opr dest) { + Register inc = incr->as_register(); + Address dest_adr = as_Address(dest->as_address_ptr()); + __ addl(dest_adr, inc); +} + void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ciMethod* method = op->profiled_method(); int bci = op->profiled_bci(); diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp index 7e28b288f71d3..c3bcfeeeff607 100644 --- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp @@ -1490,6 +1490,9 @@ void LIRGenerator::do_InstanceOf(InstanceOf* x) { x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); } +void LIRGenerator::do_IncProfileCtr(ProfileInvoke* x) { +} + // Intrinsic for Class::isInstance address LIRGenerator::isInstance_entry() { return Runtime1::entry_for(C1StubId::is_instance_of_id); diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index fc90530ec95d8..6bb686fc9ee48 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -456,6 +456,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { case lir_neg: case lir_f2hf: case lir_hf2f: + case lir_inc_profile_ctr: { assert(op->as_Op1() != nullptr, "must be"); LIR_Op1* op1 = (LIR_Op1*)op; @@ -1298,6 +1299,17 @@ void LIR_List::volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offs } +void LIR_List::inc_profile_ctr(LIR_Opr src, LIR_Address* addr, CodeEmitInfo* info, LIR_PatchCode patch_code) { + append(new LIR_Op1( + lir_inc_profile_ctr, + src, + LIR_OprFact::address(addr), + 
addr->type(), + patch_code, + info)); +} + + void LIR_List::idiv(LIR_Opr left, LIR_Opr right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info) { append(new LIR_Op3( lir_idiv, @@ -1795,6 +1807,7 @@ const char * LIR_Op::name() const { case lir_profile_call: s = "profile_call"; break; // LIR_OpProfileType case lir_profile_type: s = "profile_type"; break; + case lir_inc_profile_ctr: s = "inc_profile_ctr"; break; // LIR_OpAssert #ifdef ASSERT case lir_assert: s = "assert"; break; diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index d9005c49c89d4..faecfef2eaa37 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -947,6 +947,7 @@ enum LIR_Code { , lir_safepoint , lir_unwind , lir_load_klass + , lir_inc_profile_ctr , end_op1 , begin_op2 , lir_branch @@ -2300,6 +2301,8 @@ class LIR_List: public CompilationResourceObj { void volatile_store_mem_reg(LIR_Opr src, LIR_Address* address, CodeEmitInfo* info, LIR_PatchCode patch_code = lir_patch_none); void volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offset, BasicType type, CodeEmitInfo* info, LIR_PatchCode patch_code); + void inc_profile_ctr(LIR_Opr src, LIR_Address* addr, CodeEmitInfo* info = nullptr, LIR_PatchCode patch_code = lir_patch_none); + void idiv(LIR_Opr left, LIR_Opr right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); void idiv(LIR_Opr left, int right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); void irem(LIR_Opr left, LIR_Opr right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); @@ -2430,6 +2433,8 @@ class LIR_InsertionBuffer : public CompilationResourceObj { // instruction void move(int index, LIR_Opr src, LIR_Opr dst, CodeEmitInfo* info = nullptr) { append(index, new LIR_Op1(lir_move, src, dst, dst->type(), lir_patch_none, info)); } + + void inc_profile_ctr(LIR_Opr src, LIR_Address* addr, CodeEmitInfo* info = nullptr); }; diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp index 
a5930ba54d880..6b3772c85663a 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -597,6 +597,10 @@ void LIR_Assembler::emit_op1(LIR_Op1* op) { unwind_op(op->in_opr()); break; + case lir_inc_profile_ctr: + inc_profile_ctr(op->in_opr(), op->result_opr()); + break; + default: Unimplemented(); break; @@ -829,7 +833,6 @@ void LIR_Assembler::move_op(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch } } - void LIR_Assembler::verify_oop_map(CodeEmitInfo* info) { #ifndef PRODUCT if (VerifyOops) { diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp index 34aa679daedd0..34dba43e378c9 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp @@ -238,6 +238,8 @@ class LIR_Assembler: public CompilationResourceObj { void align_backward_branch_target(); void align_call(LIR_Code code); + void inc_profile_ctr(LIR_Opr incr, LIR_Opr dest); + void negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp = LIR_OprFact::illegalOpr); void leal(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code = lir_patch_none, CodeEmitInfo* info = nullptr); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 2f0d12b4d7cbe..b8c3d5a29cf70 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -3292,6 +3292,7 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, __ load(counter, result); __ add(result, step, result); __ store(result, counter); + __ inc_profile_ctr(result, counter); if (notify && (!backedge || UseOnStackReplacement)) { LIR_Opr meth = LIR_OprFact::metadataConst(method->constant_encoding()); // The bci for info can point to cmp for if's we want the if bci diff --git a/src/hotspot/share/c1/c1_LIRGenerator.hpp b/src/hotspot/share/c1/c1_LIRGenerator.hpp index 52b1ed54fb0d7..471bd9f7cf779 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.hpp +++ 
b/src/hotspot/share/c1/c1_LIRGenerator.hpp @@ -588,6 +588,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { virtual void do_ProfileCall (ProfileCall* x); virtual void do_ProfileReturnType (ProfileReturnType* x); virtual void do_ProfileInvoke (ProfileInvoke* x); + virtual void do_IncProfileCtr (ProfileInvoke* x); virtual void do_RuntimeCall (RuntimeCall* x); virtual void do_MemBar (MemBar* x); virtual void do_RangeCheckPredicate(RangeCheckPredicate* x); From d586694fce2b28794c111ea61c34dad8836ad14c Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Wed, 12 Feb 2025 15:16:57 +0000 Subject: [PATCH 03/48] So far, so good --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 32 ++++++++++++++++++--- src/hotspot/share/c1/c1_LIR.cpp | 13 ++++----- src/hotspot/share/c1/c1_LIR.hpp | 6 ++-- src/hotspot/share/c1/c1_LIRAssembler.cpp | 8 +++--- src/hotspot/share/c1/c1_LIRAssembler.hpp | 2 +- src/hotspot/share/c1/c1_LIRGenerator.cpp | 9 +++--- 6 files changed, 47 insertions(+), 23 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 69689ee5abd1e..0299ff44c58a3 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -3495,10 +3495,34 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { __ load_klass(result, obj, rscratch1); } -void LIR_Assembler::inc_profile_ctr(LIR_Opr incr, LIR_Opr dest) { - Register inc = incr->as_register(); - Address dest_adr = as_Address(dest->as_address_ptr()); - __ addl(dest_adr, inc); +void LIR_Assembler::inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op) { + Register temp = temp_op->as_register(); + Address dest_adr = as_Address(addr->as_address_ptr()); + + /* Algorithm "xor" from p. 
4 of Marsaglia, "Xorshift RNGs" */ + __ movl(temp, r14); + __ sall(temp, 13); + __ xorl(r14, temp); + __ movl(temp, r14); + __ shrl(temp, 7); + __ xorl(r14, temp); + __ movl(temp, r14); + __ sall(temp, 5); + __ xorl(r14, temp); + + if (incr->is_register()) { + Register inc = incr->as_register(); + __ movl(temp, dest_adr); + __ addl(temp, inc); + __ movl(dest_adr, temp); + __ movl(dest->as_register(), temp); + } else { + jint inc = incr->as_constant_ptr()->as_jint_bits(); + __ movl(temp, dest_adr); + __ addl(temp, inc); + __ movl(dest_adr, temp); + __ movl(dest->as_register(), temp); + } } void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index 6bb686fc9ee48..e3800eb514c02 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -456,7 +456,6 @@ void LIR_OpVisitState::visit(LIR_Op* op) { case lir_neg: case lir_f2hf: case lir_hf2f: - case lir_inc_profile_ctr: { assert(op->as_Op1() != nullptr, "must be"); LIR_Op1* op1 = (LIR_Op1*)op; @@ -582,6 +581,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { case lir_xadd: case lir_xchg: case lir_assert: + case lir_inc_profile_ctr: { assert(op->as_Op2() != nullptr, "must be"); LIR_Op2* op2 = (LIR_Op2*)op; @@ -593,7 +593,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { if (op2->_opr2->is_valid()) do_input(op2->_opr2); if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1); if (op2->_result->is_valid()) do_output(op2->_result); - if (op->code() == lir_xchg || op->code() == lir_xadd) { + if (op->code() == lir_xchg || op->code() == lir_xadd || op->code() == lir_inc_profile_ctr) { // on ARM and PPC, return value is loaded first so could // destroy inputs. On other platforms that implement those // (x86, sparc), the extra constrainsts are harmless. 
@@ -1299,14 +1299,13 @@ void LIR_List::volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offs } -void LIR_List::inc_profile_ctr(LIR_Opr src, LIR_Address* addr, CodeEmitInfo* info, LIR_PatchCode patch_code) { - append(new LIR_Op1( +void LIR_List::inc_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info) { + append(new LIR_Op2( lir_inc_profile_ctr, src, LIR_OprFact::address(addr), - addr->type(), - patch_code, - info)); + res, + tmp)); } diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index faecfef2eaa37..47afe4eae544d 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -947,7 +947,6 @@ enum LIR_Code { , lir_safepoint , lir_unwind , lir_load_klass - , lir_inc_profile_ctr , end_op1 , begin_op2 , lir_branch @@ -971,6 +970,7 @@ enum LIR_Code { , lir_throw , lir_xadd , lir_xchg + , lir_inc_profile_ctr , end_op2 , begin_op3 , lir_idiv @@ -2301,7 +2301,7 @@ class LIR_List: public CompilationResourceObj { void volatile_store_mem_reg(LIR_Opr src, LIR_Address* address, CodeEmitInfo* info, LIR_PatchCode patch_code = lir_patch_none); void volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offset, BasicType type, CodeEmitInfo* info, LIR_PatchCode patch_code); - void inc_profile_ctr(LIR_Opr src, LIR_Address* addr, CodeEmitInfo* info = nullptr, LIR_PatchCode patch_code = lir_patch_none); + void inc_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info = nullptr); void idiv(LIR_Opr left, LIR_Opr right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); void idiv(LIR_Opr left, int right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); @@ -2434,7 +2434,7 @@ class LIR_InsertionBuffer : public CompilationResourceObj { // instruction void move(int index, LIR_Opr src, LIR_Opr dst, CodeEmitInfo* info = nullptr) { append(index, new LIR_Op1(lir_move, src, dst, dst->type(), lir_patch_none, info)); } - void inc_profile_ctr(LIR_Opr src, 
LIR_Address* addr, CodeEmitInfo* info = nullptr); + void inc_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, CodeEmitInfo* info = nullptr); }; diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp index 6b3772c85663a..318430e7c3c35 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -597,10 +597,6 @@ void LIR_Assembler::emit_op1(LIR_Op1* op) { unwind_op(op->in_opr()); break; - case lir_inc_profile_ctr: - inc_profile_ctr(op->in_opr(), op->result_opr()); - break; - default: Unimplemented(); break; @@ -756,6 +752,10 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { atomic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr()); break; + case lir_inc_profile_ctr: + inc_profile_ctr(op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr()); + break; + default: Unimplemented(); break; diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp index 34dba43e378c9..abd28620d2dc2 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp @@ -238,7 +238,7 @@ class LIR_Assembler: public CompilationResourceObj { void align_backward_branch_target(); void align_call(LIR_Code code); - void inc_profile_ctr(LIR_Opr incr, LIR_Opr dest); + void inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp); void negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp = LIR_OprFact::illegalOpr); void leal(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code = lir_patch_none, CodeEmitInfo* info = nullptr); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index b8c3d5a29cf70..9c92aeca0de52 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -3289,10 +3289,11 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, } LIR_Address* counter = new LIR_Address(counter_holder, 
offset, T_INT); LIR_Opr result = new_register(T_INT); - __ load(counter, result); - __ add(result, step, result); - __ store(result, counter); - __ inc_profile_ctr(result, counter); + LIR_Opr tmp = new_register(T_INT); + // __ load(counter, result); + // __ add(result, step, result); + // __ store(result, counter); + __ inc_profile_ctr(step, counter, result, tmp); if (notify && (!backedge || UseOnStackReplacement)) { LIR_Opr meth = LIR_OprFact::metadataConst(method->constant_encoding()); // The bci for info can point to cmp for if's we want the if bci From 97e89e61cf78be18ecf7cbf9b82cfebd06a34150 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Thu, 13 Feb 2025 17:37:31 +0000 Subject: [PATCH 04/48] First POC --- src/hotspot/cpu/x86/c1_FrameMap_x86.hpp | 2 +- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 81 +++++++++++++++++-- src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp | 4 + src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp | 4 +- src/hotspot/share/c1/c1_LIR.cpp | 3 + src/hotspot/share/c1/c1_LIRGenerator.cpp | 37 ++++++--- src/hotspot/share/runtime/globals.hpp | 4 +- 7 files changed, 115 insertions(+), 20 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp b/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp index dc0601cb0c682..0b105d5815206 100644 --- a/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp +++ b/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp @@ -150,7 +150,7 @@ // Reduce the number of available regs (to free r12) in case of compressed oops // Reduce the number of available regs (to free r14) for RNG - if (ProfileCaptureRatio < 1.0) return range - 2; + if (ProfileCaptureRatio > 1) return range - 2; if (UseCompressedOops) return range - 1; return range; } diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 0299ff44c58a3..23847fae51861 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -3495,10 +3495,24 @@ void 
LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { __ load_klass(result, obj, rscratch1); } +long ibar, ifoo; + +void foo() { + asm("nop"); +} + +void bar() { + asm("nop"); +} + void LIR_Assembler::inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op) { Register temp = temp_op->as_register(); Address dest_adr = as_Address(addr->as_address_ptr()); + assert(ProfileCaptureRatio > 1, "ProfileCaptureRatio must be > 1"); + int ratio_shift = exact_log2(ProfileCaptureRatio); + int threshold = (1ull << 32) / ProfileCaptureRatio; + /* Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" */ __ movl(temp, r14); __ sall(temp, 13); @@ -3510,19 +3524,76 @@ void LIR_Assembler::inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LI __ sall(temp, 5); __ xorl(r14, temp); + Label dont; + + // __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, &bar), 0); + if (getenv("APH_TRACE")) { + __ lea(temp, ExternalAddress((address)&ifoo)); + __ incl(Address(temp)); + } + if (incr->is_register()) { Register inc = incr->as_register(); + __ movl(dest->as_register(), inc); + __ cmpl(r14, threshold); + __ jccb(Assembler::above, dont); + + // __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, &foo), 0); + if (getenv("APH_TRACE")) { + __ lea(temp, ExternalAddress((address)&ibar)); + __ incl(Address(temp)); + } + __ movl(temp, dest_adr); + __ sall(inc, ratio_shift); __ addl(temp, inc); __ movl(dest_adr, temp); __ movl(dest->as_register(), temp); } else { - jint inc = incr->as_constant_ptr()->as_jint_bits(); - __ movl(temp, dest_adr); - __ addl(temp, inc); - __ movl(dest_adr, temp); - __ movl(dest->as_register(), temp); + switch (dest->type()) { + case T_INT: { + jint inc = incr->as_constant_ptr()->as_jint_bits() * ProfileCaptureRatio; + __ movl(dest->as_register(), inc); + __ cmpl(r14, threshold); + __ jccb(Assembler::above, dont); + + // __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, &foo), 0); + if (getenv("APH_TRACE")) { + __ lea(temp, ExternalAddress((address)&ibar)); + __ 
incl(Address(temp)); + } + + __ movl(temp, dest_adr); + __ addl(temp, inc); + __ movl(dest_adr, temp); + __ movl(dest->as_register(), temp); + + break; + } + case T_LONG: { + jint inc = incr->as_constant_ptr()->as_jint_bits() * ProfileCaptureRatio; + __ movq(dest->as_register_lo(), (jlong)inc); + __ cmpl(r14, threshold); + __ jccb(Assembler::above, dont); + + // __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, &foo), 0); + if (getenv("APH_TRACE")) { + __ lea(temp, ExternalAddress((address)&ibar)); + __ incl(Address(temp)); + } + + __ movq(temp, dest_adr); + __ addq(temp, inc); + __ movq(dest_adr, temp); + __ movq(dest->as_register_lo(), temp); + + break; + } + default: + ShouldNotReachHere(); + } } + __ bind(dont); } void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp index c3bcfeeeff607..9319bebfd2fb0 100644 --- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp @@ -1548,6 +1548,10 @@ void LIRGenerator::do_If(If* x) { __ cmp(lir_cond(cond), left, right); // Generate branch profiling. Profiling code doesn't kill flags. 
profile_branch(x, cond); + // If we're subsampling counter updates, then profiling code kills flags + if (ProfileCaptureRatio > 1) { + __ cmp(lir_cond(cond), left, right); + } move_to_phi(x->state()); if (x->x()->type()->is_float_kind()) { __ branch(lir_cond(cond), x->tsux(), x->usux()); diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index cdaa22c420ba2..6247e53adfca9 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -339,7 +339,7 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by } #endif // !_LP64 && COMPILER2 - if (ProfileCaptureRatio < 1.0) { + if (ProfileCaptureRatio > 1) { movl(r14, Address(r15_thread, JavaThread::profile_rng_offset())); } @@ -352,7 +352,7 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by void C1_MacroAssembler::remove_frame(int frame_size_in_bytes) { - if (ProfileCaptureRatio < 1.0) { + if (ProfileCaptureRatio > 1) { movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14); } increment(rsp, frame_size_in_bytes); // Does not emit code for frame_size == 0 diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index e3800eb514c02..cba79abe9d9ac 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -583,6 +583,9 @@ void LIR_OpVisitState::visit(LIR_Op* op) { case lir_assert: case lir_inc_profile_ctr: { + if (op->code() == lir_inc_profile_ctr) { + asm("nop"); + } assert(op->as_Op2() != nullptr, "must be"); LIR_Op2* op2 = (LIR_Op2*)op; assert(op2->_tmp2->is_illegal() && op2->_tmp3->is_illegal() && diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 9c92aeca0de52..f14186b0aedce 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -950,11 +950,16 @@ void LIRGenerator::profile_branch(If* if_instr, 
If::Condition cond) { // MDO cells are intptr_t, so the data_reg width is arch-dependent. LIR_Opr data_reg = new_pointer_register(); LIR_Address* data_addr = new LIR_Address(md_reg, data_offset_reg, data_reg->type()); - __ move(data_addr, data_reg); - // Use leal instead of add to avoid destroying condition codes on x86 LIR_Address* fake_incr_value = new LIR_Address(data_reg, DataLayout::counter_increment, T_INT); - __ leal(LIR_OprFact::address(fake_incr_value), data_reg); - __ move(data_reg, data_addr); + if (ProfileCaptureRatio == 1) { + __ move(data_addr, data_reg); + // Use leal instead of add to avoid destroying condition codes on x86 + __ leal(LIR_OprFact::address(fake_incr_value), data_reg); + __ move(data_reg, data_addr); + } else { + LIR_Opr tmp = new_register(T_INT); + __ inc_profile_ctr(LIR_OprFact::intConst(DataLayout::counter_increment), data_addr, data_reg, tmp); + } } } @@ -2470,8 +2475,17 @@ void LIRGenerator::do_Goto(Goto* x) { LIR_Opr md_reg = new_register(T_METADATA); __ metadata2reg(md->constant_encoding(), md_reg); - increment_counter(new LIR_Address(md_reg, offset, - NOT_LP64(T_INT) LP64_ONLY(T_LONG)), DataLayout::counter_increment); + LIR_Address *counter_addr = new LIR_Address(md_reg, offset, + NOT_LP64(T_INT) LP64_ONLY(T_LONG)); + if (ProfileCaptureRatio == 1) { + increment_counter(counter_addr, DataLayout::counter_increment); + } else { + // LIR_Address *counter_addr = new LIR_Address(md_reg, offset, T_INT); + LIR_Opr tmp = new_register(T_INT); + LIR_Opr dummy = new_register(T_INT); + LIR_Opr inc = LIR_OprFact::intConst(DataLayout::counter_increment); + __ inc_profile_ctr(inc, counter_addr, tmp, dummy); + } } // emit phi-instruction move after safepoint since this simplifies @@ -3290,10 +3304,13 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, LIR_Address* counter = new LIR_Address(counter_holder, offset, T_INT); LIR_Opr result = new_register(T_INT); LIR_Opr tmp = new_register(T_INT); - // __ load(counter, result); - 
// __ add(result, step, result); - // __ store(result, counter); - __ inc_profile_ctr(step, counter, result, tmp); + if (ProfileCaptureRatio == 1) { + __ load(counter, result); + __ add(result, step, result); + __ store(result, counter); + } else { + __ inc_profile_ctr(step, counter, result, tmp); + } if (notify && (!backedge || UseOnStackReplacement)) { LIR_Opr meth = LIR_OprFact::metadataConst(method->constant_encoding()); // The bci for info can point to cmp for if's we want the if bci diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp index b6354765934c6..67288cb42e299 100644 --- a/src/hotspot/share/runtime/globals.hpp +++ b/src/hotspot/share/runtime/globals.hpp @@ -1994,9 +1994,9 @@ const int ObjectAlignmentInBytes = 8; "Use an extra lock during Thread start and exit to alleviate" \ "contention on Threads_lock.") \ \ - product(double, ProfileCaptureRatio, 1.0, \ + product(int, ProfileCaptureRatio, 1, \ "Reduce profile captures") \ - range(0.0, 1.0) + range(1, 4096) // end of RUNTIME_FLAGS From b5fb6c31d440b4bb83b597470cabdb268b596d0f Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Fri, 14 Feb 2025 16:43:29 +0000 Subject: [PATCH 05/48] More --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 65 +++++++++++++++------ src/hotspot/cpu/x86/macroAssembler_x86.cpp | 21 +++++++ src/hotspot/cpu/x86/macroAssembler_x86.hpp | 2 + 3 files changed, 69 insertions(+), 19 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 23847fae51861..6e07c97781a72 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -2816,6 +2816,8 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op void LIR_Assembler::align_call(LIR_Code code) { + if (ProfileCaptureRatio > 1) __ movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14); + // make sure that the displacement word of the call ends up word 
aligned int offset = __ offset(); switch (code) { @@ -2839,6 +2841,8 @@ void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { __ call(AddressLiteral(op->addr(), rtype)); add_call_info(code_offset(), op->info()); __ post_call_nop(); + + if (ProfileCaptureRatio > 1) __ movl(r14, Address(r15_thread, JavaThread::profile_rng_offset())); } @@ -2848,6 +2852,8 @@ void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { assert((__ offset() - NativeCall::instruction_size + NativeCall::displacement_offset) % BytesPerWord == 0, "must be aligned"); __ post_call_nop(); + + if (ProfileCaptureRatio > 1) __ movl(r14, Address(r15_thread, JavaThread::profile_rng_offset())); } @@ -3513,30 +3519,22 @@ void LIR_Assembler::inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LI int ratio_shift = exact_log2(ProfileCaptureRatio); int threshold = (1ull << 32) / ProfileCaptureRatio; - /* Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" */ - __ movl(temp, r14); - __ sall(temp, 13); - __ xorl(r14, temp); - __ movl(temp, r14); - __ shrl(temp, 7); - __ xorl(r14, temp); - __ movl(temp, r14); - __ sall(temp, 5); - __ xorl(r14, temp); - - Label dont; - - // __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, &bar), 0); if (getenv("APH_TRACE")) { __ lea(temp, ExternalAddress((address)&ifoo)); __ incl(Address(temp)); } + __ step_random(r14, temp); + + Label dont; + + // __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, &bar), 0); + if (incr->is_register()) { Register inc = incr->as_register(); __ movl(dest->as_register(), inc); __ cmpl(r14, threshold); - __ jccb(Assembler::above, dont); + __ jccb(Assembler::aboveEqual, dont); // __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, &foo), 0); if (getenv("APH_TRACE")) { @@ -3555,7 +3553,7 @@ void LIR_Assembler::inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LI jint inc = incr->as_constant_ptr()->as_jint_bits() * ProfileCaptureRatio; __ movl(dest->as_register(), inc); __ cmpl(r14, threshold); - __ jccb(Assembler::above, dont); + 
__ jccb(Assembler::aboveEqual, dont); // __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, &foo), 0); if (getenv("APH_TRACE")) { @@ -3574,7 +3572,7 @@ void LIR_Assembler::inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LI jint inc = incr->as_constant_ptr()->as_jint_bits() * ProfileCaptureRatio; __ movq(dest->as_register_lo(), (jlong)inc); __ cmpl(r14, threshold); - __ jccb(Assembler::above, dont); + __ jccb(Assembler::aboveEqual, dont); // __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, &foo), 0); if (getenv("APH_TRACE")) { @@ -3596,12 +3594,33 @@ void LIR_Assembler::inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LI __ bind(dont); } +int ibaz; + void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ciMethod* method = op->profiled_method(); int bci = op->profiled_bci(); ciMethod* callee = op->profiled_callee(); Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg); + Label dont; + + Register temp = op->tmp1()->as_register_lo(); + + int ratio_shift = exact_log2(ProfileCaptureRatio); + int threshold = (1ull << 32) / ProfileCaptureRatio; + + if (ProfileCaptureRatio > 1) { + __ step_random(r14, temp); + + if (getenv("APH_TRACE")) { + __ lea(temp, ExternalAddress((address)&ibaz)); + __ incl(Address(temp)); + } + + __ cmpl(r14, threshold); + __ jcc(Assembler::aboveEqual, dont); + } + // Update counter for all call types ciMethodData* md = method->method_data_or_null(); assert(md != nullptr, "Sanity"); @@ -3633,7 +3652,7 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { if (known_klass->equals(receiver)) { Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); __ addptr(data_addr, DataLayout::counter_increment); - return; + goto exit; } } @@ -3649,7 +3668,7 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { __ mov_metadata(recv_addr, known_klass->constant_encoding(), rscratch1); Address data_addr(mdo, md->byte_offset_of_slot(data, 
VirtualCallData::receiver_count_offset(i))); __ addptr(data_addr, DataLayout::counter_increment); - return; + goto exit; } } } else { @@ -3666,6 +3685,9 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { // Static call __ addptr(counter_addr, DataLayout::counter_increment); } + + exit: + __ bind(dont); } void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { @@ -3940,14 +3962,19 @@ void LIR_Assembler::leal(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, Co void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) { assert(!tmp->is_valid(), "don't need temporary"); + if (ProfileCaptureRatio > 1) __ movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14); + __ call(RuntimeAddress(dest)); if (info != nullptr) { add_call_info_here(info); } __ post_call_nop(); + if (ProfileCaptureRatio > 1) __ movl(r14, Address(r15_thread, JavaThread::profile_rng_offset())); } + + void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { assert(type == T_LONG, "only for volatile long fields"); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 1c7f851347e0d..4b93dd5c292de 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -10855,4 +10855,25 @@ void MacroAssembler::setcc(Assembler::Condition comparison, Register dst) { movzbl(dst, dst); } } + +void MacroAssembler::step_random(Register state, Register temp) { + if (0) { + /* Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" */ + movl(temp, state); + sall(temp, 13); + xorl(state, temp); + movl(temp, state); + shrl(temp, 7); + xorl(state, temp); + movl(temp, state); + sall(temp, 5); + xorl(state, temp); + } else { + /* LCG from glibc. 
*/ + movl(temp, 1103515245); + imull(state, temp); + addl(state, 12345); + } +} + #endif diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index c6e5b2a115f03..27b39751b7137 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -2244,6 +2244,8 @@ class MacroAssembler: public Assembler { void restore_legacy_gprs(); void setcc(Assembler::Condition comparison, Register dst); #endif + + void step_random(Register state, Register temp); }; #endif // CPU_X86_MACROASSEMBLER_X86_HPP From 070c683923f2ae3debe6ffa016024001a0cce3e4 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Wed, 19 Feb 2025 17:16:25 +0000 Subject: [PATCH 06/48] Temp --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 56 +++++++++++++++---- src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp | 2 +- src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp | 4 +- src/hotspot/cpu/x86/macroAssembler_x86.cpp | 12 ++++ src/hotspot/cpu/x86/macroAssembler_x86.hpp | 1 + src/hotspot/share/c1/c1_LIR.cpp | 9 ++- src/hotspot/share/c1/c1_LIR.hpp | 18 ++++-- src/hotspot/share/c1/c1_LIRAssembler.cpp | 3 +- src/hotspot/share/c1/c1_LIRAssembler.hpp | 2 +- src/hotspot/share/c1/c1_LIRGenerator.cpp | 6 +- 10 files changed, 87 insertions(+), 26 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 6e07c97781a72..8d6587326d72a 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1693,8 +1693,11 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L assert_different_registers(obj, k_RInfo, klass_RInfo); + + __ testptr(obj, obj); if (op->should_profile()) { + Label not_null; Register mdo = klass_RInfo; __ mov_metadata(mdo, md->constant_encoding()); @@ -1707,6 +1710,8 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L __ bind(not_null); Label update_done; + // __ 
maybe_skip(r14, k_RInfo, update_done); + Register recv = k_RInfo; __ load_klass(recv, obj, tmp_load_klass); type_profile_helper(mdo, md, data, recv, &update_done); @@ -2816,7 +2821,7 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op void LIR_Assembler::align_call(LIR_Code code) { - if (ProfileCaptureRatio > 1) __ movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14); + if (ProfileCaptureRatio != 1) __ movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14); // make sure that the displacement word of the call ends up word aligned int offset = __ offset(); @@ -2842,7 +2847,7 @@ void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { add_call_info(code_offset(), op->info()); __ post_call_nop(); - if (ProfileCaptureRatio > 1) __ movl(r14, Address(r15_thread, JavaThread::profile_rng_offset())); + if (ProfileCaptureRatio != 1) __ movl(r14, Address(r15_thread, JavaThread::profile_rng_offset())); } @@ -2853,7 +2858,7 @@ void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { "must be aligned"); __ post_call_nop(); - if (ProfileCaptureRatio > 1) __ movl(r14, Address(r15_thread, JavaThread::profile_rng_offset())); + if (ProfileCaptureRatio != 1) __ movl(r14, Address(r15_thread, JavaThread::profile_rng_offset())); } @@ -3511,13 +3516,22 @@ void bar() { asm("nop"); } -void LIR_Assembler::inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op) { +void LIR_Assembler::inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op, + int profile_limit) { Register temp = temp_op->as_register(); Address dest_adr = as_Address(addr->as_address_ptr()); - assert(ProfileCaptureRatio > 1, "ProfileCaptureRatio must be > 1"); - int ratio_shift = exact_log2(ProfileCaptureRatio); - int threshold = (1ull << 32) / ProfileCaptureRatio; + assert(ProfileCaptureRatio != 1, "ProfileCaptureRatio must be != 1"); + + int profile_capture_ratio = ProfileCaptureRatio; + + if (profile_limit) { + int ratio = 
sqrt(profile_limit); + profile_capture_ratio = round_down_power_of_2(ratio); + } + + int ratio_shift = exact_log2(profile_capture_ratio); + int threshold = (1ull << 32) >> profile_capture_ratio; if (getenv("APH_TRACE")) { __ lea(temp, ExternalAddress((address)&ifoo)); @@ -3550,7 +3564,7 @@ void LIR_Assembler::inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LI } else { switch (dest->type()) { case T_INT: { - jint inc = incr->as_constant_ptr()->as_jint_bits() * ProfileCaptureRatio; + jint inc = incr->as_constant_ptr()->as_jint_bits() * profile_capture_ratio; __ movl(dest->as_register(), inc); __ cmpl(r14, threshold); __ jccb(Assembler::aboveEqual, dont); @@ -3569,7 +3583,7 @@ void LIR_Assembler::inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LI break; } case T_LONG: { - jint inc = incr->as_constant_ptr()->as_jint_bits() * ProfileCaptureRatio; + jint inc = incr->as_constant_ptr()->as_jint_bits() * profile_capture_ratio; __ movq(dest->as_register_lo(), (jlong)inc); __ cmpl(r14, threshold); __ jccb(Assembler::aboveEqual, dont); @@ -3609,7 +3623,7 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { int ratio_shift = exact_log2(ProfileCaptureRatio); int threshold = (1ull << 32) / ProfileCaptureRatio; - if (ProfileCaptureRatio > 1) { + if (ProfileCaptureRatio != 1) { __ step_random(r14, temp); if (getenv("APH_TRACE")) { @@ -3690,6 +3704,8 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { __ bind(dont); } +int kludge; + void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { Register obj = op->obj()->as_register(); Register tmp = op->tmp()->as_pointer_register(); @@ -3711,6 +3727,22 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { __ verify_oop(obj); + if (ProfileCaptureRatio != 1) { + + // Subsampling profile capture + int ratio_shift = exact_log2(ProfileCaptureRatio); + int threshold = (1ull << 32) >> ratio_shift; + __ step_random(r14, tmp); + + __ cmpl(r14, threshold); + __ 
jcc(Assembler::aboveEqual, next); + } + + if (getenv("APH_TRACE2")) { + __ lea(tmp, ExternalAddress((address)&kludge)); + __ incl(Address(tmp)); + } + #ifdef ASSERT if (obj == tmp) { #ifdef _LP64 @@ -3962,14 +3994,14 @@ void LIR_Assembler::leal(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, Co void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) { assert(!tmp->is_valid(), "don't need temporary"); - if (ProfileCaptureRatio > 1) __ movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14); + if (ProfileCaptureRatio != 1) __ movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14); __ call(RuntimeAddress(dest)); if (info != nullptr) { add_call_info_here(info); } __ post_call_nop(); - if (ProfileCaptureRatio > 1) __ movl(r14, Address(r15_thread, JavaThread::profile_rng_offset())); + if (ProfileCaptureRatio != 1) __ movl(r14, Address(r15_thread, JavaThread::profile_rng_offset())); } diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp index 9319bebfd2fb0..bc0bdd1926b10 100644 --- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp @@ -1549,7 +1549,7 @@ void LIRGenerator::do_If(If* x) { // Generate branch profiling. Profiling code doesn't kill flags. 
profile_branch(x, cond); // If we're subsampling counter updates, then profiling code kills flags - if (ProfileCaptureRatio > 1) { + if (ProfileCaptureRatio != 1) { __ cmp(lir_cond(cond), left, right); } move_to_phi(x->state()); diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index 6247e53adfca9..45beb4efec374 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -339,7 +339,7 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by } #endif // !_LP64 && COMPILER2 - if (ProfileCaptureRatio > 1) { + if (ProfileCaptureRatio != 1) { movl(r14, Address(r15_thread, JavaThread::profile_rng_offset())); } @@ -352,7 +352,7 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by void C1_MacroAssembler::remove_frame(int frame_size_in_bytes) { - if (ProfileCaptureRatio > 1) { + if (ProfileCaptureRatio != 1) { movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14); } increment(rsp, frame_size_in_bytes); // Does not emit code for frame_size == 0 diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 4b93dd5c292de..9b16bea16bdad 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -10876,4 +10876,16 @@ void MacroAssembler::step_random(Register state, Register temp) { } } +void MacroAssembler::maybe_skip(Register state, Register temp, Label &skip) { + if (ProfileCaptureRatio != 1) { + step_random(state, temp); + + int ratio_shift = exact_log2(ProfileCaptureRatio); + int threshold = (1ull << 32) >> ratio_shift; + + cmpl(state, threshold); + jcc(Assembler::aboveEqual, skip); + } +} + #endif diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 27b39751b7137..284a2cee81c45 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ 
b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -2246,6 +2246,7 @@ class MacroAssembler: public Assembler { #endif void step_random(Register state, Register temp); + void maybe_skip(Register state, Register temp, Label &skip); }; #endif // CPU_X86_MACROASSEMBLER_X86_HPP diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index cba79abe9d9ac..4bd3525492689 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -1302,13 +1302,18 @@ void LIR_List::volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offs } -void LIR_List::inc_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info) { +void LIR_List::inc_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, int profile_limit) { append(new LIR_Op2( lir_inc_profile_ctr, src, LIR_OprFact::address(addr), res, - tmp)); + tmp, + LIR_OprFact::illegalOpr, + LIR_OprFact::illegalOpr, + LIR_OprFact::illegalOpr, + LIR_OprFact::illegalOpr, + profile_limit)); } diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index 47afe4eae544d..ed4683abc025d 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -1610,6 +1610,7 @@ class LIR_Op2: public LIR_Op { LIR_Opr _tmp5; LIR_Condition _condition; BasicType _type; + int _profile_limit; void verify() const; @@ -1624,7 +1625,8 @@ class LIR_Op2: public LIR_Op { , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) , _condition(condition) - , _type(type) { + , _type(type) + , _profile_limit(0) { assert(code == lir_cmp || code == lir_branch || code == lir_cond_float_branch || code == lir_assert, "code check"); } @@ -1638,7 +1640,8 @@ class LIR_Op2: public LIR_Op { , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) , _condition(condition) - , _type(type) { + , _type(type) + , _profile_limit(0) { assert(code == lir_cmove, "code check"); assert(type != T_ILLEGAL, "cmove should have type"); } @@ 
-1654,12 +1657,13 @@ class LIR_Op2: public LIR_Op { , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) , _condition(lir_cond_unknown) - , _type(type) { + , _type(type) + , _profile_limit(0) { assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); } LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, - LIR_Opr tmp3 = LIR_OprFact::illegalOpr, LIR_Opr tmp4 = LIR_OprFact::illegalOpr, LIR_Opr tmp5 = LIR_OprFact::illegalOpr) + LIR_Opr tmp3 = LIR_OprFact::illegalOpr, LIR_Opr tmp4 = LIR_OprFact::illegalOpr, LIR_Opr tmp5 = LIR_OprFact::illegalOpr, int profile_limit = 0) : LIR_Op(code, result, nullptr) , _opr1(opr1) , _opr2(opr2) @@ -1669,7 +1673,8 @@ class LIR_Op2: public LIR_Op { , _tmp4(tmp4) , _tmp5(tmp5) , _condition(lir_cond_unknown) - , _type(T_ILLEGAL) { + , _type(T_ILLEGAL) + , _profile_limit(profile_limit) { assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); } @@ -1681,6 +1686,7 @@ class LIR_Op2: public LIR_Op { LIR_Opr tmp3_opr() const { return _tmp3; } LIR_Opr tmp4_opr() const { return _tmp4; } LIR_Opr tmp5_opr() const { return _tmp5; } + int profile_limit() const { return _profile_limit; } LIR_Condition condition() const { assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition; } @@ -2301,7 +2307,7 @@ class LIR_List: public CompilationResourceObj { void volatile_store_mem_reg(LIR_Opr src, LIR_Address* address, CodeEmitInfo* info, LIR_PatchCode patch_code = lir_patch_none); void volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offset, BasicType type, CodeEmitInfo* info, LIR_PatchCode patch_code); - void inc_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info = 
nullptr); + void inc_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, int profile_limit = 0); void idiv(LIR_Opr left, LIR_Opr right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); void idiv(LIR_Opr left, int right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp index 318430e7c3c35..46019ff2690e3 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -753,7 +753,8 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { break; case lir_inc_profile_ctr: - inc_profile_ctr(op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr()); + inc_profile_ctr(op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr(), + op->profile_limit()); break; default: diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp index abd28620d2dc2..c6a7909a30bac 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp @@ -238,7 +238,7 @@ class LIR_Assembler: public CompilationResourceObj { void align_backward_branch_target(); void align_call(LIR_Code code); - void inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp); + void inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp, int profile_limit); void negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp = LIR_OprFact::illegalOpr); void leal(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code = lir_patch_none, CodeEmitInfo* info = nullptr); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index f14186b0aedce..76055cc6c13ab 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -3309,7 +3309,11 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, __ add(result, step, result); __ store(result, counter); } else { - __ inc_profile_ctr(step, counter, result, 
tmp); + int step_bits = 1; + if (step->is_constant()) { + step_bits = step->as_constant_ptr()->as_jint_bits(); + } + __ inc_profile_ctr(step, counter, result, tmp, frequency/step_bits); } if (notify && (!backedge || UseOnStackReplacement)) { LIR_Opr meth = LIR_OprFact::metadataConst(method->constant_encoding()); From 3fd64631e4d2db67efff8900a6be44234dbbde0d Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Thu, 20 Feb 2025 10:46:04 +0000 Subject: [PATCH 07/48] Still works --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 37 +++++++++++---------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 8d6587326d72a..8af3bd047fa58 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1710,7 +1710,8 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L __ bind(not_null); Label update_done; - // __ maybe_skip(r14, k_RInfo, update_done); + + __ maybe_skip(r14, k_RInfo, update_done); Register recv = k_RInfo; __ load_klass(recv, obj, tmp_load_klass); @@ -3727,22 +3728,6 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { __ verify_oop(obj); - if (ProfileCaptureRatio != 1) { - - // Subsampling profile capture - int ratio_shift = exact_log2(ProfileCaptureRatio); - int threshold = (1ull << 32) >> ratio_shift; - __ step_random(r14, tmp); - - __ cmpl(r14, threshold); - __ jcc(Assembler::aboveEqual, next); - } - - if (getenv("APH_TRACE2")) { - __ lea(tmp, ExternalAddress((address)&kludge)); - __ incl(Address(tmp)); - } - #ifdef ASSERT if (obj == tmp) { #ifdef _LP64 @@ -3758,6 +3743,24 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { #endif } #endif + + if (ProfileCaptureRatio != 1) { + + // Subsampling profile capture + int ratio_shift = exact_log2(ProfileCaptureRatio); + int threshold = (1ull << 32) >> ratio_shift; + // Can't use tmp here because sometimes obj 
== tmp! + __ step_random(r14, rscratch1); + + __ cmpl(r14, threshold); + __ jcc(Assembler::aboveEqual, next); + } + + if (getenv("APH_TRACE2")) { + __ lea(tmp, ExternalAddress((address)&kludge)); + __ incl(Address(tmp)); + } + if (do_null) { __ testptr(obj, obj); __ jccb(Assembler::notZero, update); From 50838262fb1636a3c84f61b5b0cb0bc60ca96196 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Mon, 24 Feb 2025 16:23:43 +0000 Subject: [PATCH 08/48] Thinko --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 8af3bd047fa58..b633953dd2ab4 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -3532,7 +3532,9 @@ void LIR_Assembler::inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LI } int ratio_shift = exact_log2(profile_capture_ratio); - int threshold = (1ull << 32) >> profile_capture_ratio; + int threshold = (1ull << 32) >> ratio_shift; + + assert(threshold > 0, "must be"); if (getenv("APH_TRACE")) { __ lea(temp, ExternalAddress((address)&ifoo)); From aa4da234df7fa17b6959521cf5c63bb8b1bf0b6e Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Mon, 3 Mar 2025 17:07:33 +0000 Subject: [PATCH 09/48] Inter --- src/hotspot/cpu/x86/c1_FrameMap_x86.cpp | 3 +++ src/hotspot/cpu/x86/c1_FrameMap_x86.hpp | 3 +++ src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 25 ++++++++++--------- src/hotspot/cpu/x86/macroAssembler_x86.cpp | 2 +- src/hotspot/cpu/x86/macroAssembler_x86.hpp | 2 +- src/hotspot/share/c1/c1_LIR.cpp | 3 +++ .../share/compiler/compiler_globals.hpp | 5 ++++ src/hotspot/share/runtime/globals.hpp | 6 +---- src/hotspot/share/runtime/javaThread.cpp | 10 +++++++- src/hotspot/share/runtime/javaThread.hpp | 6 ++--- 10 files changed, 42 insertions(+), 23 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp b/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp 
index 0427a144f4685..c7565b71cf329 100644 --- a/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp +++ b/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp @@ -176,6 +176,9 @@ void FrameMap::initialize() { map_register( 7, r9); r9_opr = LIR_OprFact::single_cpu(7); map_register( 8, r11); r11_opr = LIR_OprFact::single_cpu(8); map_register( 9, r13); r13_opr = LIR_OprFact::single_cpu(9); + + // FIXME: Find a decent name instead of r14 + // r14 is allocated conditionally. It is used to hold the random // generator for profile counters. map_register(10, r14); r14_opr = LIR_OprFact::single_cpu(10); diff --git a/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp b/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp index 0b105d5815206..d5fef8691ac71 100644 --- a/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp +++ b/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp @@ -150,6 +150,9 @@ // Reduce the number of available regs (to free r12) in case of compressed oops // Reduce the number of available regs (to free r14) for RNG + + // PROFILE-FIXME: Be smarter here + if (ProfileCaptureRatio > 1) return range - 2; if (UseCompressedOops) return range - 1; return range; diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index b633953dd2ab4..174c67ffc3fbc 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1693,11 +1693,8 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L assert_different_registers(obj, k_RInfo, klass_RInfo); - - __ testptr(obj, obj); if (op->should_profile()) { - Label not_null; Register mdo = klass_RInfo; __ mov_metadata(mdo, md->constant_encoding()); @@ -1711,7 +1708,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L Label update_done; - __ maybe_skip(r14, k_RInfo, update_done); + __ maybe_skip_profiling(r14, k_RInfo, update_done); Register recv = k_RInfo; __ load_klass(recv, obj, tmp_load_klass); @@ -2822,6 +2819,7 @@ void 
LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op void LIR_Assembler::align_call(LIR_Code code) { + // We do this here in order not affect call site alignment. if (ProfileCaptureRatio != 1) __ movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14); // make sure that the displacement word of the call ends up word aligned @@ -3517,6 +3515,7 @@ void bar() { asm("nop"); } +// Rename to increment_profile_ctr void LIR_Assembler::inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op, int profile_limit) { Register temp = temp_op->as_register(); @@ -3526,6 +3525,7 @@ void LIR_Assembler::inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LI int profile_capture_ratio = ProfileCaptureRatio; + // FIXME: Use a fixed ProfileCaptureRatio for 1st patch if (profile_limit) { int ratio = sqrt(profile_limit); profile_capture_ratio = round_down_power_of_2(ratio); @@ -3554,10 +3554,10 @@ void LIR_Assembler::inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LI __ jccb(Assembler::aboveEqual, dont); // __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, &foo), 0); - if (getenv("APH_TRACE")) { - __ lea(temp, ExternalAddress((address)&ibar)); - __ incl(Address(temp)); - } + // if (getenv("APH_TRACE")) { + // __ lea(temp, ExternalAddress((address)&ibar)); + // __ incl(Address(temp)); + // } __ movl(temp, dest_adr); __ sall(inc, ratio_shift); @@ -3573,10 +3573,10 @@ void LIR_Assembler::inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LI __ jccb(Assembler::aboveEqual, dont); // __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, &foo), 0); - if (getenv("APH_TRACE")) { - __ lea(temp, ExternalAddress((address)&ibar)); - __ incl(Address(temp)); - } + // if (getenv("APH_TRACE")) { + // __ lea(temp, ExternalAddress((address)&ibar)); + // __ incl(Address(temp)); + // } __ movl(temp, dest_adr); __ addl(temp, inc); @@ -3749,6 +3749,7 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { if (ProfileCaptureRatio != 1) { // 
Subsampling profile capture + // FIXME: Use maybe_skip here? int ratio_shift = exact_log2(ProfileCaptureRatio); int threshold = (1ull << 32) >> ratio_shift; // Can't use tmp here because sometimes obj == tmp! diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 9b16bea16bdad..6d70e447fc6cf 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -10876,7 +10876,7 @@ void MacroAssembler::step_random(Register state, Register temp) { } } -void MacroAssembler::maybe_skip(Register state, Register temp, Label &skip) { +void MacroAssembler::maybe_skip_profiling(Register state, Register temp, Label &skip) { if (ProfileCaptureRatio != 1) { step_random(state, temp); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 284a2cee81c45..936ff442a172e 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -2246,7 +2246,7 @@ class MacroAssembler: public Assembler { #endif void step_random(Register state, Register temp); - void maybe_skip(Register state, Register temp, Label &skip); + void maybe_skip_profiling(Register state, Register temp, Label &skip); }; #endif // CPU_X86_MACROASSEMBLER_X86_HPP diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index 4bd3525492689..617288f4f016f 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -1302,6 +1302,9 @@ void LIR_List::volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offs } +// FIXME: this needs to be a 3-input operation. 
+// maybe give it its own handlers +// FIXME: Maybe dump profile_limit for now void LIR_List::inc_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, int profile_limit) { append(new LIR_Op2( lir_inc_profile_ctr, diff --git a/src/hotspot/share/compiler/compiler_globals.hpp b/src/hotspot/share/compiler/compiler_globals.hpp index a811cd8b3bae6..887874de2b55a 100644 --- a/src/hotspot/share/compiler/compiler_globals.hpp +++ b/src/hotspot/share/compiler/compiler_globals.hpp @@ -382,6 +382,11 @@ "If compilation is stopped with an error, capture diagnostic " \ "information at the bailout point") \ \ + product(int, ProfileCaptureRatio, 1, EXPERIMENTAL, \ + "Reduce and randomize tiered-compilation profile captures " \ + "in order to reduce cache contention on shared method data. " \ + "Must be a power of 2.") \ + range(1, 256) // end of COMPILER_FLAGS diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp index 67288cb42e299..e85774d34221e 100644 --- a/src/hotspot/share/runtime/globals.hpp +++ b/src/hotspot/share/runtime/globals.hpp @@ -1992,11 +1992,7 @@ const int ObjectAlignmentInBytes = 8; \ product(bool, UseThreadsLockThrottleLock, true, DIAGNOSTIC, \ "Use an extra lock during Thread start and exit to alleviate" \ - "contention on Threads_lock.") \ - \ - product(int, ProfileCaptureRatio, 1, \ - "Reduce profile captures") \ - range(1, 4096) + "contention on Threads_lock.") // end of RUNTIME_FLAGS diff --git a/src/hotspot/share/runtime/javaThread.cpp b/src/hotspot/share/runtime/javaThread.cpp index 029c984511427..0a67be9b17757 100644 --- a/src/hotspot/share/runtime/javaThread.cpp +++ b/src/hotspot/share/runtime/javaThread.cpp @@ -523,7 +523,9 @@ JavaThread::JavaThread(MemTag mem_tag) : #endif _lock_stack(this), - _om_cache(this) { + _om_cache(this), + + _profile_rng(0) { set_jni_functions(jni_functions()); #if INCLUDE_JVMCI @@ -540,6 +542,12 @@ JavaThread::JavaThread(MemTag mem_tag) : 
set_requires_cross_modify_fence(false); + // Initial state of random-number generator used when profiling + // C1-generated code. + if (ProfileCaptureRatio > 1) { + _profile_rng = os::random(); + } + pd_initialize(); assert(deferred_card_mark().is_empty(), "Default MemRegion ctor"); } diff --git a/src/hotspot/share/runtime/javaThread.hpp b/src/hotspot/share/runtime/javaThread.hpp index 4c74302d047dc..f2f5afd1320d4 100644 --- a/src/hotspot/share/runtime/javaThread.hpp +++ b/src/hotspot/share/runtime/javaThread.hpp @@ -168,9 +168,6 @@ class JavaThread: public Thread { // attached thread cases where this field can have a temporary value. int64_t _monitor_owner_id; - // Random value for randomized profile counters. - uint32_t _profile_rng; - public: void set_monitor_owner_id(int64_t id) { assert(id >= ThreadIdentifier::initial() && id < ThreadIdentifier::current(), ""); @@ -1262,6 +1259,9 @@ class JavaThread: public Thread { LockStack _lock_stack; OMCache _om_cache; + // Random value for randomized profile counters. 
+ uint32_t _profile_rng; + public: LockStack& lock_stack() { return _lock_stack; } From 13c3c8df0ff5c3a601b747e83a81b0301d679d24 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Mon, 3 Mar 2025 17:13:43 +0000 Subject: [PATCH 10/48] Inter --- src/hotspot/cpu/x86/c1_FrameMap_x86.hpp | 4 +++- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 2 +- src/hotspot/share/c1/c1_LIR.cpp | 12 ++++++------ src/hotspot/share/c1/c1_LIR.hpp | 6 +++--- src/hotspot/share/c1/c1_LIRAssembler.cpp | 4 ++-- src/hotspot/share/c1/c1_LIRAssembler.hpp | 2 +- src/hotspot/share/c1/c1_LIRGenerator.cpp | 6 +++--- 7 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp b/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp index d5fef8691ac71..93c0bcbc746cf 100644 --- a/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp +++ b/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp @@ -149,7 +149,9 @@ static int adjust_reg_range(int range) { // Reduce the number of available regs (to free r12) in case of compressed oops - // Reduce the number of available regs (to free r14) for RNG + + // Reduce the number of available regs (to free r14) for + // random-number state used by randomized profile captures.
// PROFILE-FIXME: Be smarter here diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 174c67ffc3fbc..8f3d4c3105064 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -3516,7 +3516,7 @@ void bar() { } // Rename to increment_profile_ctr -void LIR_Assembler::inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op, +void LIR_Assembler::maybe_inc_profile_counter(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op, int profile_limit) { Register temp = temp_op->as_register(); Address dest_adr = as_Address(addr->as_address_ptr()); diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index 617288f4f016f..746b00de211d0 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -581,9 +581,9 @@ void LIR_OpVisitState::visit(LIR_Op* op) { case lir_xadd: case lir_xchg: case lir_assert: - case lir_inc_profile_ctr: + case lir_maybe_inc_profile_counter: { - if (op->code() == lir_inc_profile_ctr) { + if (op->code() == lir_maybe_inc_profile_counter) { asm("nop"); } assert(op->as_Op2() != nullptr, "must be"); @@ -596,7 +596,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { if (op2->_opr2->is_valid()) do_input(op2->_opr2); if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1); if (op2->_result->is_valid()) do_output(op2->_result); - if (op->code() == lir_xchg || op->code() == lir_xadd || op->code() == lir_inc_profile_ctr) { + if (op->code() == lir_xchg || op->code() == lir_xadd || op->code() == lir_maybe_inc_profile_counter) { // on ARM and PPC, return value is loaded first so could // destroy inputs. On other platforms that implement those // (x86, sparc), the extra constrainsts are harmless. @@ -1305,9 +1305,9 @@ void LIR_List::volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offs // FIXME: this needs to be a 3-input operation. 
// maybe give it its own handlers // FIXME: Maybe dump profile_limit for now -void LIR_List::inc_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, int profile_limit) { +void LIR_List::maybe_inc_profile_counter(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, int profile_limit) { append(new LIR_Op2( - lir_inc_profile_ctr, + lir_maybe_inc_profile_counter, src, LIR_OprFact::address(addr), res, @@ -1817,7 +1817,7 @@ const char * LIR_Op::name() const { case lir_profile_call: s = "profile_call"; break; // LIR_OpProfileType case lir_profile_type: s = "profile_type"; break; - case lir_inc_profile_ctr: s = "inc_profile_ctr"; break; + case lir_maybe_inc_profile_counter: s = "maybe_inc_profile_counter"; break; // LIR_OpAssert #ifdef ASSERT case lir_assert: s = "assert"; break; diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index ed4683abc025d..e310f179776a6 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -970,7 +970,7 @@ enum LIR_Code { , lir_throw , lir_xadd , lir_xchg - , lir_inc_profile_ctr + , lir_maybe_inc_profile_counter , end_op2 , begin_op3 , lir_idiv @@ -2307,7 +2307,7 @@ class LIR_List: public CompilationResourceObj { void volatile_store_mem_reg(LIR_Opr src, LIR_Address* address, CodeEmitInfo* info, LIR_PatchCode patch_code = lir_patch_none); void volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offset, BasicType type, CodeEmitInfo* info, LIR_PatchCode patch_code); - void inc_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, int profile_limit = 0); + void maybe_inc_profile_counter(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, int profile_limit = 0); void idiv(LIR_Opr left, LIR_Opr right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); void idiv(LIR_Opr left, int right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); @@ -2440,7 +2440,7 @@ class LIR_InsertionBuffer : public CompilationResourceObj { // instruction void move(int 
index, LIR_Opr src, LIR_Opr dst, CodeEmitInfo* info = nullptr) { append(index, new LIR_Op1(lir_move, src, dst, dst->type(), lir_patch_none, info)); } - void inc_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, CodeEmitInfo* info = nullptr); + void maybe_inc_profile_counter(LIR_Opr src, LIR_Address* addr, LIR_Opr res, CodeEmitInfo* info = nullptr); }; diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp index 46019ff2690e3..2c407286c5c26 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -752,8 +752,8 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { atomic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr()); break; - case lir_inc_profile_ctr: - inc_profile_ctr(op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr(), + case lir_maybe_inc_profile_counter: + maybe_inc_profile_counter(op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr(), op->profile_limit()); break; diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp index c6a7909a30bac..8d356549ca53c 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp @@ -238,7 +238,7 @@ class LIR_Assembler: public CompilationResourceObj { void align_backward_branch_target(); void align_call(LIR_Code code); - void inc_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp, int profile_limit); + void maybe_inc_profile_counter(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp, int profile_limit); void negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp = LIR_OprFact::illegalOpr); void leal(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code = lir_patch_none, CodeEmitInfo* info = nullptr); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 76055cc6c13ab..dd4b2f00a17f0 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ 
b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -958,7 +958,7 @@ void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { __ move(data_reg, data_addr); } else { LIR_Opr tmp = new_register(T_INT); - __ inc_profile_ctr(LIR_OprFact::intConst(DataLayout::counter_increment), data_addr, data_reg, tmp); + __ maybe_inc_profile_counter(LIR_OprFact::intConst(DataLayout::counter_increment), data_addr, data_reg, tmp); } } } @@ -2484,7 +2484,7 @@ void LIRGenerator::do_Goto(Goto* x) { LIR_Opr tmp = new_register(T_INT); LIR_Opr dummy = new_register(T_INT); LIR_Opr inc = LIR_OprFact::intConst(DataLayout::counter_increment); - __ inc_profile_ctr(inc, counter_addr, tmp, dummy); + __ maybe_inc_profile_counter(inc, counter_addr, tmp, dummy); } } @@ -3313,7 +3313,7 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, if (step->is_constant()) { step_bits = step->as_constant_ptr()->as_jint_bits(); } - __ inc_profile_ctr(step, counter, result, tmp, frequency/step_bits); + __ maybe_inc_profile_counter(step, counter, result, tmp, frequency/step_bits); } if (notify && (!backedge || UseOnStackReplacement)) { LIR_Opr meth = LIR_OprFact::metadataConst(method->constant_encoding()); From 45da64faec6786b3397f9f21fb120a66c28683a8 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Mon, 3 Mar 2025 17:38:34 +0000 Subject: [PATCH 11/48] Inter --- src/hotspot/cpu/x86/assembler_x86.hpp | 3 ++ src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 29 ++++++++++--------- src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp | 4 +-- src/hotspot/cpu/x86/macroAssembler_x86.cpp | 14 +++++++++ src/hotspot/cpu/x86/macroAssembler_x86.hpp | 4 +++ 5 files changed, 38 insertions(+), 16 deletions(-) diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 25be0d6a48d32..756a9d3ecd83c 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -138,6 +138,9 @@ constexpr Register rscratch2 = r11; // volatile constexpr Register 
r12_heapbase = r12; // callee-saved constexpr Register r15_thread = r15; // callee-saved +// State for randomized profile counters. Used by C1. +constexpr Register r14_profile_rng = r14; + #else // rscratch1 will appear in 32bit code that is dead but of course must compile // Using noreg ensures if the dead code is incorrectly live and executed it diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 8f3d4c3105064..daf0291823db0 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -74,6 +74,7 @@ NEEDS_CLEANUP // remove this definitions ? const Register SYNC_header = rax; // synchronization header const Register SHIFT_count = rcx; // where count for shift operations must be + #define __ _masm-> @@ -1708,7 +1709,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L Label update_done; - __ maybe_skip_profiling(r14, k_RInfo, update_done); + __ maybe_skip_profiling(r14_profile_rng, k_RInfo, update_done); Register recv = k_RInfo; __ load_klass(recv, obj, tmp_load_klass); @@ -2820,7 +2821,7 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op void LIR_Assembler::align_call(LIR_Code code) { // We do this here in order not affect call site alignment. 
- if (ProfileCaptureRatio != 1) __ movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14); + if (ProfileCaptureRatio != 1) __ movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14_profile_rng); // make sure that the displacement word of the call ends up word aligned int offset = __ offset(); @@ -2846,7 +2847,7 @@ void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { add_call_info(code_offset(), op->info()); __ post_call_nop(); - if (ProfileCaptureRatio != 1) __ movl(r14, Address(r15_thread, JavaThread::profile_rng_offset())); + if (ProfileCaptureRatio != 1) __ movl(r14_profile_rng, Address(r15_thread, JavaThread::profile_rng_offset())); } @@ -2857,7 +2858,7 @@ void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { "must be aligned"); __ post_call_nop(); - if (ProfileCaptureRatio != 1) __ movl(r14, Address(r15_thread, JavaThread::profile_rng_offset())); + if (ProfileCaptureRatio != 1) __ movl(r14_profile_rng, Address(r15_thread, JavaThread::profile_rng_offset())); } @@ -3541,7 +3542,7 @@ void LIR_Assembler::maybe_inc_profile_counter(LIR_Opr incr, LIR_Opr addr, LIR_Op __ incl(Address(temp)); } - __ step_random(r14, temp); + __ step_random(r14_profile_rng, temp); Label dont; @@ -3550,7 +3551,7 @@ void LIR_Assembler::maybe_inc_profile_counter(LIR_Opr incr, LIR_Opr addr, LIR_Op if (incr->is_register()) { Register inc = incr->as_register(); __ movl(dest->as_register(), inc); - __ cmpl(r14, threshold); + __ cmpl(r14_profile_rng, threshold); __ jccb(Assembler::aboveEqual, dont); // __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, &foo), 0); @@ -3569,7 +3570,7 @@ void LIR_Assembler::maybe_inc_profile_counter(LIR_Opr incr, LIR_Opr addr, LIR_Op case T_INT: { jint inc = incr->as_constant_ptr()->as_jint_bits() * profile_capture_ratio; __ movl(dest->as_register(), inc); - __ cmpl(r14, threshold); + __ cmpl(r14_profile_rng, threshold); __ jccb(Assembler::aboveEqual, dont); // __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, &foo), 0); @@ -3588,7 
+3589,7 @@ void LIR_Assembler::maybe_inc_profile_counter(LIR_Opr incr, LIR_Opr addr, LIR_Op case T_LONG: { jint inc = incr->as_constant_ptr()->as_jint_bits() * profile_capture_ratio; __ movq(dest->as_register_lo(), (jlong)inc); - __ cmpl(r14, threshold); + __ cmpl(r14_profile_rng, threshold); __ jccb(Assembler::aboveEqual, dont); // __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, &foo), 0); @@ -3627,14 +3628,14 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { int threshold = (1ull << 32) / ProfileCaptureRatio; if (ProfileCaptureRatio != 1) { - __ step_random(r14, temp); + __ step_random(r14_profile_rng, temp); if (getenv("APH_TRACE")) { __ lea(temp, ExternalAddress((address)&ibaz)); __ incl(Address(temp)); } - __ cmpl(r14, threshold); + __ cmpl(r14_profile_rng, threshold); __ jcc(Assembler::aboveEqual, dont); } @@ -3753,9 +3754,9 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { int ratio_shift = exact_log2(ProfileCaptureRatio); int threshold = (1ull << 32) >> ratio_shift; // Can't use tmp here because sometimes obj == tmp! 
- __ step_random(r14, rscratch1); + __ step_random(r14_profile_rng, rscratch1); - __ cmpl(r14, threshold); + __ cmpl(r14_profile_rng, threshold); __ jcc(Assembler::aboveEqual, next); } @@ -4000,14 +4001,14 @@ void LIR_Assembler::leal(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, Co void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) { assert(!tmp->is_valid(), "don't need temporary"); - if (ProfileCaptureRatio != 1) __ movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14); + if (ProfileCaptureRatio != 1) __ movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14_profile_rng); __ call(RuntimeAddress(dest)); if (info != nullptr) { add_call_info_here(info); } __ post_call_nop(); - if (ProfileCaptureRatio != 1) __ movl(r14, Address(r15_thread, JavaThread::profile_rng_offset())); + if (ProfileCaptureRatio != 1) __ movl(r14_profile_rng, Address(r15_thread, JavaThread::profile_rng_offset())); } diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index 45beb4efec374..fb4a7762f95f4 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -340,7 +340,7 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by #endif // !_LP64 && COMPILER2 if (ProfileCaptureRatio != 1) { - movl(r14, Address(r15_thread, JavaThread::profile_rng_offset())); + movl(r14_profile_rng, Address(r15_thread, JavaThread::profile_rng_offset())); } decrement(rsp, frame_size_in_bytes); // does not emit code for frame_size == 0 @@ -353,7 +353,7 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by void C1_MacroAssembler::remove_frame(int frame_size_in_bytes) { if (ProfileCaptureRatio != 1) { - movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14); + movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14_profile_rng); } increment(rsp, 
frame_size_in_bytes); // Does not emit code for frame_size == 0 pop(rbp); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 6d70e447fc6cf..443dc95f8e7c5 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -10856,6 +10856,8 @@ void MacroAssembler::setcc(Assembler::Condition comparison, Register dst) { } } +// Randomized profile capture. + void MacroAssembler::step_random(Register state, Register temp) { if (0) { /* Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" */ @@ -10888,4 +10890,16 @@ void MacroAssembler::maybe_skip_profiling(Register state, Register temp, Label & } } +void MacroAssembler::save_profile_rng() { + if (ProfileCaptureRatio != 1) { + __ movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14_profile_rng); + } +} + +void MacroAssembler::restore_profile_rng() { + if (ProfileCaptureRatio != 1) { + __ movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14_profile_rng); + } +} + #endif diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 936ff442a172e..c839094e06291 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -2245,8 +2245,12 @@ class MacroAssembler: public Assembler { void setcc(Assembler::Condition comparison, Register dst); #endif + // Randomized profile capture void step_random(Register state, Register temp); void maybe_skip_profiling(Register state, Register temp, Label &skip); + void save_profile_rng(); + void restore_profile_rng(); + }; #endif // CPU_X86_MACROASSEMBLER_X86_HPP From 665946dc6d69c4ecdc626e43692962fcab9f4015 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Mon, 3 Mar 2025 17:56:25 +0000 Subject: [PATCH 12/48] Inter --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 13 +++++-------- src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp | 10 +++------- 
src/hotspot/cpu/x86/macroAssembler_x86.cpp | 4 ++-- 3 files changed, 10 insertions(+), 17 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index daf0291823db0..efec7a33d038a 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -2821,8 +2821,7 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op void LIR_Assembler::align_call(LIR_Code code) { // We do this here in order not affect call site alignment. - if (ProfileCaptureRatio != 1) __ movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14_profile_rng); - + __ save_profile_rng(); // make sure that the displacement word of the call ends up word aligned int offset = __ offset(); switch (code) { @@ -2847,7 +2846,7 @@ void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { add_call_info(code_offset(), op->info()); __ post_call_nop(); - if (ProfileCaptureRatio != 1) __ movl(r14_profile_rng, Address(r15_thread, JavaThread::profile_rng_offset())); + __ restore_profile_rng(); } @@ -2857,8 +2856,7 @@ void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { assert((__ offset() - NativeCall::instruction_size + NativeCall::displacement_offset) % BytesPerWord == 0, "must be aligned"); __ post_call_nop(); - - if (ProfileCaptureRatio != 1) __ movl(r14_profile_rng, Address(r15_thread, JavaThread::profile_rng_offset())); + __ restore_profile_rng(); } @@ -4001,14 +3999,13 @@ void LIR_Assembler::leal(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, Co void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) { assert(!tmp->is_valid(), "don't need temporary"); - if (ProfileCaptureRatio != 1) __ movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14_profile_rng); - + __ save_profile_rng(); __ call(RuntimeAddress(dest)); if (info != nullptr) { add_call_info_here(info); } __ post_call_nop(); - if 
(ProfileCaptureRatio != 1) __ movl(r14_profile_rng, Address(r15_thread, JavaThread::profile_rng_offset())); + __ restore_profile_rng(); } diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index fb4a7762f95f4..e835468b696b1 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -339,12 +339,10 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by } #endif // !_LP64 && COMPILER2 - if (ProfileCaptureRatio != 1) { - movl(r14_profile_rng, Address(r15_thread, JavaThread::profile_rng_offset())); - } - decrement(rsp, frame_size_in_bytes); // does not emit code for frame_size == 0 + restore_profile_rng(); + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); // C1 code is not hot enough to micro optimize the nmethod entry barrier with an out-of-line stub bs->nmethod_entry_barrier(this, nullptr /* slow_path */, nullptr /* continuation */); @@ -352,9 +350,7 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by void C1_MacroAssembler::remove_frame(int frame_size_in_bytes) { - if (ProfileCaptureRatio != 1) { - movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14_profile_rng); - } + save_profile_rng(); increment(rsp, frame_size_in_bytes); // Does not emit code for frame_size == 0 pop(rbp); } diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 443dc95f8e7c5..cb26d09003f3d 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -10892,13 +10892,13 @@ void MacroAssembler::maybe_skip_profiling(Register state, Register temp, Label & void MacroAssembler::save_profile_rng() { if (ProfileCaptureRatio != 1) { - __ movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14_profile_rng); + movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14_profile_rng); } } 
void MacroAssembler::restore_profile_rng() { if (ProfileCaptureRatio != 1) { - __ movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14_profile_rng); + movl(r14_profile_rng, Address(r15_thread, JavaThread::profile_rng_offset())); } } From 659347ebf8374c7012fee142222def1e7c41e816 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Tue, 4 Mar 2025 11:09:59 +0000 Subject: [PATCH 13/48] Use a fixed ProfileCaptureRatio for 1st patch --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 11 +---------- src/hotspot/share/c1/c1_LIR.cpp | 11 ++--------- src/hotspot/share/c1/c1_LIR.hpp | 16 +++++----------- src/hotspot/share/c1/c1_LIRAssembler.cpp | 3 +-- src/hotspot/share/c1/c1_LIRAssembler.hpp | 2 +- src/hotspot/share/c1/c1_LIRGenerator.cpp | 6 +----- 6 files changed, 11 insertions(+), 38 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index efec7a33d038a..df0242d619d2b 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -74,7 +74,6 @@ NEEDS_CLEANUP // remove this definitions ? 
const Register SYNC_header = rax; // synchronization header const Register SHIFT_count = rcx; // where count for shift operations must be - #define __ _masm-> @@ -3515,21 +3514,13 @@ void bar() { } // Rename to increment_profile_ctr -void LIR_Assembler::maybe_inc_profile_counter(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op, - int profile_limit) { +void LIR_Assembler::maybe_inc_profile_counter(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op) { Register temp = temp_op->as_register(); Address dest_adr = as_Address(addr->as_address_ptr()); assert(ProfileCaptureRatio != 1, "ProfileCaptureRatio must be != 1"); int profile_capture_ratio = ProfileCaptureRatio; - - // FIXME: Use a fixed ProfileCaptureRatio for 1st patch - if (profile_limit) { - int ratio = sqrt(profile_limit); - profile_capture_ratio = round_down_power_of_2(ratio); - } - int ratio_shift = exact_log2(profile_capture_ratio); int threshold = (1ull << 32) >> ratio_shift; diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index 746b00de211d0..23740881e01db 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -1302,21 +1302,14 @@ void LIR_List::volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offs } -// FIXME: this needs to be a 3-input operation. 
-// maybe give it its own handlers // FIXME: Maybe dump profile_limit for now -void LIR_List::maybe_inc_profile_counter(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, int profile_limit) { +void LIR_List::maybe_inc_profile_counter(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp) { append(new LIR_Op2( lir_maybe_inc_profile_counter, src, LIR_OprFact::address(addr), res, - tmp, - LIR_OprFact::illegalOpr, - LIR_OprFact::illegalOpr, - LIR_OprFact::illegalOpr, - LIR_OprFact::illegalOpr, - profile_limit)); + tmp)); } diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index e310f179776a6..73b55fdc9ec31 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -1610,7 +1610,6 @@ class LIR_Op2: public LIR_Op { LIR_Opr _tmp5; LIR_Condition _condition; BasicType _type; - int _profile_limit; void verify() const; @@ -1625,8 +1624,7 @@ class LIR_Op2: public LIR_Op { , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) , _condition(condition) - , _type(type) - , _profile_limit(0) { + , _type(type) { assert(code == lir_cmp || code == lir_branch || code == lir_cond_float_branch || code == lir_assert, "code check"); } @@ -1640,8 +1638,7 @@ class LIR_Op2: public LIR_Op { , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) , _condition(condition) - , _type(type) - , _profile_limit(0) { + , _type(type) { assert(code == lir_cmove, "code check"); assert(type != T_ILLEGAL, "cmove should have type"); } @@ -1657,8 +1654,7 @@ class LIR_Op2: public LIR_Op { , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) , _condition(lir_cond_unknown) - , _type(type) - , _profile_limit(0) { + , _type(type) { assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); } @@ -1673,8 +1669,7 @@ class LIR_Op2: public LIR_Op { , _tmp4(tmp4) , _tmp5(tmp5) , _condition(lir_cond_unknown) - , _type(T_ILLEGAL) - , 
_profile_limit(profile_limit) { + , _type(T_ILLEGAL) { assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); } @@ -1686,7 +1681,6 @@ class LIR_Op2: public LIR_Op { LIR_Opr tmp3_opr() const { return _tmp3; } LIR_Opr tmp4_opr() const { return _tmp4; } LIR_Opr tmp5_opr() const { return _tmp5; } - int profile_limit() const { return _profile_limit; } LIR_Condition condition() const { assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition; } @@ -2307,7 +2301,7 @@ class LIR_List: public CompilationResourceObj { void volatile_store_mem_reg(LIR_Opr src, LIR_Address* address, CodeEmitInfo* info, LIR_PatchCode patch_code = lir_patch_none); void volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offset, BasicType type, CodeEmitInfo* info, LIR_PatchCode patch_code); - void maybe_inc_profile_counter(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, int profile_limit = 0); + void maybe_inc_profile_counter(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp); void idiv(LIR_Opr left, LIR_Opr right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); void idiv(LIR_Opr left, int right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp index 2c407286c5c26..4840c263992eb 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -753,8 +753,7 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { break; case lir_maybe_inc_profile_counter: - maybe_inc_profile_counter(op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr(), - op->profile_limit()); + maybe_inc_profile_counter(op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr()); break; default: diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp 
b/src/hotspot/share/c1/c1_LIRAssembler.hpp index 8d356549ca53c..1d5e31f822c24 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp @@ -238,7 +238,7 @@ class LIR_Assembler: public CompilationResourceObj { void align_backward_branch_target(); void align_call(LIR_Code code); - void maybe_inc_profile_counter(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp, int profile_limit); + void maybe_inc_profile_counter(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp); void negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp = LIR_OprFact::illegalOpr); void leal(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code = lir_patch_none, CodeEmitInfo* info = nullptr); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index dd4b2f00a17f0..1a0835fa1266f 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -3309,11 +3309,7 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, __ add(result, step, result); __ store(result, counter); } else { - int step_bits = 1; - if (step->is_constant()) { - step_bits = step->as_constant_ptr()->as_jint_bits(); - } - __ maybe_inc_profile_counter(step, counter, result, tmp, frequency/step_bits); + __ maybe_inc_profile_counter(step, counter, result, tmp); } if (notify && (!backedge || UseOnStackReplacement)) { LIR_Opr meth = LIR_OprFact::metadataConst(method->constant_encoding()); From 887b215f6752dbe62a32288adcfde50d73438055 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Tue, 4 Mar 2025 11:24:31 +0000 Subject: [PATCH 14/48] More cleanups --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 43 +++++++-------------- src/hotspot/share/c1/c1_LIR.cpp | 1 - 2 files changed, 15 insertions(+), 29 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index df0242d619d2b..9455489145f46 100644 --- 
a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -3613,20 +3613,22 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { Register temp = op->tmp1()->as_register_lo(); - int ratio_shift = exact_log2(ProfileCaptureRatio); - int threshold = (1ull << 32) / ProfileCaptureRatio; + // int ratio_shift = exact_log2(ProfileCaptureRatio); + // int threshold = (1ull << 32) / ProfileCaptureRatio; - if (ProfileCaptureRatio != 1) { - __ step_random(r14_profile_rng, temp); + // if (ProfileCaptureRatio != 1) { + // __ step_random(r14_profile_rng, temp); - if (getenv("APH_TRACE")) { - __ lea(temp, ExternalAddress((address)&ibaz)); - __ incl(Address(temp)); - } + // if (getenv("APH_TRACE")) { + // __ lea(temp, ExternalAddress((address)&ibaz)); + // __ incl(Address(temp)); + // } - __ cmpl(r14_profile_rng, threshold); - __ jcc(Assembler::aboveEqual, dont); - } + // __ cmpl(r14_profile_rng, threshold); + // __ jcc(Assembler::aboveEqual, dont); + // } + + maybe_skip_profiling(r14_profile_rng, temp, dont); // Update counter for all call types ciMethodData* md = method->method_data_or_null(); @@ -3736,23 +3738,8 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { } #endif - if (ProfileCaptureRatio != 1) { - - // Subsampling profile capture - // FIXME: Use maybe_skip here? - int ratio_shift = exact_log2(ProfileCaptureRatio); - int threshold = (1ull << 32) >> ratio_shift; - // Can't use tmp here because sometimes obj == tmp! 
- __ step_random(r14_profile_rng, rscratch1); - - __ cmpl(r14_profile_rng, threshold); - __ jcc(Assembler::aboveEqual, next); - } - - if (getenv("APH_TRACE2")) { - __ lea(tmp, ExternalAddress((address)&kludge)); - __ incl(Address(tmp)); - } + // Subsampling profile capture + maybe_skip_profiling(r14_profile_rng, rscratch1, next); if (do_null) { __ testptr(obj, obj); diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index 23740881e01db..9f185a656d5b7 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -1302,7 +1302,6 @@ void LIR_List::volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offs } -// FIXME: Maybe dump profile_limit for now void LIR_List::maybe_inc_profile_counter(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp) { append(new LIR_Op2( lir_maybe_inc_profile_counter, From 25c4c3c03265b6c92c91c26e8a625cb283bc47ec Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Tue, 4 Mar 2025 11:28:06 +0000 Subject: [PATCH 15/48] More cleanups --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 9455489145f46..c6f212ecd9156 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -3628,7 +3628,7 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { // __ jcc(Assembler::aboveEqual, dont); // } - maybe_skip_profiling(r14_profile_rng, temp, dont); + __ maybe_skip_profiling(r14_profile_rng, temp, dont); // Update counter for all call types ciMethodData* md = method->method_data_or_null(); @@ -3739,7 +3739,7 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { #endif // Subsampling profile capture - maybe_skip_profiling(r14_profile_rng, rscratch1, next); + __ maybe_skip_profiling(r14_profile_rng, rscratch1, next); if (do_null) { __ testptr(obj, obj); From 
e7e3d5995dadc6a29b48aee8f994dc73b1ccdc79 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Tue, 4 Mar 2025 16:50:36 +0000 Subject: [PATCH 16/48] Cleanup --- src/hotspot/cpu/x86/assembler_x86.hpp | 6 +- src/hotspot/cpu/x86/c1_FrameMap_x86.cpp | 9 ++- src/hotspot/cpu/x86/c1_FrameMap_x86.hpp | 14 ++-- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 66 ++----------------- src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp | 48 ++++++++++++++ src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp | 6 ++ src/hotspot/cpu/x86/macroAssembler_x86.cpp | 46 ------------- src/hotspot/cpu/x86/macroAssembler_x86.hpp | 7 -- src/hotspot/share/c1/c1_LIR.cpp | 3 - src/hotspot/share/runtime/globals.hpp | 2 +- src/hotspot/share/runtime/javaThread.cpp | 6 +- 11 files changed, 80 insertions(+), 133 deletions(-) diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 756a9d3ecd83c..8133a50217f64 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -138,9 +138,6 @@ constexpr Register rscratch2 = r11; // volatile constexpr Register r12_heapbase = r12; // callee-saved constexpr Register r15_thread = r15; // callee-saved -// State for randomized profile counters. Used by C1. -constexpr Register r14_profile_rng = r14; - #else // rscratch1 will appear in 32bit code that is dead but of course must compile // Using noreg ensures if the dead code is incorrectly live and executed it @@ -155,6 +152,9 @@ constexpr Register r14_profile_rng = r14; // or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg. constexpr Register rbp_mh_SP_save = noreg; +// State for randomized profile counters. Used by C1. +extern Register r_profile_rng; + // Address is an abstraction used to represent a memory location // using any of the amd64 addressing modes with one object. 
// diff --git a/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp b/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp index c7565b71cf329..68c64aee9d6b3 100644 --- a/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp +++ b/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp @@ -177,10 +177,13 @@ void FrameMap::initialize() { map_register( 8, r11); r11_opr = LIR_OprFact::single_cpu(8); map_register( 9, r13); r13_opr = LIR_OprFact::single_cpu(9); - // FIXME: Find a decent name instead of r14 - - // r14 is allocated conditionally. It is used to hold the random + // r_profile_rng is allocated conditionally. It is used to hold the random // generator for profile counters. + r_profile_rng + = (UseCompressedOops && ProfileCaptureRatio > 1) ? r14 + : (ProfileCaptureRatio > 1) ? r12 + : noreg; + map_register(10, r14); r14_opr = LIR_OprFact::single_cpu(10); // r12 is allocated conditionally. With compressed oops it holds // the heapbase value and is not visible to the allocator. diff --git a/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp b/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp index 93c0bcbc746cf..0eca63b12ba16 100644 --- a/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp +++ b/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp @@ -147,16 +147,10 @@ } static int adjust_reg_range(int range) { - - // Reduce the number of available regs (to free r12) in case of compressed oops - - // Reduce the number of available regs (to free r14) for - // random-nunmber state used by randomized profile captures. - - // PROFILE-FIXME: Be smarter here - - if (ProfileCaptureRatio > 1) return range - 2; - if (UseCompressedOops) return range - 1; + // Reduce the number of available regs (to free r12 or r14) in + // case of compressed oops and randomized profile captures. 
+ if (UseCompressedOops && ProfileCaptureRatio > 1) return range - 2; + if (UseCompressedOops || ProfileCaptureRatio > 1) return range - 1; return range; } diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index c6f212ecd9156..ce065fe996dfc 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1708,7 +1708,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L Label update_done; - __ maybe_skip_profiling(r14_profile_rng, k_RInfo, update_done); + __ maybe_skip_profiling(r_profile_rng, k_RInfo, update_done); Register recv = k_RInfo; __ load_klass(recv, obj, tmp_load_klass); @@ -3503,16 +3503,6 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { __ load_klass(result, obj, rscratch1); } -long ibar, ifoo; - -void foo() { - asm("nop"); -} - -void bar() { - asm("nop"); -} - // Rename to increment_profile_ctr void LIR_Assembler::maybe_inc_profile_counter(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op) { Register temp = temp_op->as_register(); @@ -3526,29 +3516,16 @@ void LIR_Assembler::maybe_inc_profile_counter(LIR_Opr incr, LIR_Opr addr, LIR_Op assert(threshold > 0, "must be"); - if (getenv("APH_TRACE")) { - __ lea(temp, ExternalAddress((address)&ifoo)); - __ incl(Address(temp)); - } - - __ step_random(r14_profile_rng, temp); + __ step_random(r_profile_rng, temp); Label dont; - // __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, &bar), 0); - if (incr->is_register()) { Register inc = incr->as_register(); __ movl(dest->as_register(), inc); - __ cmpl(r14_profile_rng, threshold); + __ cmpl(r_profile_rng, threshold); __ jccb(Assembler::aboveEqual, dont); - // __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, &foo), 0); - // if (getenv("APH_TRACE")) { - // __ lea(temp, ExternalAddress((address)&ibar)); - // __ incl(Address(temp)); - // } - __ movl(temp, dest_adr); __ sall(inc, ratio_shift); __ addl(temp, inc); @@ -3559,15 
+3536,9 @@ void LIR_Assembler::maybe_inc_profile_counter(LIR_Opr incr, LIR_Opr addr, LIR_Op case T_INT: { jint inc = incr->as_constant_ptr()->as_jint_bits() * profile_capture_ratio; __ movl(dest->as_register(), inc); - __ cmpl(r14_profile_rng, threshold); + __ cmpl(r_profile_rng, threshold); __ jccb(Assembler::aboveEqual, dont); - // __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, &foo), 0); - // if (getenv("APH_TRACE")) { - // __ lea(temp, ExternalAddress((address)&ibar)); - // __ incl(Address(temp)); - // } - __ movl(temp, dest_adr); __ addl(temp, inc); __ movl(dest_adr, temp); @@ -3578,15 +3549,9 @@ void LIR_Assembler::maybe_inc_profile_counter(LIR_Opr incr, LIR_Opr addr, LIR_Op case T_LONG: { jint inc = incr->as_constant_ptr()->as_jint_bits() * profile_capture_ratio; __ movq(dest->as_register_lo(), (jlong)inc); - __ cmpl(r14_profile_rng, threshold); + __ cmpl(r_profile_rng, threshold); __ jccb(Assembler::aboveEqual, dont); - // __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, &foo), 0); - if (getenv("APH_TRACE")) { - __ lea(temp, ExternalAddress((address)&ibar)); - __ incl(Address(temp)); - } - __ movq(temp, dest_adr); __ addq(temp, inc); __ movq(dest_adr, temp); @@ -3613,22 +3578,7 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { Register temp = op->tmp1()->as_register_lo(); - // int ratio_shift = exact_log2(ProfileCaptureRatio); - // int threshold = (1ull << 32) / ProfileCaptureRatio; - - // if (ProfileCaptureRatio != 1) { - // __ step_random(r14_profile_rng, temp); - - // if (getenv("APH_TRACE")) { - // __ lea(temp, ExternalAddress((address)&ibaz)); - // __ incl(Address(temp)); - // } - - // __ cmpl(r14_profile_rng, threshold); - // __ jcc(Assembler::aboveEqual, dont); - // } - - __ maybe_skip_profiling(r14_profile_rng, temp, dont); + __ maybe_skip_profiling(r_profile_rng, temp, dont); // Update counter for all call types ciMethodData* md = method->method_data_or_null(); @@ -3739,7 +3689,7 @@ void 
LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { #endif // Subsampling profile capture - __ maybe_skip_profiling(r14_profile_rng, rscratch1, next); + __ maybe_skip_profiling(r_profile_rng, rscratch1, next); if (do_null) { __ testptr(obj, obj); @@ -3987,8 +3937,6 @@ void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* arg } - - void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { assert(type == T_LONG, "only for volatile long fields"); diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index e835468b696b1..b868e65291443 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -41,6 +41,8 @@ #include "utilities/checkedCast.hpp" #include "utilities/globalDefinitions.hpp" +Register r_profile_rng; + int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register tmp, Label& slow_case) { const int aligned_mask = BytesPerWord -1; const int hdr_offset = oopDesc::mark_offset_in_bytes(); @@ -410,4 +412,50 @@ void C1_MacroAssembler::invalidate_registers(bool inv_rax, bool inv_rbx, bool in #endif } +// Randomized profile capture. + +void C1_MacroAssembler::step_random(Register state, Register temp) { + // One of these will be the best for a particular CPU. + + /* Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" */ + // movl(temp, state); + // sall(temp, 13); + // xorl(state, temp); + // movl(temp, state); + // shrl(temp, 7); + // xorl(state, temp); + // movl(temp, state); + // sall(temp, 5); + // xorl(state, temp); + + /* LCG from glibc. 
*/ + movl(temp, 1103515245); + imull(state, temp); + addl(state, 12345); +} + +void C1_MacroAssembler::maybe_skip_profiling(Register state, Register temp, Label &skip) { + if (ProfileCaptureRatio != 1) { + step_random(state, temp); + + int ratio_shift = exact_log2(ProfileCaptureRatio); + int threshold = (1ull << 32) >> ratio_shift; + + cmpl(state, threshold); + jcc(Assembler::aboveEqual, skip); + } +} + +void C1_MacroAssembler::save_profile_rng() { + if (ProfileCaptureRatio != 1) { + movl(Address(r15_thread, JavaThread::profile_rng_offset()), r_profile_rng); + } +} + +void C1_MacroAssembler::restore_profile_rng() { + if (ProfileCaptureRatio != 1) { + movl(r_profile_rng, Address(r15_thread, JavaThread::profile_rng_offset())); + } +} + #endif // ifndef PRODUCT diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp index 6344a7b6ef19e..619e0186fc131 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp @@ -128,4 +128,10 @@ void restore_live_registers_except_rax(bool restore_fpu_registers); void restore_live_registers(bool restore_fpu_registers); + // Randomized profile capture + void step_random(Register state, Register temp); + void maybe_skip_profiling(Register state, Register temp, Label &skip); + void save_profile_rng(); + void restore_profile_rng(); + #endif // CPU_X86_C1_MACROASSEMBLER_X86_HPP diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index cb26d09003f3d..8b3bb8c30850e 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -10856,50 +10856,4 @@ void MacroAssembler::setcc(Assembler::Condition comparison, Register dst) { } } -// Randomized profile capture. - -void MacroAssembler::step_random(Register state, Register temp) { - if (0) { - /* Algorithm "xor" from p. 
4 of Marsaglia, "Xorshift RNGs" */ - movl(temp, state); - sall(temp, 13); - xorl(state, temp); - movl(temp, state); - shrl(temp, 7); - xorl(state, temp); - movl(temp, state); - sall(temp, 5); - xorl(state, temp); - } else { - /* LCG from glibc. */ - movl(temp, 1103515245); - imull(state, temp); - addl(state, 12345); - } -} - -void MacroAssembler::maybe_skip_profiling(Register state, Register temp, Label &skip) { - if (ProfileCaptureRatio != 1) { - step_random(state, temp); - - int ratio_shift = exact_log2(ProfileCaptureRatio); - int threshold = (1ull << 32) >> ratio_shift; - - cmpl(state, threshold); - jcc(Assembler::aboveEqual, skip); - } -} - -void MacroAssembler::save_profile_rng() { - if (ProfileCaptureRatio != 1) { - movl(Address(r15_thread, JavaThread::profile_rng_offset()), r14_profile_rng); - } -} - -void MacroAssembler::restore_profile_rng() { - if (ProfileCaptureRatio != 1) { - movl(r14_profile_rng, Address(r15_thread, JavaThread::profile_rng_offset())); - } -} - #endif diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index c839094e06291..c6e5b2a115f03 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -2244,13 +2244,6 @@ class MacroAssembler: public Assembler { void restore_legacy_gprs(); void setcc(Assembler::Condition comparison, Register dst); #endif - - // Randomized profile capture - void step_random(Register state, Register temp); - void maybe_skip_profiling(Register state, Register temp, Label &skip); - void save_profile_rng(); - void restore_profile_rng(); - }; #endif // CPU_X86_MACROASSEMBLER_X86_HPP diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index 9f185a656d5b7..22c83b4b0e13c 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -583,9 +583,6 @@ void LIR_OpVisitState::visit(LIR_Op* op) { case lir_assert: case lir_maybe_inc_profile_counter: { - if (op->code() == 
lir_maybe_inc_profile_counter) { - asm("nop"); - } assert(op->as_Op2() != nullptr, "must be"); LIR_Op2* op2 = (LIR_Op2*)op; assert(op2->_tmp2->is_illegal() && op2->_tmp3->is_illegal() && diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp index e85774d34221e..11c13fe3c9e19 100644 --- a/src/hotspot/share/runtime/globals.hpp +++ b/src/hotspot/share/runtime/globals.hpp @@ -1992,7 +1992,7 @@ const int ObjectAlignmentInBytes = 8; \ product(bool, UseThreadsLockThrottleLock, true, DIAGNOSTIC, \ "Use an extra lock during Thread start and exit to alleviate" \ - "contention on Threads_lock.") + "contention on Threads_lock.") \ // end of RUNTIME_FLAGS diff --git a/src/hotspot/share/runtime/javaThread.cpp b/src/hotspot/share/runtime/javaThread.cpp index 0a67be9b17757..731caf565d49c 100644 --- a/src/hotspot/share/runtime/javaThread.cpp +++ b/src/hotspot/share/runtime/javaThread.cpp @@ -545,7 +545,11 @@ JavaThread::JavaThread(MemTag mem_tag) : // Initial state of random-number generator used when profiling // C1-generated code. 
if (ProfileCaptureRatio > 1) { - _profile_rng = os::random(); + int state; + do { + state = os::random(); + } while (state == 0); + _profile_rng = state; } pd_initialize(); From 5be4c52ffdafd989aef6253feab9cbe2fe0f5a8e Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Thu, 23 Oct 2025 18:09:14 +0100 Subject: [PATCH 17/48] Merge from master --- src/hotspot/cpu/x86/cat | 32 -------------------------------- 1 file changed, 32 deletions(-) delete mode 100644 src/hotspot/cpu/x86/cat diff --git a/src/hotspot/cpu/x86/cat b/src/hotspot/cpu/x86/cat deleted file mode 100644 index 3a3cc8ed335ce..0000000000000 --- a/src/hotspot/cpu/x86/cat +++ /dev/null @@ -1,32 +0,0 @@ -diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp -index ca2741cc59f..37dbfa764ec 100644 ---- a/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp -+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp -@@ -183,6 +183,8 @@ ATTRIBUTE_ALIGNED(4) static const juint _D_table[] = - - #define __ _masm-> - -+long long ppp; -+ - address StubGenerator::generate_libmCbrt() { - StubGenStubId stub_id = StubGenStubId::dcbrt_id; - StubCodeMark mark(this, stub_id); -@@ -207,6 +209,9 @@ address StubGenerator::generate_libmCbrt() { - - __ enter(); // required for proper stackwalking of RuntimeStub frame - -+ __ lea(rdx, ExternalAddress((address)&ppp)); -+ __ incl(Address(rdx)); -+ - __ bind(B1_1); - __ subq(rsp, 24); - __ movsd(Address(rsp), xmm0); -@@ -239,6 +244,8 @@ address StubGenerator::generate_libmCbrt() { - __ movapd(xmm5, ExternalAddress(coeff_table), r11 /*rscratch*/); - __ movl(rax, 5462); - __ movapd(xmm6, ExternalAddress(coeff_table + 16), r11 /*rscratch*/); -+ __ unpcklpd(xmm6, xmm6); -+ __ movapd(xmm6, ExternalAddress(coeff_table + 16), r11 /*rscratch*/); - __ mull(rdx); - __ movq(rdx, r9); - __ andq(r9, 2047); From 62db1836dd7639ed4efd5343e777109ac1a5962c Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Fri, 24 Oct 2025 17:37:07 +0100 Subject: 
[PATCH 18/48] Little things. --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 9 ++++----- src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp | 2 +- src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp | 2 +- src/hotspot/share/c1/c1_LIR.cpp | 10 +++++----- src/hotspot/share/c1/c1_LIR.hpp | 6 +++--- src/hotspot/share/c1/c1_LIRAssembler.cpp | 4 ++-- src/hotspot/share/c1/c1_LIRAssembler.hpp | 2 +- src/hotspot/share/c1/c1_LIRGenerator.cpp | 6 +++--- 8 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 5ee4738704972..947ad7f60e0bd 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1335,7 +1335,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L Label update_done; - __ maybe_skip_profiling(r_profile_rng, k_RInfo, update_done); + __ step_profile_rng(r_profile_rng, k_RInfo, update_done); Register recv = k_RInfo; __ load_klass(recv, obj, tmp_load_klass); @@ -2765,8 +2765,7 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { __ load_klass(result, obj, rscratch1); } -// Rename to increment_profile_ctr -void LIR_Assembler::maybe_inc_profile_counter(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op) { +void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op) { Register temp = temp_op->as_register(); Address dest_adr = as_Address(addr->as_address_ptr()); @@ -2840,7 +2839,7 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { Register temp = op->tmp1()->as_register_lo(); - __ maybe_skip_profiling(r_profile_rng, temp, dont); + __ step_profile_rng(r_profile_rng, temp, dont); // Update counter for all call types ciMethodData* md = method->method_data_or_null(); @@ -2943,7 +2942,7 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { #endif // Subsampling profile capture - __ maybe_skip_profiling(r_profile_rng, 
rscratch1, next); + __ step_profile_rng(r_profile_rng, rscratch1, next); if (do_null) { __ testptr(obj, obj); diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index e9cf181a1a689..34a6212beb69a 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -319,7 +319,7 @@ void C1_MacroAssembler::step_random(Register state, Register temp) { addl(state, 12345); } -void C1_MacroAssembler::maybe_skip_profiling(Register state, Register temp, Label &skip) { +void C1_MacroAssembler::step_profile_rng(Register state, Register temp, Label &skip) { if (ProfileCaptureRatio != 1) { step_random(state, temp); diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp index 60500ad0ea424..902e41015bc24 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp @@ -130,7 +130,7 @@ // Randomized profile capture void step_random(Register state, Register temp); - void maybe_skip_profiling(Register state, Register temp, Label &skip); + void step_profile_rng(Register state, Register temp, Label &skip); void save_profile_rng(); void restore_profile_rng(); diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index e326fe1d0991e..468877d580946 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -565,7 +565,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { case lir_xadd: case lir_xchg: case lir_assert: - case lir_maybe_inc_profile_counter: + case lir_increment_profile_ctr: { assert(op->as_Op2() != nullptr, "must be"); LIR_Op2* op2 = (LIR_Op2*)op; @@ -577,7 +577,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { if (op2->_opr2->is_valid()) do_input(op2->_opr2); if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1); if (op2->_result->is_valid()) do_output(op2->_result); - if (op->code() == lir_xchg || op->code() == lir_xadd || op->code() 
== lir_maybe_inc_profile_counter) { + if (op->code() == lir_xchg || op->code() == lir_xadd || op->code() == lir_increment_profile_ctr) { // on ARM and PPC, return value is loaded first so could // destroy inputs. On other platforms that implement those // (x86, sparc), the extra constrainsts are harmless. @@ -1265,9 +1265,9 @@ void LIR_List::volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offs } -void LIR_List::maybe_inc_profile_counter(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp) { +void LIR_List::increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp) { append(new LIR_Op2( - lir_maybe_inc_profile_counter, + lir_increment_profile_ctr, src, LIR_OprFact::address(addr), res, @@ -1766,7 +1766,7 @@ const char * LIR_Op::name() const { case lir_profile_call: s = "profile_call"; break; // LIR_OpProfileType case lir_profile_type: s = "profile_type"; break; - case lir_maybe_inc_profile_counter: s = "maybe_inc_profile_counter"; break; + case lir_increment_profile_ctr: s = "increment_profile_ctr"; break; // LIR_OpAssert #ifdef ASSERT case lir_assert: s = "assert"; break; diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index 1c4a708045285..f4962e5e87a9a 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -958,7 +958,7 @@ enum LIR_Code { , lir_throw , lir_xadd , lir_xchg - , lir_maybe_inc_profile_counter + , lir_increment_profile_ctr , end_op2 , begin_op3 , lir_idiv @@ -2238,7 +2238,7 @@ class LIR_List: public CompilationResourceObj { void volatile_store_mem_reg(LIR_Opr src, LIR_Address* address, CodeEmitInfo* info, LIR_PatchCode patch_code = lir_patch_none); void volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offset, BasicType type, CodeEmitInfo* info, LIR_PatchCode patch_code); - void maybe_inc_profile_counter(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp); + void increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr 
tmp); void idiv(LIR_Opr left, LIR_Opr right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); void idiv(LIR_Opr left, int right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); @@ -2371,7 +2371,7 @@ class LIR_InsertionBuffer : public CompilationResourceObj { // instruction void move(int index, LIR_Opr src, LIR_Opr dst, CodeEmitInfo* info = nullptr) { append(index, new LIR_Op1(lir_move, src, dst, dst->type(), lir_patch_none, info)); } - void maybe_inc_profile_counter(LIR_Opr src, LIR_Address* addr, LIR_Opr res, CodeEmitInfo* info = nullptr); + void increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, CodeEmitInfo* info = nullptr); }; diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp index c0cc2b0557a1e..050fefc037fac 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -724,8 +724,8 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { atomic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr()); break; - case lir_maybe_inc_profile_counter: - maybe_inc_profile_counter(op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr()); + case lir_increment_profile_ctr: + increment_profile_ctr(op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr()); break; default: diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp index 3fe52f0427850..6deb37a4a9b32 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp @@ -235,7 +235,7 @@ class LIR_Assembler: public CompilationResourceObj { void align_backward_branch_target(); void align_call(LIR_Code code); - void maybe_inc_profile_counter(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp); + void increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp); void negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp = LIR_OprFact::illegalOpr); void leal(LIR_Opr src, LIR_Opr dest, LIR_PatchCode 
patch_code = lir_patch_none, CodeEmitInfo* info = nullptr); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 29a57d0309b35..a0cbf2640049d 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -935,7 +935,7 @@ void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { __ move(data_reg, data_addr); } else { LIR_Opr tmp = new_register(T_INT); - __ maybe_inc_profile_counter(LIR_OprFact::intConst(DataLayout::counter_increment), data_addr, data_reg, tmp); + __ increment_profile_ctr(LIR_OprFact::intConst(DataLayout::counter_increment), data_addr, data_reg, tmp); } } } @@ -2387,7 +2387,7 @@ void LIRGenerator::do_Goto(Goto* x) { LIR_Opr tmp = new_register(T_INT); LIR_Opr dummy = new_register(T_INT); LIR_Opr inc = LIR_OprFact::intConst(DataLayout::counter_increment); - __ maybe_inc_profile_counter(inc, counter_addr, tmp, dummy); + __ increment_profile_ctr(inc, counter_addr, tmp, dummy); } } @@ -3193,7 +3193,7 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, __ add(result, step, result); __ store(result, counter); } else { - __ maybe_inc_profile_counter(step, counter, result, tmp); + __ increment_profile_ctr(step, counter, result, tmp); } if (notify && (!backedge || UseOnStackReplacement)) { LIR_Opr meth = LIR_OprFact::metadataConst(method->constant_encoding()); From 3bbdfad389ec5278194f98d26816a85aea581aac Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Mon, 27 Oct 2025 16:40:37 +0000 Subject: [PATCH 19/48] Tmp --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 37 +++++++++++++++---- src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp | 10 +++++ src/hotspot/share/c1/c1_LIRGenerator.cpp | 7 ++-- 3 files changed, 43 insertions(+), 11 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 947ad7f60e0bd..cf0b11b08f700 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ 
b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -2766,14 +2766,21 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { } void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op) { - Register temp = temp_op->as_register(); + // Register temp = temp_op->is_register() ? temp_op->as_register() : noreg; + Register temp = temp_op->is_register() ? temp_op->as_register() : noreg; Address dest_adr = as_Address(addr->as_address_ptr()); assert(ProfileCaptureRatio != 1, "ProfileCaptureRatio must be != 1"); +#ifndef PRODUCT + if (CommentedAssembly) { + __ block_comment("increment_profile_ctr" " {"); + } +#endif + int profile_capture_ratio = ProfileCaptureRatio; int ratio_shift = exact_log2(profile_capture_ratio); - int threshold = (1ull << 32) >> ratio_shift; + auto threshold = (1ull << 32) >> ratio_shift; assert(threshold > 0, "must be"); @@ -2796,27 +2803,35 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de switch (dest->type()) { case T_INT: { jint inc = incr->as_constant_ptr()->as_jint_bits() * profile_capture_ratio; - __ movl(dest->as_register(), inc); + if (dest->is_register()) __ movl(dest->as_register(), inc); __ cmpl(r_profile_rng, threshold); __ jccb(Assembler::aboveEqual, dont); - __ movl(temp, dest_adr); - __ addl(temp, inc); - __ movl(dest_adr, temp); - __ movl(dest->as_register(), temp); + if (dest->is_register()) { + __ movl(temp, dest_adr); + __ addl(temp, inc); + __ movl(dest_adr, temp); + __ movl(dest->as_register(), temp); + } else { + __ addl(dest_adr, inc); + } break; } case T_LONG: { jint inc = incr->as_constant_ptr()->as_jint_bits() * profile_capture_ratio; - __ movq(dest->as_register_lo(), (jlong)inc); + if (dest->is_register()) __ movq(dest->as_register_lo(), (jlong)inc); __ cmpl(r_profile_rng, threshold); __ jccb(Assembler::aboveEqual, dont); + if (dest->is_register()) { __ movq(temp, dest_adr); __ addq(temp, inc); __ movq(dest_adr, temp); __ 
movq(dest->as_register_lo(), temp); + } else { + __ addq(dest_adr, inc); + } break; } @@ -2824,6 +2839,12 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de ShouldNotReachHere(); } } + #ifndef PRODUCT + if (CommentedAssembly) { + __ block_comment("} " "increment_profile_ctr"); + } +#endif + __ bind(dont); } diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index 34a6212beb69a..d6d0ac5dfae7f 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -321,6 +321,11 @@ void C1_MacroAssembler::step_random(Register state, Register temp) { void C1_MacroAssembler::step_profile_rng(Register state, Register temp, Label &skip) { if (ProfileCaptureRatio != 1) { +#ifndef PRODUCT + if (CommentedAssembly) { + block_comment("step_profile_rng" " {"); + } +#endif step_random(state, temp); int ratio_shift = exact_log2(ProfileCaptureRatio); @@ -328,6 +333,11 @@ void C1_MacroAssembler::step_profile_rng(Register state, Register temp, Label &s cmpl(state, threshold); jcc(Assembler::aboveEqual, skip); +#ifndef PRODUCT + if (CommentedAssembly) { + block_comment("} " "step_profile_rng"); + } +#endif } } diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index a0cbf2640049d..af5d8b6b507a1 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -2385,9 +2385,10 @@ void LIRGenerator::do_Goto(Goto* x) { } else { // LIR_Address *counter_addr = new LIR_Address(md_reg, offset, T_INT); LIR_Opr tmp = new_register(T_INT); - LIR_Opr dummy = new_register(T_INT); + // LIR_Opr dummy = new_register(T_INT); + LIR_Opr dummy = LIR_OprFact::intConst(0); LIR_Opr inc = LIR_OprFact::intConst(DataLayout::counter_increment); - __ increment_profile_ctr(inc, counter_addr, tmp, dummy); + __ increment_profile_ctr(inc, counter_addr, dummy, tmp); } } @@ -3186,7 +3187,7 @@ void 
LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, ShouldNotReachHere(); } LIR_Address* counter = new LIR_Address(counter_holder, offset, T_INT); - LIR_Opr result = new_register(T_INT); + LIR_Opr result = notify ? new_register(T_INT) : LIR_OprFact::intConst(0); LIR_Opr tmp = new_register(T_INT); if (ProfileCaptureRatio == 1) { __ load(counter, result); From f137d65b6ade76d0d4a5585d9738ce0de0255384 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Thu, 30 Oct 2025 11:21:46 +0000 Subject: [PATCH 20/48] Temp --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 72 ++++++++++--------- src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp | 4 +- src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp | 25 +++++++ src/hotspot/cpu/x86/c1_Runtime1_x86.cpp | 3 + src/hotspot/cpu/x86/macroAssembler_x86.hpp | 2 + src/hotspot/share/c1/c1_LIRAssembler.cpp | 4 +- src/hotspot/share/c1/c1_LIRAssembler.hpp | 3 +- src/hotspot/share/c1/c1_LIRGenerator.cpp | 36 +++++----- .../share/compiler/compiler_globals.hpp | 2 +- .../libawt/java2d/pipe/ShapeSpanIterator.c | 2 +- 10 files changed, 96 insertions(+), 57 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index cf0b11b08f700..7172123daaee9 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -2770,7 +2770,7 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de Register temp = temp_op->is_register() ? 
temp_op->as_register() : noreg; Address dest_adr = as_Address(addr->as_address_ptr()); - assert(ProfileCaptureRatio != 1, "ProfileCaptureRatio must be != 1"); + // assert(ProfileCaptureRatio != 1, "ProfileCaptureRatio must be != 1"); #ifndef PRODUCT if (CommentedAssembly) { @@ -2784,57 +2784,65 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de assert(threshold > 0, "must be"); - __ step_random(r_profile_rng, temp); + if (profile_capture_ratio > 1) { + __ step_random(r_profile_rng, temp); + } Label dont; if (incr->is_register()) { Register inc = incr->as_register(); - __ movl(dest->as_register(), inc); - __ cmpl(r_profile_rng, threshold); - __ jccb(Assembler::aboveEqual, dont); - + if (profile_capture_ratio > 1) { + __ movl(dest->as_register(), inc); + __ cmpl(r_profile_rng, threshold); + __ jccb(Assembler::aboveEqual, dont); + } __ movl(temp, dest_adr); - __ sall(inc, ratio_shift); + if (profile_capture_ratio > 1) { + __ sall(inc, ratio_shift); + } __ addl(temp, inc); __ movl(dest_adr, temp); __ movl(dest->as_register(), temp); } else { + jint inc = incr->as_constant_ptr()->as_jint_bits(); switch (dest->type()) { - case T_INT: { - jint inc = incr->as_constant_ptr()->as_jint_bits() * profile_capture_ratio; - if (dest->is_register()) __ movl(dest->as_register(), inc); + case T_INT: { + if (dest->is_register()) __ movl(dest->as_register(), inc); + if (profile_capture_ratio > 1) { __ cmpl(r_profile_rng, threshold); __ jccb(Assembler::aboveEqual, dont); - - if (dest->is_register()) { - __ movl(temp, dest_adr); - __ addl(temp, inc); - __ movl(dest_adr, temp); - __ movl(dest->as_register(), temp); - } else { - __ addl(dest_adr, inc); - } - - break; } - case T_LONG: { - jint inc = incr->as_constant_ptr()->as_jint_bits() * profile_capture_ratio; - if (dest->is_register()) __ movq(dest->as_register_lo(), (jlong)inc); + inc *= profile_capture_ratio; + if (dest->is_register()) { + __ movl(temp, dest_adr); + __ addl(temp, inc); + __ 
movl(dest_adr, temp); + __ movl(dest->as_register(), temp); + } else { + __ addl(dest_adr, inc); + } + + break; + } + case T_LONG: { + if (dest->is_register()) __ movq(dest->as_register_lo(), (jlong)inc); + if (profile_capture_ratio > 1) { __ cmpl(r_profile_rng, threshold); __ jccb(Assembler::aboveEqual, dont); - - if (dest->is_register()) { + } + inc *= profile_capture_ratio; + if (dest->is_register()) { __ movq(temp, dest_adr); __ addq(temp, inc); __ movq(dest_adr, temp); __ movq(dest->as_register_lo(), temp); - } else { - __ addq(dest_adr, inc); - } - - break; + } else { + __ addq(dest_adr, inc); } + + break; + } default: ShouldNotReachHere(); } @@ -2848,8 +2856,6 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de __ bind(dont); } -int ibaz; - void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ciMethod* method = op->profiled_method(); int bci = op->profiled_bci(); diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp index a7e143b2bd63c..b2ca0dd1a168f 100644 --- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp @@ -1380,9 +1380,9 @@ void LIRGenerator::do_If(If* x) { // Generate branch profiling. Profiling code doesn't kill flags. 
profile_branch(x, cond); // If we're subsampling counter updates, then profiling code kills flags - if (ProfileCaptureRatio != 1) { + // if (ProfileCaptureRatio != 1) { __ cmp(lir_cond(cond), left, right); - } + // } move_to_phi(x->state()); if (x->x()->type()->is_float_kind()) { __ branch(lir_cond(cond), x->tsux(), x->usux()); diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index d6d0ac5dfae7f..d8f0867751db9 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -297,6 +297,8 @@ void C1_MacroAssembler::invalidate_registers(bool inv_rax, bool inv_rbx, bool in #endif } +int baz, barf; + // Randomized profile capture. void C1_MacroAssembler::step_random(Register state, Register temp) { @@ -317,6 +319,28 @@ void C1_MacroAssembler::step_random(Register state, Register temp) { movl(temp, 1103515245); imull(state, temp); addl(state, 12345); + + int ratio_shift = exact_log2(ProfileCaptureRatio); + int threshold = (1ull << 32) >> ratio_shift; + + if (getenv("APH_BAZ_BARF")) { + Label big, done; + push(temp); + cmpl(state, threshold); + jcc(Assembler::aboveEqual, big); + + lea(temp, ExternalAddress((address)&baz)); + addl(Address(temp), 1); + jmp(done); + + bind(big); + lea(temp, ExternalAddress((address)&barf)); + addl(Address(temp), 1); + + bind(done); + pop(temp); + } + } void C1_MacroAssembler::step_profile_rng(Register state, Register temp, Label &skip) { @@ -333,6 +357,7 @@ void C1_MacroAssembler::step_profile_rng(Register state, Register temp, Label &s cmpl(state, threshold); jcc(Assembler::aboveEqual, skip); + #ifndef PRODUCT if (CommentedAssembly) { block_comment("} " "step_profile_rng"); diff --git a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp index 96439c719907e..8c1de238c68db 100644 --- a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp +++ b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp @@ -810,6 +810,7 @@ OopMapSet* 
Runtime1::generate_patching(StubAssembler* sasm, address target) { return oop_maps; } +int foo; OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { @@ -867,6 +868,8 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { Register bci = rax, method = rbx; __ enter(); OopMap* map = save_live_registers(sasm, 3); + __ lea(rbx, ExternalAddress((address)&foo)); + __ addl(Address(rbx), 1); // Retrieve bci __ movl(bci, Address(rbp, 2*BytesPerWord)); // And a pointer to the Method* diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index ed1343d9c8ce4..b2bb47165b482 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -38,6 +38,8 @@ // Instructions for which a 'better' code sequence exists depending // on arguments should also go in here. +extern int baz, barf; + class MacroAssembler: public Assembler { friend class LIR_Assembler; friend class Runtime1; // as_Address() diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp index 050fefc037fac..b9a01d5f769ca 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -725,7 +725,9 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { break; case lir_increment_profile_ctr: - increment_profile_ctr(op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr()); + increment_profile_ctr(op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr()// , + // op->continuation_opr() + ); break; default: diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp index 6deb37a4a9b32..9d74f160bf19b 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp @@ -235,7 +235,8 @@ class LIR_Assembler: public CompilationResourceObj { void align_backward_branch_target(); void align_call(LIR_Code code); - void increment_profile_ctr(LIR_Opr 
incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp); + void increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp// , LIR_Opr continuation + ); void negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp = LIR_OprFact::illegalOpr); void leal(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code = lir_patch_none, CodeEmitInfo* info = nullptr); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index af5d8b6b507a1..32d27ff4679c2 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -928,15 +928,15 @@ void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { LIR_Opr data_reg = new_pointer_register(); LIR_Address* data_addr = new LIR_Address(md_reg, data_offset_reg, data_reg->type()); LIR_Address* fake_incr_value = new LIR_Address(data_reg, DataLayout::counter_increment, T_INT); - if (ProfileCaptureRatio == 1) { - __ move(data_addr, data_reg); - // Use leal instead of add to avoid destroying condition codes on x86 - __ leal(LIR_OprFact::address(fake_incr_value), data_reg); - __ move(data_reg, data_addr); - } else { + // if (ProfileCaptureRatio == 1) { + // __ move(data_addr, data_reg); + // // Use leal instead of add to avoid destroying condition codes on x86 + // __ leal(LIR_OprFact::address(fake_incr_value), data_reg); + // __ move(data_reg, data_addr); + // } else { LIR_Opr tmp = new_register(T_INT); __ increment_profile_ctr(LIR_OprFact::intConst(DataLayout::counter_increment), data_addr, data_reg, tmp); - } + // } } } @@ -2380,16 +2380,16 @@ void LIRGenerator::do_Goto(Goto* x) { LIR_Address *counter_addr = new LIR_Address(md_reg, offset, NOT_LP64(T_INT) LP64_ONLY(T_LONG)); - if (ProfileCaptureRatio == 1) { - increment_counter(counter_addr, DataLayout::counter_increment); - } else { + // if (ProfileCaptureRatio == 1) { + // increment_counter(counter_addr, DataLayout::counter_increment); + // } else { // LIR_Address *counter_addr = new 
LIR_Address(md_reg, offset, T_INT); LIR_Opr tmp = new_register(T_INT); // LIR_Opr dummy = new_register(T_INT); LIR_Opr dummy = LIR_OprFact::intConst(0); LIR_Opr inc = LIR_OprFact::intConst(DataLayout::counter_increment); __ increment_profile_ctr(inc, counter_addr, dummy, tmp); - } + // } } // emit phi-instruction move after safepoint since this simplifies @@ -3189,17 +3189,15 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, LIR_Address* counter = new LIR_Address(counter_holder, offset, T_INT); LIR_Opr result = notify ? new_register(T_INT) : LIR_OprFact::intConst(0); LIR_Opr tmp = new_register(T_INT); - if (ProfileCaptureRatio == 1) { - __ load(counter, result); - __ add(result, step, result); - __ store(result, counter); - } else { - __ increment_profile_ctr(step, counter, result, tmp); - } + + __ increment_profile_ctr(step, counter, result, tmp); if (notify && (!backedge || UseOnStackReplacement)) { LIR_Opr meth = LIR_OprFact::metadataConst(method->constant_encoding()); // The bci for info can point to cmp for if's we want the if bci CodeStub* overflow = new CounterOverflowStub(info, bci, meth); + + __ increment_profile_ctr(step, counter, result, tmp); + int freq = frequency << InvocationCounter::count_shift; if (freq == 0) { if (!step->is_constant()) { @@ -3220,6 +3218,8 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, __ branch(lir_cond_equal, overflow); } __ branch_destination(overflow->continuation()); + } else { + __ increment_profile_ctr(step, counter, result, tmp); } } diff --git a/src/hotspot/share/compiler/compiler_globals.hpp b/src/hotspot/share/compiler/compiler_globals.hpp index 62383a7fef3a2..c39d839132970 100644 --- a/src/hotspot/share/compiler/compiler_globals.hpp +++ b/src/hotspot/share/compiler/compiler_globals.hpp @@ -399,7 +399,7 @@ "Reduce and randomize tiered-compilation profile captures " \ "in order to reduce cache contention on shared method data. 
" \ "Must be a power of 2.") \ - range(1, 256) + range(1, 65536) // end of COMPILER_FLAGS diff --git a/src/java.desktop/share/native/libawt/java2d/pipe/ShapeSpanIterator.c b/src/java.desktop/share/native/libawt/java2d/pipe/ShapeSpanIterator.c index 5b34ac9fbaa64..d8ab84abb7bcd 100644 --- a/src/java.desktop/share/native/libawt/java2d/pipe/ShapeSpanIterator.c +++ b/src/java.desktop/share/native/libawt/java2d/pipe/ShapeSpanIterator.c @@ -580,7 +580,7 @@ Java_sun_java2d_pipe_ShapeSpanIterator_appendPoly { pathData *pd; int i; - jint *xPoints, *yPoints; + jint *xPoints = 0, *yPoints = 0; jboolean oom = JNI_FALSE; jfloat xoff = (jfloat) ixoff, yoff = (jfloat) iyoff; From 0637a8caab6c5d7f2ca0ad1736ebd5df2413ec7e Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Thu, 30 Oct 2025 17:12:50 +0000 Subject: [PATCH 21/48] Branch around --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 11 ++++---- src/hotspot/share/c1/c1_LIR.cpp | 6 ++-- src/hotspot/share/c1/c1_LIR.hpp | 31 +++++++++++++++++---- src/hotspot/share/c1/c1_LIRAssembler.cpp | 5 ++-- src/hotspot/share/c1/c1_LIRAssembler.hpp | 4 +-- src/hotspot/share/c1/c1_LIRGenerator.cpp | 2 +- 6 files changed, 41 insertions(+), 18 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 7172123daaee9..693406d9b58f9 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -2765,7 +2765,8 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { __ load_klass(result, obj, rscratch1); } -void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op) { +void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op, + CodeStub* overflow) { // Register temp = temp_op->is_register() ? temp_op->as_register() : noreg; Register temp = temp_op->is_register() ? 
temp_op->as_register() : noreg; Address dest_adr = as_Address(addr->as_address_ptr()); @@ -2789,13 +2790,13 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de } Label dont; + Label *skip = overflow ? overflow->continuation() : &dont; if (incr->is_register()) { Register inc = incr->as_register(); if (profile_capture_ratio > 1) { - __ movl(dest->as_register(), inc); __ cmpl(r_profile_rng, threshold); - __ jccb(Assembler::aboveEqual, dont); + __ jccb(Assembler::aboveEqual, *skip); } __ movl(temp, dest_adr); if (profile_capture_ratio > 1) { @@ -2811,7 +2812,7 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de if (dest->is_register()) __ movl(dest->as_register(), inc); if (profile_capture_ratio > 1) { __ cmpl(r_profile_rng, threshold); - __ jccb(Assembler::aboveEqual, dont); + __ jccb(Assembler::aboveEqual, *skip); } inc *= profile_capture_ratio; if (dest->is_register()) { @@ -2829,7 +2830,7 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de if (dest->is_register()) __ movq(dest->as_register_lo(), (jlong)inc); if (profile_capture_ratio > 1) { __ cmpl(r_profile_rng, threshold); - __ jccb(Assembler::aboveEqual, dont); + __ jccb(Assembler::aboveEqual, *skip); } inc *= profile_capture_ratio; if (dest->is_register()) { diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index 468877d580946..516e59e419f19 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -1265,13 +1265,15 @@ void LIR_List::volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offs } -void LIR_List::increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp) { +void LIR_List::increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, + CodeStub* overflow) { append(new LIR_Op2( lir_increment_profile_ctr, src, LIR_OprFact::address(addr), res, - tmp)); + tmp, + overflow)); } diff --git 
a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index f4962e5e87a9a..68fbc8075a91b 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -1567,6 +1567,7 @@ class LIR_Op2: public LIR_Op { LIR_Opr _tmp5; LIR_Condition _condition; BasicType _type; + CodeStub* _overflow; void verify() const; @@ -1581,7 +1582,8 @@ class LIR_Op2: public LIR_Op { , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) , _condition(condition) - , _type(type) { + , _type(type) + , _overflow(nullptr) { assert(code == lir_cmp || code == lir_branch || code == lir_cond_float_branch || code == lir_assert, "code check"); } @@ -1595,7 +1597,8 @@ class LIR_Op2: public LIR_Op { , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) , _condition(condition) - , _type(type) { + , _type(type) + , _overflow(nullptr) { assert(code == lir_cmove, "code check"); assert(type != T_ILLEGAL, "cmove should have type"); } @@ -1611,7 +1614,8 @@ class LIR_Op2: public LIR_Op { , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) , _condition(lir_cond_unknown) - , _type(type) { + , _type(type) + , _overflow(nullptr) { assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); } @@ -1626,7 +1630,23 @@ class LIR_Op2: public LIR_Op { , _tmp4(tmp4) , _tmp5(tmp5) , _condition(lir_cond_unknown) - , _type(T_ILLEGAL) { + , _type(T_ILLEGAL) + , _overflow(nullptr) { + assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, CodeStub* overflow) + : LIR_Op(code, result, nullptr) + , _opr1(opr1) + , _opr2(opr2) + , _tmp1(tmp1) + , _tmp2(LIR_OprFact::illegalOpr) + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) + , _condition(lir_cond_unknown) + , 
_type(T_ILLEGAL) + , _overflow(overflow) { assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); } @@ -1641,6 +1661,7 @@ class LIR_Op2: public LIR_Op { LIR_Condition condition() const { assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition; } + CodeStub *overflow() const { return _overflow; } void set_condition(LIR_Condition condition) { assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch, "only valid for branch"); _condition = condition; } @@ -2238,7 +2259,7 @@ class LIR_List: public CompilationResourceObj { void volatile_store_mem_reg(LIR_Opr src, LIR_Address* address, CodeEmitInfo* info, LIR_PatchCode patch_code = lir_patch_none); void volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offset, BasicType type, CodeEmitInfo* info, LIR_PatchCode patch_code); - void increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp); + void increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, CodeStub* overflow = nullptr); void idiv(LIR_Opr left, LIR_Opr right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); void idiv(LIR_Opr left, int right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp index b9a01d5f769ca..73c2c8286801b 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -725,9 +725,8 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { break; case lir_increment_profile_ctr: - increment_profile_ctr(op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr()// , - // op->continuation_opr() - ); + increment_profile_ctr(op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr(), + op->overflow()); break; default: diff --git 
a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp index 9d74f160bf19b..2ac1cba8b6490 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp @@ -235,8 +235,8 @@ class LIR_Assembler: public CompilationResourceObj { void align_backward_branch_target(); void align_call(LIR_Code code); - void increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp// , LIR_Opr continuation - ); + void increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp, + CodeStub *overflow); void negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp = LIR_OprFact::illegalOpr); void leal(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code = lir_patch_none, CodeEmitInfo* info = nullptr); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 32d27ff4679c2..4533f5fd5394e 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -3196,7 +3196,7 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, // The bci for info can point to cmp for if's we want the if bci CodeStub* overflow = new CounterOverflowStub(info, bci, meth); - __ increment_profile_ctr(step, counter, result, tmp); + __ increment_profile_ctr(step, counter, result, tmp, overflow); int freq = frequency << InvocationCounter::count_shift; if (freq == 0) { From d715a21945e8aef4be16a3321bb44124e62537c1 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Fri, 31 Oct 2025 15:15:04 +0000 Subject: [PATCH 22/48] Blah --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 12 +++++++++--- src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp | 4 +++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 693406d9b58f9..20584cff59154 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ 
-2796,7 +2796,9 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de Register inc = incr->as_register(); if (profile_capture_ratio > 1) { __ cmpl(r_profile_rng, threshold); - __ jccb(Assembler::aboveEqual, *skip); + if (! getenv("APH_DISABLE")) { + __ jccb(Assembler::aboveEqual, *skip); + } } __ movl(temp, dest_adr); if (profile_capture_ratio > 1) { @@ -2812,7 +2814,9 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de if (dest->is_register()) __ movl(dest->as_register(), inc); if (profile_capture_ratio > 1) { __ cmpl(r_profile_rng, threshold); - __ jccb(Assembler::aboveEqual, *skip); + if (! getenv("APH_DISABLE")) { + __ jccb(Assembler::aboveEqual, *skip); + } } inc *= profile_capture_ratio; if (dest->is_register()) { @@ -2830,7 +2834,9 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de if (dest->is_register()) __ movq(dest->as_register_lo(), (jlong)inc); if (profile_capture_ratio > 1) { __ cmpl(r_profile_rng, threshold); - __ jccb(Assembler::aboveEqual, *skip); + if (! getenv("APH_DISABLE")) { + __ jccb(Assembler::aboveEqual, *skip); + } } inc *= profile_capture_ratio; if (dest->is_register()) { diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index d8f0867751db9..c4335d96ad12e 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -356,7 +356,9 @@ void C1_MacroAssembler::step_profile_rng(Register state, Register temp, Label &s int threshold = (1ull << 32) >> ratio_shift; cmpl(state, threshold); - jcc(Assembler::aboveEqual, skip); + if (! 
getenv("APH_DISABLE")) { + jcc(Assembler::aboveEqual, skip); + } #ifndef PRODUCT if (CommentedAssembly) { From ed4fee4bd118f6fbfc8e3aebe57c49cc55865318 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Wed, 5 Nov 2025 15:13:59 +0000 Subject: [PATCH 23/48] Refactor increment_profile_ctr to back end --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 35 ++++++++++++++-- src/hotspot/share/c1/c1_Compilation.cpp | 2 +- src/hotspot/share/c1/c1_LIR.cpp | 22 ++++++++--- src/hotspot/share/c1/c1_LIR.hpp | 22 ++++++++--- src/hotspot/share/c1/c1_LIRAssembler.cpp | 1 + src/hotspot/share/c1/c1_LIRAssembler.hpp | 1 + src/hotspot/share/c1/c1_LIRGenerator.cpp | 44 ++++++++++----------- 7 files changed, 90 insertions(+), 37 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 20584cff59154..bbb1e173e89e8 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -2766,9 +2766,12 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { } void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op, + LIR_Opr freq_op, LIR_Opr step_op, CodeStub* overflow) { // Register temp = temp_op->is_register() ? temp_op->as_register() : noreg; Register temp = temp_op->is_register() ? temp_op->as_register() : noreg; + // RegisterOrConstant dest_adr = addr->is_address() ? as_Address(addr->as_address_ptr()) + Address dest_adr = as_Address(addr->as_address_ptr()); // assert(ProfileCaptureRatio != 1, "ProfileCaptureRatio must be != 1"); @@ -2811,7 +2814,7 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de jint inc = incr->as_constant_ptr()->as_jint_bits(); switch (dest->type()) { case T_INT: { - if (dest->is_register()) __ movl(dest->as_register(), inc); + // if (dest->is_register()) __ movl(dest->as_register(), inc); if (profile_capture_ratio > 1) { __ cmpl(r_profile_rng, threshold); if (! 
getenv("APH_DISABLE")) { @@ -2831,7 +2834,7 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de break; } case T_LONG: { - if (dest->is_register()) __ movq(dest->as_register_lo(), (jlong)inc); + // if (dest->is_register()) __ movq(dest->as_register_lo(), (jlong)inc); if (profile_capture_ratio > 1) { __ cmpl(r_profile_rng, threshold); if (! getenv("APH_DISABLE")) { @@ -2854,7 +2857,33 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de ShouldNotReachHere(); } } - #ifndef PRODUCT + + if (incr->is_valid() && overflow) { + if (!freq_op->is_valid()) { + if (!incr->is_constant()) { + __ cmpl(incr->as_register(), 0); + __ jcc(Assembler::notEqual, *(overflow->entry())); + } else { + __ jmp(*(overflow->entry())); + } + } else { + Register result = + dest->type() == T_INT ? dest->as_register() : + dest->type() == T_LONG ? dest->as_register_lo() : + noreg; + if (!incr->is_constant()) { + // If step is 0, make sure the overflow check below always fails + __ cmpl(incr->as_register(), 0); + __ movl(temp, InvocationCounter::count_increment); + __ cmovl(Assembler::notEqual, result, temp); + } + __ andl(result, freq_op->as_jint()); + __ jcc(Assembler::equal, *overflow->entry()); + } + __ bind(*overflow->continuation()); + } + +#ifndef PRODUCT if (CommentedAssembly) { __ block_comment("} " "increment_profile_ctr"); } diff --git a/src/hotspot/share/c1/c1_Compilation.cpp b/src/hotspot/share/c1/c1_Compilation.cpp index 368cf604eebed..e3554772970c6 100644 --- a/src/hotspot/share/c1/c1_Compilation.cpp +++ b/src/hotspot/share/c1/c1_Compilation.cpp @@ -294,7 +294,7 @@ void Compilation::emit_code_epilog(LIR_Assembler* assembler) { return; } - // generate code or slow cases + // generate code for slow cases assembler->emit_slow_case_stubs(); CHECK_BAILOUT(); diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index 516e59e419f19..ffd4592e91295 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ 
b/src/hotspot/share/c1/c1_LIR.cpp @@ -564,13 +564,16 @@ void LIR_OpVisitState::visit(LIR_Op* op) { case lir_ushr: case lir_xadd: case lir_xchg: - case lir_assert: - case lir_increment_profile_ctr: - { - assert(op->as_Op2() != nullptr, "must be"); + case lir_assert: { LIR_Op2* op2 = (LIR_Op2*)op; assert(op2->_tmp2->is_illegal() && op2->_tmp3->is_illegal() && op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used"); + } + // fallthrough + case lir_increment_profile_ctr: + { + LIR_Op2* op2 = (LIR_Op2*)op; + assert(op->as_Op2() != nullptr, "must be"); if (op2->_info) do_info(op2->_info); if (op2->_opr1->is_valid()) do_input(op2->_opr1); @@ -1017,6 +1020,9 @@ void LIR_OpConvert::emit_code(LIR_Assembler* masm) { void LIR_Op2::emit_code(LIR_Assembler* masm) { masm->emit_op2(this); + if (overflow()) { + masm->append_code_stub(overflow()); + } } void LIR_OpAllocArray::emit_code(LIR_Assembler* masm) { @@ -1266,14 +1272,18 @@ void LIR_List::volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offs void LIR_List::increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, - CodeStub* overflow) { + LIR_Opr freq, LIR_Opr step, + CodeStub* overflow, CodeEmitInfo* info) { append(new LIR_Op2( lir_increment_profile_ctr, src, LIR_OprFact::address(addr), res, tmp, - overflow)); + freq, + tmp, + overflow, + info)); } diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index 68fbc8075a91b..97cbda4a72d64 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -1635,13 +1635,14 @@ class LIR_Op2: public LIR_Op { assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); } - LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, CodeStub* overflow) - : LIR_Op(code, result, nullptr) + LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, + LIR_Opr freq, LIR_Opr step, 
CodeStub* overflow, CodeEmitInfo *info) + : LIR_Op(code, result, info) , _opr1(opr1) , _opr2(opr2) , _tmp1(tmp1) - , _tmp2(LIR_OprFact::illegalOpr) - , _tmp3(LIR_OprFact::illegalOpr) + , _tmp2(freq) + , _tmp3(step) , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) , _condition(lir_cond_unknown) @@ -1650,6 +1651,12 @@ class LIR_Op2: public LIR_Op { assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); } + LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, + CodeStub* overflow) { + LIR_Op2(code, opr1, opr2, result, tmp1, + LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr); + } + LIR_Opr in_opr1() const { return _opr1; } LIR_Opr in_opr2() const { return _opr2; } BasicType type() const { return _type; } @@ -1658,6 +1665,8 @@ class LIR_Op2: public LIR_Op { LIR_Opr tmp3_opr() const { return _tmp3; } LIR_Opr tmp4_opr() const { return _tmp4; } LIR_Opr tmp5_opr() const { return _tmp5; } + LIR_Opr freq_opr() const { return _tmp2; } + LIR_Opr step_opr() const { return _tmp3; } LIR_Condition condition() const { assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition; } @@ -2259,7 +2268,10 @@ class LIR_List: public CompilationResourceObj { void volatile_store_mem_reg(LIR_Opr src, LIR_Address* address, CodeEmitInfo* info, LIR_PatchCode patch_code = lir_patch_none); void volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offset, BasicType type, CodeEmitInfo* info, LIR_PatchCode patch_code); - void increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, CodeStub* overflow = nullptr); + void increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, LIR_Opr freq, LIR_Opr step, CodeStub* overflow, CodeEmitInfo* info); + void increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr 
tmp, CodeStub* overflow = nullptr) { + increment_profile_ctr(src, addr, res,tmp, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, overflow, nullptr); + } void idiv(LIR_Opr left, LIR_Opr right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); void idiv(LIR_Opr left, int right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp index 73c2c8286801b..5a4ae5fc53a8e 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -726,6 +726,7 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { case lir_increment_profile_ctr: increment_profile_ctr(op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr(), + op->freq_opr(), op->step_opr(), op->overflow()); break; diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp index 2ac1cba8b6490..d3624ec3a76a6 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp @@ -236,6 +236,7 @@ class LIR_Assembler: public CompilationResourceObj { void align_call(LIR_Code code); void increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp, + LIR_Opr freq_op, LIR_Opr step_op, CodeStub *overflow); void negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp = LIR_OprFact::illegalOpr); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 4533f5fd5394e..c8186e6e004f6 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -3190,34 +3190,34 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, LIR_Opr result = notify ? 
new_register(T_INT) : LIR_OprFact::intConst(0); LIR_Opr tmp = new_register(T_INT); - __ increment_profile_ctr(step, counter, result, tmp); if (notify && (!backedge || UseOnStackReplacement)) { LIR_Opr meth = LIR_OprFact::metadataConst(method->constant_encoding()); // The bci for info can point to cmp for if's we want the if bci CodeStub* overflow = new CounterOverflowStub(info, bci, meth); - __ increment_profile_ctr(step, counter, result, tmp, overflow); - int freq = frequency << InvocationCounter::count_shift; - if (freq == 0) { - if (!step->is_constant()) { - __ cmp(lir_cond_notEqual, step, LIR_OprFact::intConst(0)); - __ branch(lir_cond_notEqual, overflow); - } else { - __ branch(lir_cond_always, overflow); - } - } else { - LIR_Opr mask = load_immediate(freq, T_INT); - if (!step->is_constant()) { - // If step is 0, make sure the overflow check below always fails - __ cmp(lir_cond_notEqual, step, LIR_OprFact::intConst(0)); - __ cmove(lir_cond_notEqual, result, LIR_OprFact::intConst(InvocationCounter::count_increment), result, T_INT); - } - __ logical_and(result, mask, result); - __ cmp(lir_cond_equal, result, LIR_OprFact::intConst(0)); - __ branch(lir_cond_equal, overflow); - } - __ branch_destination(overflow->continuation()); + __ increment_profile_ctr(step, counter, result, tmp, + LIR_OprFact::intConst(freq), step, overflow, info); + + // if (freq == 0) { + // if (!step->is_constant()) { + // __ cmp(lir_cond_notEqual, step, LIR_OprFact::intConst(0)); + // __ branch(lir_cond_notEqual, overflow); + // } else { + // __ branch(lir_cond_always, overflow); + // } + // } else { + // LIR_Opr mask = load_immediate(freq, T_INT); + // if (!step->is_constant()) { + // // If step is 0, make sure the overflow check below always fails + // __ cmp(lir_cond_notEqual, step, LIR_OprFact::intConst(0)); + // __ cmove(lir_cond_notEqual, result, LIR_OprFact::intConst(InvocationCounter::count_increment), result, T_INT); + // } + // __ logical_and(result, mask, result); + // __ 
cmp(lir_cond_equal, result, LIR_OprFact::intConst(0)); + // __ branch(lir_cond_equal, overflow); + // } + // __ branch_destination(overflow->continuation()); } else { __ increment_profile_ctr(step, counter, result, tmp); } From e7afed1edd6719f73deb6b32f8f809854fb778bc Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Tue, 11 Nov 2025 09:30:22 +0000 Subject: [PATCH 24/48] More --- src/hotspot/cpu/x86/assembler_x86.hpp | 1 + src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp | 135 ++++++++++++ src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 203 +++++++++++------- src/hotspot/share/c1/c1_CodeStubs.hpp | 75 ++++++- src/hotspot/share/c1/c1_LIR.cpp | 1 + src/hotspot/share/c1/c1_LIR.hpp | 2 +- src/hotspot/share/c1/c1_LIRGenerator.cpp | 62 +++--- .../share/compiler/abstractDisassembler.cpp | 2 + 8 files changed, 366 insertions(+), 115 deletions(-) diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 08a22adb4b6b6..b302825682070 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -463,6 +463,7 @@ class Assembler : public AbstractAssembler { friend class AbstractAssembler; // for the non-virtual hack friend class LIR_Assembler; // as_Address() friend class StubGenerator; + friend class CodeStub; // as_Address() public: enum Condition { // The x86 condition codes used for conditional jumps/moves. diff --git a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp index 95ce48f34db73..51e94e9565359 100644 --- a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp +++ b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp @@ -37,6 +37,42 @@ #define __ ce->masm()-> +Address CodeStub::as_Address(LIR_Assembler* ce, LIR_Address* addr, Register tmp) { + if (addr->base()->is_illegal()) { + assert(addr->index()->is_illegal(), "must be illegal too"); + AddressLiteral laddr((address)addr->disp(), relocInfo::none); + if (! 
__ reachable(laddr)) { + __ movptr(tmp, laddr.addr()); + Address res(tmp, 0); + return res; + } else { + return __ as_Address(laddr); + } + } + + Register base = addr->base()->as_pointer_register(); + + if (addr->index()->is_illegal()) { + return Address( base, addr->disp()); + } else if (addr->index()->is_cpu_register()) { + Register index = addr->index()->as_pointer_register(); + return Address(base, index, (Address::ScaleFactor) addr->scale(), addr->disp()); + } else if (addr->index()->is_constant()) { + intptr_t addr_offset = (addr->index()->as_constant_ptr()->as_jint() << addr->scale()) + addr->disp(); + assert(Assembler::is_simm32(addr_offset), "must be"); + + return Address(base, addr_offset); + } else { + Unimplemented(); + return Address(); + } +} + +Address CodeStub::as_Address(LIR_Assembler* ce, LIR_Address* addr) { + return as_Address(ce, addr, rscratch1); +} + + void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); InternalAddress safepoint_pc(ce->masm()->pc() - ce->masm()->offset() + safepoint_offset()); @@ -61,6 +97,105 @@ void CounterOverflowStub::emit_code(LIR_Assembler* ce) { __ jmp(_continuation); } +void ExtendedCounterOverflowStub::emit_code(LIR_Assembler* ce) { + int profile_capture_ratio = ProfileCaptureRatio; + int ratio_shift = exact_log2(profile_capture_ratio); + auto threshold = (1ull << 32) >> ratio_shift; + + Label overflow_entry; + + assert(threshold > 0, "must be"); + + __ bind(_entry); + + Register temp = _temp_op->as_register(); + Address dest_adr = as_Address(ce, _addr->as_address_ptr()); + + if (_incr->is_register()) { + Register inc = _incr->as_register(); + __ movl(temp, dest_adr); + if (profile_capture_ratio > 1) { + __ sall(inc, ratio_shift); + } + __ addl(temp, inc); + __ movl(dest_adr, temp); + __ movl(_dest->as_register(), temp); + } else { + jint inc = _incr->as_constant_ptr()->as_jint_bits(); + switch (_dest->type()) { + case T_INT: { + inc *= profile_capture_ratio; + if (_dest->is_register()) { + __ 
movl(temp, dest_adr); + __ addl(temp, inc); + __ movl(dest_adr, temp); + __ movl(_dest->as_register(), temp); + } else { + __ addl(dest_adr, inc); + } + + break; + } + case T_LONG: { + inc *= profile_capture_ratio; + if (_dest->is_register()) { + __ movq(temp, dest_adr); + __ addq(temp, inc); + __ movq(dest_adr, temp); + __ movq(_dest->as_register_lo(), temp); + } else { + __ addq(dest_adr, inc); + } + + break; + } + default: + ShouldNotReachHere(); + } + } + + + if (_incr->is_valid()) { + if (!_freq_op->is_valid()) { + if (!_incr->is_constant()) { + __ cmpl(_incr->as_register(), 0); + __ jccb(Assembler::equal, overflow_entry); + } else { + __ jmp(_notify ? overflow_entry : _continuation); + } + } else { + Register result = + _dest->type() == T_INT ? _dest->as_register() : + _dest->type() == T_LONG ? _dest->as_register_lo() : + noreg; + if (!_incr->is_constant()) { + // If step is 0, make sure the overflow check below always fails + __ cmpl(_incr->as_register(), 0); + __ movl(temp, InvocationCounter::count_increment); + __ cmovl(Assembler::notEqual, result, temp); + } + __ andl(result, _freq_op->as_jint()); + __ jcc(Assembler::notEqual, _continuation); + } + } else { + __ jmp(_continuation); + } + + __ bind(overflow_entry); + + if (_notify) { + Metadata *m = _method->as_constant_ptr()->as_metadata(); + ce->store_parameter(m, 1); + ce->store_parameter(_bci, 0); + __ call(RuntimeAddress(Runtime1::entry_for(StubId::c1_counter_overflow_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + + __ jmp(_continuation); + } +} + + void RangeCheckStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); if (_info->deoptimize_on_exception()) { diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index bbb1e173e89e8..2f0419fe8dc00 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -2765,6 +2765,8 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { 
__ load_klass(result, obj, rscratch1); } +// int ploopy; + void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op, LIR_Opr freq_op, LIR_Opr step_op, CodeStub* overflow) { @@ -2788,6 +2790,9 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de assert(threshold > 0, "must be"); + // __ mov64(temp, (uintptr_t)&ploopy); + // __ addl(Address(temp, 0), 1); + if (profile_capture_ratio > 1) { __ step_random(r_profile_rng, temp); } @@ -2795,93 +2800,101 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de Label dont; Label *skip = overflow ? overflow->continuation() : &dont; - if (incr->is_register()) { - Register inc = incr->as_register(); - if (profile_capture_ratio > 1) { - __ cmpl(r_profile_rng, threshold); - if (! getenv("APH_DISABLE")) { - __ jccb(Assembler::aboveEqual, *skip); - } - } - __ movl(temp, dest_adr); - if (profile_capture_ratio > 1) { - __ sall(inc, ratio_shift); + if (profile_capture_ratio > 1 && overflow) { + __ cmpl(r_profile_rng, threshold); + if (! getenv("APH_DISABLE")) { + __ jcc(Assembler::below, *overflow->entry()); } - __ addl(temp, inc); - __ movl(dest_adr, temp); - __ movl(dest->as_register(), temp); } else { - jint inc = incr->as_constant_ptr()->as_jint_bits(); - switch (dest->type()) { - case T_INT: { - // if (dest->is_register()) __ movl(dest->as_register(), inc); - if (profile_capture_ratio > 1) { - __ cmpl(r_profile_rng, threshold); - if (! getenv("APH_DISABLE")) { - __ jccb(Assembler::aboveEqual, *skip); - } - } - inc *= profile_capture_ratio; - if (dest->is_register()) { - __ movl(temp, dest_adr); - __ addl(temp, inc); - __ movl(dest_adr, temp); - __ movl(dest->as_register(), temp); - } else { - __ addl(dest_adr, inc); + __ block_comment("increment_profile_ctrX" " {"); + if (incr->is_register()) { + Register inc = incr->as_register(); + if (profile_capture_ratio > 1) { + __ cmpl(r_profile_rng, threshold); + if (! 
getenv("APH_DISABLE")) { + __ jccb(Assembler::aboveEqual, *skip); } - - break; } - case T_LONG: { - // if (dest->is_register()) __ movq(dest->as_register_lo(), (jlong)inc); - if (profile_capture_ratio > 1) { - __ cmpl(r_profile_rng, threshold); - if (! getenv("APH_DISABLE")) { - __ jccb(Assembler::aboveEqual, *skip); + __ movl(temp, dest_adr); + if (profile_capture_ratio > 1) { + __ sall(inc, ratio_shift); + } + __ addl(temp, inc); + __ movl(dest_adr, temp); + __ movl(dest->as_register(), temp); + } else { + jint inc = incr->as_constant_ptr()->as_jint_bits(); + switch (dest->type()) { + case T_INT: { + // if (dest->is_register()) __ movl(dest->as_register(), inc); + if (profile_capture_ratio > 1) { + __ cmpl(r_profile_rng, threshold); + if (! getenv("APH_DISABLE")) { + __ jccb(Assembler::aboveEqual, *skip); + } } + inc *= profile_capture_ratio; + if (dest->is_register()) { + __ movl(temp, dest_adr); + __ addl(temp, inc); + __ movl(dest_adr, temp); + __ movl(dest->as_register(), temp); + } else { + __ addl(dest_adr, inc); + } + + break; } - inc *= profile_capture_ratio; - if (dest->is_register()) { - __ movq(temp, dest_adr); - __ addq(temp, inc); - __ movq(dest_adr, temp); - __ movq(dest->as_register_lo(), temp); - } else { - __ addq(dest_adr, inc); - } + case T_LONG: { + // if (dest->is_register()) __ movq(dest->as_register_lo(), (jlong)inc); + if (profile_capture_ratio > 1) { + __ cmpl(r_profile_rng, threshold); + if (! 
getenv("APH_DISABLE")) { + __ jccb(Assembler::aboveEqual, *skip); + } + } + inc *= profile_capture_ratio; + if (dest->is_register()) { + __ movq(temp, dest_adr); + __ addq(temp, inc); + __ movq(dest_adr, temp); + __ movq(dest->as_register_lo(), temp); + } else { + __ addq(dest_adr, inc); + } - break; + break; + } + default: + ShouldNotReachHere(); } - default: - ShouldNotReachHere(); - } - } - if (incr->is_valid() && overflow) { - if (!freq_op->is_valid()) { - if (!incr->is_constant()) { - __ cmpl(incr->as_register(), 0); - __ jcc(Assembler::notEqual, *(overflow->entry())); - } else { - __ jmp(*(overflow->entry())); - } - } else { - Register result = - dest->type() == T_INT ? dest->as_register() : - dest->type() == T_LONG ? dest->as_register_lo() : - noreg; - if (!incr->is_constant()) { - // If step is 0, make sure the overflow check below always fails - __ cmpl(incr->as_register(), 0); - __ movl(temp, InvocationCounter::count_increment); - __ cmovl(Assembler::notEqual, result, temp); + if (incr->is_valid() && overflow) { + if (!freq_op->is_valid()) { + if (!incr->is_constant()) { + __ cmpl(incr->as_register(), 0); + __ jcc(Assembler::notEqual, *(overflow->entry())); + } else { + __ jmp(*(overflow->entry())); + } + } else { + Register result = + dest->type() == T_INT ? dest->as_register() : + dest->type() == T_LONG ? 
dest->as_register_lo() : + noreg; + if (!incr->is_constant()) { + // If step is 0, make sure the overflow check below always fails + __ cmpl(incr->as_register(), 0); + __ movl(temp, InvocationCounter::count_increment); + __ cmovl(Assembler::notEqual, result, temp); + } + __ andl(result, freq_op->as_jint()); + __ jcc(Assembler::equal, *overflow->entry()); + } } - __ andl(result, freq_op->as_jint()); - __ jcc(Assembler::equal, *overflow->entry()); } - __ bind(*overflow->continuation()); } + __ bind(*skip); #ifndef PRODUCT if (CommentedAssembly) { @@ -2893,16 +2906,37 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de } void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { + Label dont; + + Register temp = op->tmp1()->as_register_lo(); + + EmitProfileCallStub *stub = new EmitProfileCallStub(); + + int profile_capture_ratio = ProfileCaptureRatio; + int ratio_shift = exact_log2(profile_capture_ratio); + auto threshold = (1ull << 32) >> ratio_shift; + assert(threshold > 0, "must be"); + + if (profile_capture_ratio > 1) { + __ step_random(r_profile_rng, temp); + __ cmpl(r_profile_rng, threshold); + __ jcc(Assembler::below, *stub->entry()); + } else { + __ jmp(*stub->entry()); + // abort(); + } + +#undef __ +#define __ ce->masm()-> + auto lambda = [=] (LIR_Assembler* ce, LIR_OpProfileCall* op) { ciMethod* method = op->profiled_method(); int bci = op->profiled_bci(); ciMethod* callee = op->profiled_callee(); Register tmp_load_klass = rscratch1; - Label dont; - Register temp = op->tmp1()->as_register_lo(); - __ step_profile_rng(r_profile_rng, temp, dont); + __ bind(*stub->entry()); // Update counter for all call types ciMethodData* md = method->method_data_or_null(); @@ -2964,12 +2998,23 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { __ bind(update_done); } + exit: {} } else { // Static call __ addptr(counter_addr, DataLayout::counter_increment); } + __ jmp(*stub->continuation()); +#undef __ +#define __ _masm-> + 
}; + + Fubarbaz_base *ff = new Fubarbaz(lambda, op); + stub->set_doit(ff); + append_code_stub(stub); + + __ bind(*stub->continuation()); - exit: + // lambda(this, op); __ bind(dont); } diff --git a/src/hotspot/share/c1/c1_CodeStubs.hpp b/src/hotspot/share/c1/c1_CodeStubs.hpp index a02368487c5a2..e1c87b97e38fb 100644 --- a/src/hotspot/share/c1/c1_CodeStubs.hpp +++ b/src/hotspot/share/c1/c1_CodeStubs.hpp @@ -61,6 +61,8 @@ class CodeStub: public CompilationResourceObj { #ifndef PRODUCT virtual void print_name(outputStream* out) const = 0; #endif + Address as_Address(LIR_Assembler* ce, LIR_Address* addr, Register tmp); + Address as_Address(LIR_Assembler* ce, LIR_Address* addr); // label access Label* entry() { return &_entry; } @@ -103,7 +105,7 @@ class C1SafepointPollStub: public CodeStub { }; class CounterOverflowStub: public CodeStub { - private: + protected: CodeEmitInfo* _info; int _bci; LIR_Opr _method; @@ -117,8 +119,8 @@ class CounterOverflowStub: public CodeStub { virtual void emit_code(LIR_Assembler* e); virtual void visit(LIR_OpVisitState* visitor) { - visitor->do_slow_case(_info); - visitor->do_input(_method); + if (_info) visitor->do_slow_case(_info); + if (_method->is_valid()) visitor->do_input(_method); } #ifndef PRODUCT @@ -127,6 +129,73 @@ class CounterOverflowStub: public CodeStub { }; +class ExtendedCounterOverflowStub: public CounterOverflowStub { + private: + LIR_Opr _incr; + LIR_Opr _addr; + LIR_Opr _dest; + LIR_Opr _temp_op; + LIR_Opr _freq_op; + bool _notify; + +public: + ExtendedCounterOverflowStub(CodeEmitInfo* info, int bci, LIR_Opr method, + LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op, + LIR_Opr freq_op, bool notify) + : CounterOverflowStub(info, bci, method), + _incr(incr), _addr(addr), _dest(dest), _temp_op(temp_op), _freq_op(freq_op), + _notify(notify) { } + + virtual void emit_code(LIR_Assembler* e); + + virtual void visit(LIR_OpVisitState* visitor) { + CounterOverflowStub::visit(visitor); + visitor->do_input(_incr); + 
visitor->do_input(_addr); + if (_dest->is_valid()) visitor->do_output(_dest); + visitor->do_temp(_temp_op); + if (_freq_op->is_valid()) visitor->do_input(_freq_op); + } + +#ifndef PRODUCT + virtual void print_name(outputStream* out) const { out->print("ExtendedCounterOverflowStub"); } +#endif // PRODUCT + +}; + +class Fubarbaz_base : public CompilationResourceObj { +public: + virtual void operator() (LIR_Assembler* ce) = 0; +}; + +template +struct Fubarbaz : public Fubarbaz_base { + T _lambda; + LIR_OpProfileCall* _op; + + Fubarbaz(T lambda, LIR_OpProfileCall* op) : _lambda(lambda), _op(op) { + } + virtual void operator() (LIR_Assembler* ce) { + _lambda(ce, _op); + } +}; + +class EmitProfileCallStub: public CodeStub { +private: + Fubarbaz_base *_doit; + +public: + EmitProfileCallStub() {} + void set_doit(Fubarbaz_base *doit) { _doit = doit; } + virtual void emit_code(LIR_Assembler* ce) { + (*_doit)(ce); + } +#ifndef PRODUCT + virtual void print_name(outputStream* out) const { out->print("EmitProfileCallStub"); } +#endif // PRODUCT + virtual void visit(LIR_OpVisitState* visitor) { } +}; + class ConversionStub: public CodeStub { private: Bytecodes::Code _bytecode; diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index ffd4592e91295..6a84f80c2ab51 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -587,6 +587,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { if (op2->_opr1->is_valid()) do_temp(op2->_opr1); if (op2->_opr2->is_valid()) do_temp(op2->_opr2); } + if (op2->overflow() != nullptr) do_stub(op2->overflow()); break; } diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index 97cbda4a72d64..85622baeae6f6 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -2270,7 +2270,7 @@ class LIR_List: public CompilationResourceObj { void increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, LIR_Opr freq, LIR_Opr step, CodeStub* 
overflow, CodeEmitInfo* info); void increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, CodeStub* overflow = nullptr) { - increment_profile_ctr(src, addr, res,tmp, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, overflow, nullptr); + increment_profile_ctr(src, addr, res, tmp, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, overflow, nullptr); } void idiv(LIR_Opr left, LIR_Opr right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index c8186e6e004f6..5ef40195f52a7 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -928,15 +928,19 @@ void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { LIR_Opr data_reg = new_pointer_register(); LIR_Address* data_addr = new LIR_Address(md_reg, data_offset_reg, data_reg->type()); LIR_Address* fake_incr_value = new LIR_Address(data_reg, DataLayout::counter_increment, T_INT); - // if (ProfileCaptureRatio == 1) { - // __ move(data_addr, data_reg); - // // Use leal instead of add to avoid destroying condition codes on x86 - // __ leal(LIR_OprFact::address(fake_incr_value), data_reg); - // __ move(data_reg, data_addr); - // } else { - LIR_Opr tmp = new_register(T_INT); - __ increment_profile_ctr(LIR_OprFact::intConst(DataLayout::counter_increment), data_addr, data_reg, tmp); - // } + LIR_Opr tmp = new_register(T_INT); + LIR_Opr step = LIR_OprFact::intConst(DataLayout::counter_increment); + if (ProfileCaptureRatio == 1) { + __ increment_profile_ctr(step, data_addr, LIR_OprFact::intConst(0), tmp, nullptr); + } else { + CodeStub *overflow = new ExtendedCounterOverflowStub + (/*info*/nullptr, -1, LIR_OprFact::illegalOpr, + step, data_addr, LIR_OprFact::intConst(0), tmp, LIR_OprFact::illegalOpr, /*notify*/false); + + __ increment_profile_ctr(step, data_addr, data_reg, tmp, + LIR_OprFact::illegalOpr, step, overflow, /*info*/nullptr); + // __ 
increment_profile_ctr(LIR_OprFact::intConst(DataLayout::counter_increment), data_addr, data_reg, tmp); + } } } @@ -2388,7 +2392,11 @@ void LIRGenerator::do_Goto(Goto* x) { // LIR_Opr dummy = new_register(T_INT); LIR_Opr dummy = LIR_OprFact::intConst(0); LIR_Opr inc = LIR_OprFact::intConst(DataLayout::counter_increment); - __ increment_profile_ctr(inc, counter_addr, dummy, tmp); + LIR_Opr step = LIR_OprFact::intConst(DataLayout::counter_increment); + CodeStub *overflow = new ExtendedCounterOverflowStub + (/*info*/nullptr, -1, LIR_OprFact::illegalOpr, + step, counter_addr, dummy, tmp, LIR_OprFact::illegalOpr, /*notify*/false); + __ increment_profile_ctr(inc, counter_addr, dummy, tmp, overflow); // } } @@ -3190,36 +3198,26 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, LIR_Opr result = notify ? new_register(T_INT) : LIR_OprFact::intConst(0); LIR_Opr tmp = new_register(T_INT); + CodeStub* overflow = nullptr; + if (notify && (!backedge || UseOnStackReplacement)) { LIR_Opr meth = LIR_OprFact::metadataConst(method->constant_encoding()); // The bci for info can point to cmp for if's we want the if bci - CodeStub* overflow = new CounterOverflowStub(info, bci, meth); + int freq = frequency << InvocationCounter::count_shift >> exact_log2(ProfileCaptureRatio); + overflow = new ExtendedCounterOverflowStub + (info, bci, meth, + step, counter, result, tmp, LIR_OprFact::intConst(freq), /*notify*/true); - int freq = frequency << InvocationCounter::count_shift; __ increment_profile_ctr(step, counter, result, tmp, LIR_OprFact::intConst(freq), step, overflow, info); - // if (freq == 0) { - // if (!step->is_constant()) { - // __ cmp(lir_cond_notEqual, step, LIR_OprFact::intConst(0)); - // __ branch(lir_cond_notEqual, overflow); - // } else { - // __ branch(lir_cond_always, overflow); - // } - // } else { - // LIR_Opr mask = load_immediate(freq, T_INT); - // if (!step->is_constant()) { - // // If step is 0, make sure the overflow check below always fails - // __ 
cmp(lir_cond_notEqual, step, LIR_OprFact::intConst(0)); - // __ cmove(lir_cond_notEqual, result, LIR_OprFact::intConst(InvocationCounter::count_increment), result, T_INT); - // } - // __ logical_and(result, mask, result); - // __ cmp(lir_cond_equal, result, LIR_OprFact::intConst(0)); - // __ branch(lir_cond_equal, overflow); - // } - // __ branch_destination(overflow->continuation()); } else { - __ increment_profile_ctr(step, counter, result, tmp); + overflow = new ExtendedCounterOverflowStub + (info, bci, LIR_OprFact::illegalOpr, + step, counter, result, tmp, LIR_OprFact::illegalOpr, /*notify*/false); + + __ increment_profile_ctr(step, counter, result, tmp, + LIR_OprFact::illegalOpr, step, overflow, info); } } diff --git a/src/hotspot/share/compiler/abstractDisassembler.cpp b/src/hotspot/share/compiler/abstractDisassembler.cpp index df7781e93d5db..26342fc8187ce 100644 --- a/src/hotspot/share/compiler/abstractDisassembler.cpp +++ b/src/hotspot/share/compiler/abstractDisassembler.cpp @@ -338,6 +338,7 @@ void AbstractDisassembler::decode_range_abstract(address range_start, address ra } } +int barfism; // Decodes all instructions in the given range [start..end). // The output is enclosed in [MachCode] and [/MachCode] tags for later recognition. @@ -350,6 +351,7 @@ void AbstractDisassembler::decode_abstract(address start, address end, outputStr outputStream* st = (ost == nullptr) ? 
tty : ost; + barfism++; st->bol(); st->cr(); st->print_cr("Loading hsdis library failed, undisassembled code is shown in MachCode section"); From 8ef8f927e8d5f2b944df68b7e5bfd52bb32c0c58 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Wed, 12 Nov 2025 14:31:00 +0000 Subject: [PATCH 25/48] Works --- src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp | 34 ++++++++-- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 74 ++++++++++----------- src/hotspot/share/c1/c1_LIRGenerator.cpp | 31 ++++++--- src/hotspot/share/c1/c1_Runtime1.cpp | 3 + 4 files changed, 89 insertions(+), 53 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp index 51e94e9565359..3fddec7bddec2 100644 --- a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp +++ b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp @@ -97,6 +97,8 @@ void CounterOverflowStub::emit_code(LIR_Assembler* ce) { __ jmp(_continuation); } +long x7fe_counts, x7fe_overs; + void ExtendedCounterOverflowStub::emit_code(LIR_Assembler* ce) { int profile_capture_ratio = ProfileCaptureRatio; int ratio_shift = exact_log2(profile_capture_ratio); @@ -113,18 +115,21 @@ void ExtendedCounterOverflowStub::emit_code(LIR_Assembler* ce) { if (_incr->is_register()) { Register inc = _incr->as_register(); - __ movl(temp, dest_adr); if (profile_capture_ratio > 1) { - __ sall(inc, ratio_shift); + __ shll(inc, ratio_shift); } + __ movl(temp, dest_adr); __ addl(temp, inc); __ movl(dest_adr, temp); __ movl(_dest->as_register(), temp); + + if (profile_capture_ratio > 1) { + __ shrl(inc, ratio_shift); + } } else { jint inc = _incr->as_constant_ptr()->as_jint_bits(); switch (_dest->type()) { case T_INT: { - inc *= profile_capture_ratio; if (_dest->is_register()) { __ movl(temp, dest_adr); __ addl(temp, inc); @@ -171,8 +176,27 @@ void ExtendedCounterOverflowStub::emit_code(LIR_Assembler* ce) { if (!_incr->is_constant()) { // If step is 0, make sure the overflow check below always fails __ cmpl(_incr->as_register(), 0); - __ movl(temp, 
InvocationCounter::count_increment); - __ cmovl(Assembler::notEqual, result, temp); + __ movl(temp, InvocationCounter::count_increment * ProfileCaptureRatio); + __ cmovl(Assembler::equal, result, temp); + } + // long x7fe_counts, x7fe_overs; + if (getenv("APH_BAZ_BARF") && // inc != 0x7fe + ! _incr-> is_constant() + ) { + Label nonzero; + __ push(temp); + + __ testl(result, _freq_op->as_jint()); + __ jcc(Assembler::notEqual, nonzero); + + __ lea(temp, ExternalAddress((address)&x7fe_overs)); + __ addl(Address(temp), 1); + + __ bind(nonzero); + __ lea(temp, ExternalAddress((address)&x7fe_counts)); + __ addl(Address(temp), 1); + + __ pop(temp); } __ andl(result, _freq_op->as_jint()); __ jcc(Assembler::notEqual, _continuation); diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 2f0419fe8dc00..209f024c5a278 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1260,7 +1260,7 @@ void LIR_Assembler::type_profile_helper(Register mdo, __ cmpptr(recv, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); __ jccb(Assembler::notEqual, next_test); Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); - __ addptr(data_addr, DataLayout::counter_increment); + __ addptr(data_addr, DataLayout::counter_increment * ProfileCaptureRatio); __ jmp(*update_done); __ bind(next_test); } @@ -1272,7 +1272,7 @@ void LIR_Assembler::type_profile_helper(Register mdo, __ cmpptr(recv_addr, NULL_WORD); __ jccb(Assembler::notEqual, next_test); __ movptr(recv_addr, recv); - __ movptr(Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))), DataLayout::counter_increment); + __ movptr(Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))), DataLayout::counter_increment * ProfileCaptureRatio); __ jmp(*update_done); __ bind(next_test); } @@ -2769,7 +2769,7 
@@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op, LIR_Opr freq_op, LIR_Opr step_op, - CodeStub* overflow) { + CodeStub* stub) { // Register temp = temp_op->is_register() ? temp_op->as_register() : noreg; Register temp = temp_op->is_register() ? temp_op->as_register() : noreg; // RegisterOrConstant dest_adr = addr->is_address() ? as_Address(addr->as_address_ptr()) @@ -2798,12 +2798,12 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de } Label dont; - Label *skip = overflow ? overflow->continuation() : &dont; + Label *skip = stub ? stub->continuation() : &dont; - if (profile_capture_ratio > 1 && overflow) { + if (profile_capture_ratio > 1 && stub) { __ cmpl(r_profile_rng, threshold); if (! getenv("APH_DISABLE")) { - __ jcc(Assembler::below, *overflow->entry()); + __ jcc(Assembler::below, *stub->entry()); } } else { __ block_comment("increment_profile_ctrX" " {"); @@ -2817,11 +2817,14 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de } __ movl(temp, dest_adr); if (profile_capture_ratio > 1) { - __ sall(inc, ratio_shift); + __ shll(inc, ratio_shift); } __ addl(temp, inc); __ movl(dest_adr, temp); __ movl(dest->as_register(), temp); + if (profile_capture_ratio > 1) { + __ shrl(inc, ratio_shift); + } } else { jint inc = incr->as_constant_ptr()->as_jint_bits(); switch (dest->type()) { @@ -2869,13 +2872,13 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de ShouldNotReachHere(); } - if (incr->is_valid() && overflow) { + if (incr->is_valid() && stub) { if (!freq_op->is_valid()) { if (!incr->is_constant()) { __ cmpl(incr->as_register(), 0); - __ jcc(Assembler::notEqual, *(overflow->entry())); + __ jcc(Assembler::notEqual, *(stub->entry())); } else { - __ jmp(*(overflow->entry())); + __ jmp(*(stub->entry())); } } else { Register result = @@ -2883,13 +2886,13 @@ void 
LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de dest->type() == T_LONG ? dest->as_register_lo() : noreg; if (!incr->is_constant()) { - // If step is 0, make sure the overflow check below always fails + // If step is 0, make sure the stub check below always fails __ cmpl(incr->as_register(), 0); - __ movl(temp, InvocationCounter::count_increment); + __ movl(temp, InvocationCounter::count_increment * ProfileCaptureRatio); __ cmovl(Assembler::notEqual, result, temp); } __ andl(result, freq_op->as_jint()); - __ jcc(Assembler::equal, *overflow->entry()); + __ jcc(Assembler::equal, *stub->entry()); } } } @@ -2906,25 +2909,16 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de } void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { - Label dont; Register temp = op->tmp1()->as_register_lo(); - EmitProfileCallStub *stub = new EmitProfileCallStub(); - int profile_capture_ratio = ProfileCaptureRatio; int ratio_shift = exact_log2(profile_capture_ratio); auto threshold = (1ull << 32) >> ratio_shift; assert(threshold > 0, "must be"); - if (profile_capture_ratio > 1) { - __ step_random(r_profile_rng, temp); - __ cmpl(r_profile_rng, threshold); - __ jcc(Assembler::below, *stub->entry()); - } else { - __ jmp(*stub->entry()); - // abort(); - } + EmitProfileCallStub *stub + = profile_capture_ratio > 1 ? 
new EmitProfileCallStub() : nullptr; #undef __ #define __ ce->masm()-> @@ -2936,7 +2930,7 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { Register temp = op->tmp1()->as_register_lo(); - __ bind(*stub->entry()); + if (stub != nullptr) __ bind(*stub->entry()); // Update counter for all call types ciMethodData* md = method->method_data_or_null(); @@ -2968,7 +2962,7 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ciKlass* receiver = vc_data->receiver(i); if (known_klass->equals(receiver)) { Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); - __ addptr(data_addr, DataLayout::counter_increment); + __ addptr(data_addr, DataLayout::counter_increment * ProfileCaptureRatio); goto exit; } } @@ -2984,7 +2978,7 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); __ mov_metadata(recv_addr, known_klass->constant_encoding(), rscratch1); Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); - __ addptr(data_addr, DataLayout::counter_increment); + __ addptr(data_addr, DataLayout::counter_increment * ProfileCaptureRatio); goto exit; } } @@ -2994,28 +2988,34 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { type_profile_helper(mdo, md, data, recv, &update_done); // Receiver did not match any saved receiver and there is no empty row for it. // Increment total counter to indicate polymorphic case. 
- __ addptr(counter_addr, DataLayout::counter_increment); + __ addptr(counter_addr, DataLayout::counter_increment * ProfileCaptureRatio); __ bind(update_done); } exit: {} } else { // Static call - __ addptr(counter_addr, DataLayout::counter_increment); + __ addptr(counter_addr, DataLayout::counter_increment * ProfileCaptureRatio); } - __ jmp(*stub->continuation()); + + if (stub != nullptr) __ jmp(*stub->continuation()); + #undef __ #define __ _masm-> }; - Fubarbaz_base *ff = new Fubarbaz(lambda, op); - stub->set_doit(ff); - append_code_stub(stub); - - __ bind(*stub->continuation()); + if (stub != nullptr) { + __ step_random(r_profile_rng, temp); + __ cmpl(r_profile_rng, threshold); + __ jcc(Assembler::below, *stub->entry()); + __ bind(*stub->continuation()); - // lambda(this, op); - __ bind(dont); + Fubarbaz_base *ff = new Fubarbaz(lambda, op); + stub->set_doit(ff); + append_code_stub(stub); + } else { + lambda(this, op); + } } int kludge; diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 5ef40195f52a7..8cc63c76aa4dd 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -2384,9 +2384,9 @@ void LIRGenerator::do_Goto(Goto* x) { LIR_Address *counter_addr = new LIR_Address(md_reg, offset, NOT_LP64(T_INT) LP64_ONLY(T_LONG)); - // if (ProfileCaptureRatio == 1) { - // increment_counter(counter_addr, DataLayout::counter_increment); - // } else { + if (ProfileCaptureRatio == 1) { + increment_counter(counter_addr, DataLayout::counter_increment); + } else { // LIR_Address *counter_addr = new LIR_Address(md_reg, offset, T_INT); LIR_Opr tmp = new_register(T_INT); // LIR_Opr dummy = new_register(T_INT); @@ -2397,7 +2397,7 @@ void LIRGenerator::do_Goto(Goto* x) { (/*info*/nullptr, -1, LIR_OprFact::illegalOpr, step, counter_addr, dummy, tmp, LIR_OprFact::illegalOpr, /*notify*/false); __ increment_profile_ctr(inc, counter_addr, dummy, tmp, overflow); - // } + } } // emit 
phi-instruction move after safepoint since this simplifies @@ -3203,18 +3203,27 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, if (notify && (!backedge || UseOnStackReplacement)) { LIR_Opr meth = LIR_OprFact::metadataConst(method->constant_encoding()); // The bci for info can point to cmp for if's we want the if bci - int freq = frequency << InvocationCounter::count_shift >> exact_log2(ProfileCaptureRatio); - overflow = new ExtendedCounterOverflowStub - (info, bci, meth, - step, counter, result, tmp, LIR_OprFact::intConst(freq), /*notify*/true); + int freq = frequency + // Clear the bottom bit based on capture ratio, such that we + // detect overflows. + >> exact_log2(ProfileCaptureRatio) << exact_log2(ProfileCaptureRatio) + << InvocationCounter::count_shift; + overflow = (ProfileCaptureRatio > 1 + ? (new ExtendedCounterOverflowStub + (info, bci, meth, + step, counter, result, tmp, LIR_OprFact::intConst(freq), /*notify*/true)) + : (new CounterOverflowStub + (info, bci, meth))); __ increment_profile_ctr(step, counter, result, tmp, LIR_OprFact::intConst(freq), step, overflow, info); } else { - overflow = new ExtendedCounterOverflowStub - (info, bci, LIR_OprFact::illegalOpr, - step, counter, result, tmp, LIR_OprFact::illegalOpr, /*notify*/false); + overflow = (ProfileCaptureRatio > 1 + ? 
(new ExtendedCounterOverflowStub + (info, bci, LIR_OprFact::illegalOpr, + step, counter, result, tmp, LIR_OprFact::illegalOpr, /*notify*/false)) + : nullptr); __ increment_profile_ctr(step, counter, result, tmp, LIR_OprFact::illegalOpr, step, overflow, info); diff --git a/src/hotspot/share/c1/c1_Runtime1.cpp b/src/hotspot/share/c1/c1_Runtime1.cpp index a4c956ff5bea1..b2e78cbec57a6 100644 --- a/src/hotspot/share/c1/c1_Runtime1.cpp +++ b/src/hotspot/share/c1/c1_Runtime1.cpp @@ -506,9 +506,12 @@ static nmethod* counter_overflow_helper(JavaThread* current, int branch_bci, Met return osr_nm; } +long c1_overflows; + JRT_BLOCK_ENTRY(address, Runtime1::counter_overflow(JavaThread* current, int bci, Method* method)) nmethod* osr_nm; JRT_BLOCK_NO_ASYNC + c1_overflows++; osr_nm = counter_overflow_helper(current, bci, method); if (osr_nm != nullptr) { RegisterMap map(current, From 7e6a8d8837379166350f8054c7e75a5ab187a253 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Thu, 13 Nov 2025 16:50:50 +0000 Subject: [PATCH 26/48] Better --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 170 ++++++++++++++++---- src/hotspot/share/c1/c1_CodeStubs.hpp | 22 +-- src/hotspot/share/c1/c1_LIR.cpp | 2 +- 3 files changed, 157 insertions(+), 37 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 209f024c5a278..f75aa20a1645c 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1278,6 +1278,8 @@ void LIR_Assembler::type_profile_helper(Register mdo, } } +long blooper; + void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) { // we always need a stub for the failure case. 
CodeStub* stub = op->stub(); @@ -1322,6 +1324,11 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L __ testptr(obj, obj); if (op->should_profile()) { + int profile_capture_ratio = ProfileCaptureRatio; + int ratio_shift = exact_log2(profile_capture_ratio); + auto threshold = (1ull << 32) >> ratio_shift; + assert(threshold > 0, "must be"); + Label not_null; Register mdo = klass_RInfo; __ mov_metadata(mdo, md->constant_encoding()); @@ -1333,22 +1340,49 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L __ jmp(*obj_is_null); __ bind(not_null); - Label update_done; + EmitProfileStub *stub + = profile_capture_ratio > 1 ? new EmitProfileStub() : nullptr; + + auto lambda = [stub, md, mdo, data, k_RInfo, obj, tmp_load_klass] (LIR_Assembler* ce, LIR_Op* base_op) { + +#undef __ +#define __ masm-> - __ step_profile_rng(r_profile_rng, k_RInfo, update_done); + auto masm = ce->masm(); + if (stub != nullptr) __ bind(*stub->entry()); + + Label update_done; Register recv = k_RInfo; __ load_klass(recv, obj, tmp_load_klass); - type_profile_helper(mdo, md, data, recv, &update_done); + ce->type_profile_helper(mdo, md, data, recv, &update_done); Address nonprofiled_receiver_count_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); __ addptr(nonprofiled_receiver_count_addr, DataLayout::counter_increment); __ bind(update_done); + + if (stub != nullptr) __ jmp(*stub->continuation()); + +#undef __ +#define __ _masm-> + }; + + if (stub != nullptr) { + __ step_random(r_profile_rng, rscratch1); + __ cmpl(r_profile_rng, threshold); + __ jcc(Assembler::below, *stub->entry()); + __ bind(*stub->continuation()); + + stub->set_doit(new ProfileCounterStub(lambda, op)); + stub->set_name("Typecheck stub"); + append_code_stub(stub); + } else { + lambda(this, op); + } } else { __ jcc(Assembler::equal, *obj_is_null); } - if (!k->is_loaded()) { klass2reg_with_patching(k_RInfo, op->info_for_patch()); } else { @@ -1443,28 
+1477,79 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { Label* success_target = &done; Label* failure_target = stub->entry(); - __ testptr(value, value); if (op->should_profile()) { + int profile_capture_ratio = ProfileCaptureRatio; + int ratio_shift = exact_log2(profile_capture_ratio); + auto threshold = (1ull << 32) >> ratio_shift; + assert(threshold > 0, "must be"); + + EmitProfileStub *profiling_stub + = profile_capture_ratio > 1 ? new EmitProfileStub() : nullptr; + + auto lambda = [profiling_stub, md, data, value, + k_RInfo, klass_RInfo, tmp_load_klass, success_target] (LIR_Assembler* ce, LIR_Op*) { +#undef __ +#define __ masm-> + + auto masm = ce->masm(); + + if (profiling_stub != nullptr) __ bind(*profiling_stub->entry()); + + __ testptr(value, value); + Label not_null; Register mdo = klass_RInfo; __ mov_metadata(mdo, md->constant_encoding()); __ jccb(Assembler::notEqual, not_null); + // Object is null; update MDO and exit Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); int header_bits = BitData::null_seen_byte_constant(); __ orb(data_addr, header_bits); - __ jmp(done); + if (profiling_stub != nullptr) { + __ jmp(*profiling_stub->continuation()); + } else { + __ jmp(*success_target); + } __ bind(not_null); + __ push(rax); + __ lea(rax, ExternalAddress((address)&blooper)); + __ addl(Address(rax), 1); + __ pop(rax); + Label update_done; Register recv = k_RInfo; __ load_klass(recv, value, tmp_load_klass); - type_profile_helper(mdo, md, data, recv, &update_done); + ce->type_profile_helper(mdo, md, data, recv, &update_done); Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); __ addptr(counter_addr, DataLayout::counter_increment); __ bind(update_done); + + if (profiling_stub != nullptr) __ jmp(*profiling_stub->continuation()); + +#undef __ +#define __ _masm-> + }; + + if (profiling_stub != nullptr) { + __ step_random(r_profile_rng, rscratch1); + __ cmpl(r_profile_rng, 
threshold); + __ jcc(Assembler::below, *profiling_stub->entry()); + __ bind(*profiling_stub->continuation()); + __ testptr(value, value); + __ jcc(Assembler::equal, done); + + profiling_stub->set_doit(new ProfileCounterStub(lambda, op)); + profiling_stub->set_name("Typecheck profile stub"); + append_code_stub(profiling_stub); + } else { + lambda(this, op); + } + } else { + __ testptr(value, value); __ jcc(Assembler::equal, done); } @@ -2917,12 +3002,15 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { auto threshold = (1ull << 32) >> ratio_shift; assert(threshold > 0, "must be"); - EmitProfileCallStub *stub - = profile_capture_ratio > 1 ? new EmitProfileCallStub() : nullptr; + EmitProfileStub *stub + = profile_capture_ratio > 1 ? new EmitProfileStub() : nullptr; + auto lambda = [op, stub] (LIR_Assembler* ce, LIR_Op* base_op) { #undef __ -#define __ ce->masm()-> - auto lambda = [=] (LIR_Assembler* ce, LIR_OpProfileCall* op) { +#define __ masm-> + + auto masm = ce->masm(); + LIR_OpProfileCall* op = base_op->as_OpProfileCall(); ciMethod* method = op->profiled_method(); int bci = op->profiled_bci(); ciMethod* callee = op->profiled_callee(); @@ -2985,7 +3073,7 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { } else { __ load_klass(recv, recv, tmp_load_klass); Label update_done; - type_profile_helper(mdo, md, data, recv, &update_done); + ce->type_profile_helper(mdo, md, data, recv, &update_done); // Receiver did not match any saved receiver and there is no empty row for it. // Increment total counter to indicate polymorphic case. 
__ addptr(counter_addr, DataLayout::counter_increment * ProfileCaptureRatio); @@ -3010,8 +3098,8 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { __ jcc(Assembler::below, *stub->entry()); __ bind(*stub->continuation()); - Fubarbaz_base *ff = new Fubarbaz(lambda, op); - stub->set_doit(ff); + stub->set_doit(new ProfileCounterStub(lambda, op)); + stub->set_name("ProfileCallStub"); append_code_stub(stub); } else { lambda(this, op); @@ -3030,6 +3118,26 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { bool not_null = op->not_null(); bool no_conflict = op->no_conflict(); + __ verify_oop(obj); + +#ifdef ASSERT + assert_different_registers(obj, tmp, rscratch1, mdo_addr.base(), mdo_addr.index()); +#endif + + int profile_capture_ratio = ProfileCaptureRatio; + int ratio_shift = exact_log2(profile_capture_ratio); + auto threshold = (1ull << 32) >> ratio_shift; + assert(threshold > 0, "must be"); + + EmitProfileStub *stub + = profile_capture_ratio > 1 ? new EmitProfileStub() : nullptr; + + auto lambda = [stub, mdo_addr, not_null, exact_klass, current_klass, + obj, tmp, tmp_load_klass, no_conflict] (LIR_Assembler* ce, LIR_Op*) { +#undef __ +#define __ masm-> + + auto masm = ce->masm(); Label update, next, none; bool do_null = !not_null; @@ -3039,18 +3147,7 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { assert(do_null || do_update, "why are we here?"); assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); - __ verify_oop(obj); - -#ifdef ASSERT - if (obj == tmp) { - assert_different_registers(obj, rscratch1, mdo_addr.base(), mdo_addr.index()); - } else { - assert_different_registers(obj, tmp, rscratch1, mdo_addr.base(), mdo_addr.index()); - } -#endif - - // Subsampling profile capture - __ step_profile_rng(r_profile_rng, rscratch1, next); + if (stub != nullptr) __ bind(*stub->entry()); if (do_null) { __ testptr(obj, obj); @@ -3190,8 +3287,27 @@ void 
LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { __ orptr(mdo_addr, TypeEntries::type_unknown); } } - } + } // do_update + __ bind(next); + if (stub != nullptr) __ jmp(*stub->continuation()); + +#undef __ +#define __ _masm-> + }; + + if (stub != nullptr) { + __ step_random(r_profile_rng, tmp); + __ cmpl(r_profile_rng, threshold); + __ jcc(Assembler::below, *stub->entry()); + __ bind(*stub->continuation()); + + stub->set_doit(new ProfileCounterStub(lambda, op)); + stub->set_name("ProfileTypeStub"); + append_code_stub(stub); + } else { + lambda(this, op); + } } void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { diff --git a/src/hotspot/share/c1/c1_CodeStubs.hpp b/src/hotspot/share/c1/c1_CodeStubs.hpp index e1c87b97e38fb..740e312675f7d 100644 --- a/src/hotspot/share/c1/c1_CodeStubs.hpp +++ b/src/hotspot/share/c1/c1_CodeStubs.hpp @@ -163,35 +163,39 @@ class ExtendedCounterOverflowStub: public CounterOverflowStub { }; -class Fubarbaz_base : public CompilationResourceObj { +class AbstractProfileCounterStub : public CompilationResourceObj { public: virtual void operator() (LIR_Assembler* ce) = 0; }; template -struct Fubarbaz : public Fubarbaz_base { +struct ProfileCounterStub : public AbstractProfileCounterStub { T _lambda; - LIR_OpProfileCall* _op; + LIR_Op* _op; - Fubarbaz(T lambda, LIR_OpProfileCall* op) : _lambda(lambda), _op(op) { + ProfileCounterStub(T lambda, LIR_Op* op) : _lambda(lambda), _op(op) { } virtual void operator() (LIR_Assembler* ce) { _lambda(ce, _op); } }; -class EmitProfileCallStub: public CodeStub { +class EmitProfileStub: public CodeStub { private: - Fubarbaz_base *_doit; + AbstractProfileCounterStub *_doit; + const char* _name; public: - EmitProfileCallStub() {} - void set_doit(Fubarbaz_base *doit) { _doit = doit; } + EmitProfileStub() { + _name = "EmitProfileStub"; + } + void set_doit(AbstractProfileCounterStub *doit) { _doit = doit; } + void set_name(const char* name) { _name = name; } virtual void 
emit_code(LIR_Assembler* ce) { (*_doit)(ce); } #ifndef PRODUCT - virtual void print_name(outputStream* out) const { out->print("EmitProfileCallStub"); } + virtual void print_name(outputStream* out) const { out->print("%s", _name); } #endif // PRODUCT virtual void visit(LIR_OpVisitState* visitor) { } }; diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index 6a84f80c2ab51..dd406272ac9d3 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -895,7 +895,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { LIR_OpProfileType* opProfileType = (LIR_OpProfileType*)op; do_input(opProfileType->_mdp); do_temp(opProfileType->_mdp); - do_input(opProfileType->_obj); + do_input(opProfileType->_obj); do_temp(opProfileType->_obj); do_temp(opProfileType->_tmp); break; } From 822a673eb137ea5d6843f1b50d4c6f7d0bb7280d Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Fri, 14 Nov 2025 14:37:32 +0000 Subject: [PATCH 27/48] Step --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 95 +++++++++++---------- src/hotspot/share/c1/c1_LIRGenerator.cpp | 18 +--- 2 files changed, 53 insertions(+), 60 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index f75aa20a1645c..6437fde851c4a 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -2854,7 +2854,7 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op, LIR_Opr freq_op, LIR_Opr step_op, - CodeStub* stub) { + CodeStub* overflow_stub) { // Register temp = temp_op->is_register() ? temp_op->as_register() : noreg; Register temp = temp_op->is_register() ? temp_op->as_register() : noreg; // RegisterOrConstant dest_adr = addr->is_address() ? 
as_Address(addr->as_address_ptr()) @@ -2878,50 +2878,36 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de // __ mov64(temp, (uintptr_t)&ploopy); // __ addl(Address(temp, 0), 1); - if (profile_capture_ratio > 1) { - __ step_random(r_profile_rng, temp); - } + EmitProfileStub *counter_stub + = profile_capture_ratio > 1 ? new EmitProfileStub() : nullptr; - Label dont; - Label *skip = stub ? stub->continuation() : &dont; + auto lambda = [counter_stub, overflow_stub, freq_op, ratio_shift, incr, + dest, dest_adr, temp] (LIR_Assembler* ce, LIR_Op* op) { + +#undef __ +#define __ masm-> + + auto masm = ce->masm(); + + if (counter_stub != nullptr) __ bind(*counter_stub->entry()); - if (profile_capture_ratio > 1 && stub) { - __ cmpl(r_profile_rng, threshold); - if (! getenv("APH_DISABLE")) { - __ jcc(Assembler::below, *stub->entry()); - } - } else { - __ block_comment("increment_profile_ctrX" " {"); if (incr->is_register()) { Register inc = incr->as_register(); - if (profile_capture_ratio > 1) { - __ cmpl(r_profile_rng, threshold); - if (! getenv("APH_DISABLE")) { - __ jccb(Assembler::aboveEqual, *skip); - } - } __ movl(temp, dest_adr); - if (profile_capture_ratio > 1) { + if (ProfileCaptureRatio > 1) { __ shll(inc, ratio_shift); } __ addl(temp, inc); __ movl(dest_adr, temp); __ movl(dest->as_register(), temp); - if (profile_capture_ratio > 1) { + if (ProfileCaptureRatio > 1) { __ shrl(inc, ratio_shift); } } else { jint inc = incr->as_constant_ptr()->as_jint_bits(); switch (dest->type()) { case T_INT: { - // if (dest->is_register()) __ movl(dest->as_register(), inc); - if (profile_capture_ratio > 1) { - __ cmpl(r_profile_rng, threshold); - if (! 
getenv("APH_DISABLE")) { - __ jccb(Assembler::aboveEqual, *skip); - } - } - inc *= profile_capture_ratio; + inc *= ProfileCaptureRatio; if (dest->is_register()) { __ movl(temp, dest_adr); __ addl(temp, inc); @@ -2934,14 +2920,7 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de break; } case T_LONG: { - // if (dest->is_register()) __ movq(dest->as_register_lo(), (jlong)inc); - if (profile_capture_ratio > 1) { - __ cmpl(r_profile_rng, threshold); - if (! getenv("APH_DISABLE")) { - __ jccb(Assembler::aboveEqual, *skip); - } - } - inc *= profile_capture_ratio; + inc *= ProfileCaptureRatio; if (dest->is_register()) { __ movq(temp, dest_adr); __ addq(temp, inc); @@ -2957,13 +2936,14 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de ShouldNotReachHere(); } - if (incr->is_valid() && stub) { + if (incr->is_valid() && overflow_stub) { if (!freq_op->is_valid()) { if (!incr->is_constant()) { __ cmpl(incr->as_register(), 0); - __ jcc(Assembler::notEqual, *(stub->entry())); + __ jcc(Assembler::equal, *overflow_stub->entry()); } else { - __ jmp(*(stub->entry())); + __ jmp(*overflow_stub->entry()); + goto exit; } } else { Register result = @@ -2974,23 +2954,46 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de // If step is 0, make sure the stub check below always fails __ cmpl(incr->as_register(), 0); __ movl(temp, InvocationCounter::count_increment * ProfileCaptureRatio); - __ cmovl(Assembler::notEqual, result, temp); + __ cmovl(Assembler::equal, result, temp); } __ andl(result, freq_op->as_jint()); - __ jcc(Assembler::equal, *stub->entry()); + __ jcc(Assembler::equal, *overflow_stub->entry()); } } } + + if (counter_stub != nullptr) { + __ jmp(*counter_stub->continuation()); + } + + exit: { } + +#undef __ +#define __ _masm-> + }; + + if (counter_stub != nullptr) { + __ step_random(r_profile_rng, temp); + __ cmpl(r_profile_rng, threshold); + __ jcc(Assembler::below, 
*counter_stub->entry()); + __ bind(*counter_stub->continuation()); + + counter_stub->set_doit(new ProfileCounterStub(lambda, nullptr)); + counter_stub->set_name("IncrementProfileCtr"); + append_code_stub(counter_stub); + } else { + lambda(this, nullptr); + } + + if (overflow_stub != nullptr) { + __ bind(*overflow_stub->continuation()); } - __ bind(*skip); #ifndef PRODUCT if (CommentedAssembly) { - __ block_comment("} " "increment_profile_ctr"); + __ block_comment("} increment_profile_ctr"); } #endif - - __ bind(dont); } void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 8cc63c76aa4dd..ce8dd50294ab0 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -930,17 +930,7 @@ void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { LIR_Address* fake_incr_value = new LIR_Address(data_reg, DataLayout::counter_increment, T_INT); LIR_Opr tmp = new_register(T_INT); LIR_Opr step = LIR_OprFact::intConst(DataLayout::counter_increment); - if (ProfileCaptureRatio == 1) { - __ increment_profile_ctr(step, data_addr, LIR_OprFact::intConst(0), tmp, nullptr); - } else { - CodeStub *overflow = new ExtendedCounterOverflowStub - (/*info*/nullptr, -1, LIR_OprFact::illegalOpr, - step, data_addr, LIR_OprFact::intConst(0), tmp, LIR_OprFact::illegalOpr, /*notify*/false); - - __ increment_profile_ctr(step, data_addr, data_reg, tmp, - LIR_OprFact::illegalOpr, step, overflow, /*info*/nullptr); - // __ increment_profile_ctr(LIR_OprFact::intConst(DataLayout::counter_increment), data_addr, data_reg, tmp); - } + __ increment_profile_ctr(step, data_addr, LIR_OprFact::intConst(0), tmp, nullptr); } } @@ -2384,7 +2374,7 @@ void LIRGenerator::do_Goto(Goto* x) { LIR_Address *counter_addr = new LIR_Address(md_reg, offset, NOT_LP64(T_INT) LP64_ONLY(T_LONG)); - if (ProfileCaptureRatio == 1) { + if (true || ProfileCaptureRatio == 1) { 
increment_counter(counter_addr, DataLayout::counter_increment); } else { // LIR_Address *counter_addr = new LIR_Address(md_reg, offset, T_INT); @@ -3208,7 +3198,7 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, // detect overflows. >> exact_log2(ProfileCaptureRatio) << exact_log2(ProfileCaptureRatio) << InvocationCounter::count_shift; - overflow = (ProfileCaptureRatio > 1 + overflow = (ProfileCaptureRatio > 1 && false ? (new ExtendedCounterOverflowStub (info, bci, meth, step, counter, result, tmp, LIR_OprFact::intConst(freq), /*notify*/true)) @@ -3219,7 +3209,7 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, LIR_OprFact::intConst(freq), step, overflow, info); } else { - overflow = (ProfileCaptureRatio > 1 + overflow = (ProfileCaptureRatio > 1 && false ? (new ExtendedCounterOverflowStub (info, bci, LIR_OprFact::illegalOpr, step, counter, result, tmp, LIR_OprFact::illegalOpr, /*notify*/false)) From 84385e7008dd0b87ff7d15c95feb29dbbc8ff5ad Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Fri, 14 Nov 2025 15:29:01 +0000 Subject: [PATCH 28/48] Cleanup --- src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp | 123 -------------------- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 7 +- src/hotspot/cpu/x86/macroAssembler_x86.cpp | 5 - src/hotspot/share/c1/c1_CodeStubs.hpp | 33 ------ src/hotspot/share/c1/c1_LIRGenerator.cpp | 34 +----- 5 files changed, 6 insertions(+), 196 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp index 3fddec7bddec2..0d6463e37aec9 100644 --- a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp +++ b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp @@ -97,129 +97,6 @@ void CounterOverflowStub::emit_code(LIR_Assembler* ce) { __ jmp(_continuation); } -long x7fe_counts, x7fe_overs; - -void ExtendedCounterOverflowStub::emit_code(LIR_Assembler* ce) { - int profile_capture_ratio = ProfileCaptureRatio; - int ratio_shift = exact_log2(profile_capture_ratio); - auto threshold = 
(1ull << 32) >> ratio_shift; - - Label overflow_entry; - - assert(threshold > 0, "must be"); - - __ bind(_entry); - - Register temp = _temp_op->as_register(); - Address dest_adr = as_Address(ce, _addr->as_address_ptr()); - - if (_incr->is_register()) { - Register inc = _incr->as_register(); - if (profile_capture_ratio > 1) { - __ shll(inc, ratio_shift); - } - __ movl(temp, dest_adr); - __ addl(temp, inc); - __ movl(dest_adr, temp); - __ movl(_dest->as_register(), temp); - - if (profile_capture_ratio > 1) { - __ shrl(inc, ratio_shift); - } - } else { - jint inc = _incr->as_constant_ptr()->as_jint_bits(); - switch (_dest->type()) { - case T_INT: { - if (_dest->is_register()) { - __ movl(temp, dest_adr); - __ addl(temp, inc); - __ movl(dest_adr, temp); - __ movl(_dest->as_register(), temp); - } else { - __ addl(dest_adr, inc); - } - - break; - } - case T_LONG: { - inc *= profile_capture_ratio; - if (_dest->is_register()) { - __ movq(temp, dest_adr); - __ addq(temp, inc); - __ movq(dest_adr, temp); - __ movq(_dest->as_register_lo(), temp); - } else { - __ addq(dest_adr, inc); - } - - break; - } - default: - ShouldNotReachHere(); - } - } - - - if (_incr->is_valid()) { - if (!_freq_op->is_valid()) { - if (!_incr->is_constant()) { - __ cmpl(_incr->as_register(), 0); - __ jccb(Assembler::equal, overflow_entry); - } else { - __ jmp(_notify ? overflow_entry : _continuation); - } - } else { - Register result = - _dest->type() == T_INT ? _dest->as_register() : - _dest->type() == T_LONG ? _dest->as_register_lo() : - noreg; - if (!_incr->is_constant()) { - // If step is 0, make sure the overflow check below always fails - __ cmpl(_incr->as_register(), 0); - __ movl(temp, InvocationCounter::count_increment * ProfileCaptureRatio); - __ cmovl(Assembler::equal, result, temp); - } - // long x7fe_counts, x7fe_overs; - if (getenv("APH_BAZ_BARF") && // inc != 0x7fe - ! 
_incr-> is_constant() - ) { - Label nonzero; - __ push(temp); - - __ testl(result, _freq_op->as_jint()); - __ jcc(Assembler::notEqual, nonzero); - - __ lea(temp, ExternalAddress((address)&x7fe_overs)); - __ addl(Address(temp), 1); - - __ bind(nonzero); - __ lea(temp, ExternalAddress((address)&x7fe_counts)); - __ addl(Address(temp), 1); - - __ pop(temp); - } - __ andl(result, _freq_op->as_jint()); - __ jcc(Assembler::notEqual, _continuation); - } - } else { - __ jmp(_continuation); - } - - __ bind(overflow_entry); - - if (_notify) { - Metadata *m = _method->as_constant_ptr()->as_metadata(); - ce->store_parameter(m, 1); - ce->store_parameter(_bci, 0); - __ call(RuntimeAddress(Runtime1::entry_for(StubId::c1_counter_overflow_id))); - ce->add_call_info_here(_info); - ce->verify_oop_map(_info); - - __ jmp(_continuation); - } -} - - void RangeCheckStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); if (_info->deoptimize_on_exception()) { diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 6437fde851c4a..5d19c5f7fed28 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -2861,8 +2861,6 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de Address dest_adr = as_Address(addr->as_address_ptr()); - // assert(ProfileCaptureRatio != 1, "ProfileCaptureRatio must be != 1"); - #ifndef PRODUCT if (CommentedAssembly) { __ block_comment("increment_profile_ctr" " {"); @@ -2871,13 +2869,10 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de int profile_capture_ratio = ProfileCaptureRatio; int ratio_shift = exact_log2(profile_capture_ratio); - auto threshold = (1ull << 32) >> ratio_shift; + auto threshold = (UCONST64(1) << 32) >> ratio_shift; assert(threshold > 0, "must be"); - // __ mov64(temp, (uintptr_t)&ploopy); - // __ addl(Address(temp, 0), 1); - EmitProfileStub *counter_stub = profile_capture_ratio > 1 ? 
new EmitProfileStub() : nullptr; diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 30a75605f35d0..4f19b30b832af 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -9823,8 +9823,3 @@ void MacroAssembler::setcc(Assembler::Condition comparison, Register dst) { movzbl(dst, dst); } } -// <<<<<<< HEAD - -// #endif -// ======= -// >>>>>>> master diff --git a/src/hotspot/share/c1/c1_CodeStubs.hpp b/src/hotspot/share/c1/c1_CodeStubs.hpp index 740e312675f7d..6bd9b925f9c0f 100644 --- a/src/hotspot/share/c1/c1_CodeStubs.hpp +++ b/src/hotspot/share/c1/c1_CodeStubs.hpp @@ -129,39 +129,6 @@ class CounterOverflowStub: public CodeStub { }; -class ExtendedCounterOverflowStub: public CounterOverflowStub { - private: - LIR_Opr _incr; - LIR_Opr _addr; - LIR_Opr _dest; - LIR_Opr _temp_op; - LIR_Opr _freq_op; - bool _notify; - -public: - ExtendedCounterOverflowStub(CodeEmitInfo* info, int bci, LIR_Opr method, - LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op, - LIR_Opr freq_op, bool notify) - : CounterOverflowStub(info, bci, method), - _incr(incr), _addr(addr), _dest(dest), _temp_op(temp_op), _freq_op(freq_op), - _notify(notify) { } - - virtual void emit_code(LIR_Assembler* e); - - virtual void visit(LIR_OpVisitState* visitor) { - CounterOverflowStub::visit(visitor); - visitor->do_input(_incr); - visitor->do_input(_addr); - if (_dest->is_valid()) visitor->do_output(_dest); - visitor->do_temp(_temp_op); - if (_freq_op->is_valid()) visitor->do_input(_freq_op); - } - -#ifndef PRODUCT - virtual void print_name(outputStream* out) const { out->print("ExtendedCounterOverflowStub"); } -#endif // PRODUCT - -}; class AbstractProfileCounterStub : public CompilationResourceObj { public: diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index ce8dd50294ab0..a13719905de64 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ 
b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -927,7 +927,6 @@ void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { // MDO cells are intptr_t, so the data_reg width is arch-dependent. LIR_Opr data_reg = new_pointer_register(); LIR_Address* data_addr = new LIR_Address(md_reg, data_offset_reg, data_reg->type()); - LIR_Address* fake_incr_value = new LIR_Address(data_reg, DataLayout::counter_increment, T_INT); LIR_Opr tmp = new_register(T_INT); LIR_Opr step = LIR_OprFact::intConst(DataLayout::counter_increment); __ increment_profile_ctr(step, data_addr, LIR_OprFact::intConst(0), tmp, nullptr); @@ -2374,20 +2373,10 @@ void LIRGenerator::do_Goto(Goto* x) { LIR_Address *counter_addr = new LIR_Address(md_reg, offset, NOT_LP64(T_INT) LP64_ONLY(T_LONG)); - if (true || ProfileCaptureRatio == 1) { - increment_counter(counter_addr, DataLayout::counter_increment); - } else { - // LIR_Address *counter_addr = new LIR_Address(md_reg, offset, T_INT); - LIR_Opr tmp = new_register(T_INT); - // LIR_Opr dummy = new_register(T_INT); - LIR_Opr dummy = LIR_OprFact::intConst(0); - LIR_Opr inc = LIR_OprFact::intConst(DataLayout::counter_increment); - LIR_Opr step = LIR_OprFact::intConst(DataLayout::counter_increment); - CodeStub *overflow = new ExtendedCounterOverflowStub - (/*info*/nullptr, -1, LIR_OprFact::illegalOpr, - step, counter_addr, dummy, tmp, LIR_OprFact::illegalOpr, /*notify*/false); - __ increment_profile_ctr(inc, counter_addr, dummy, tmp, overflow); - } + LIR_Opr tmp = new_register(T_INT); + LIR_Opr dummy = LIR_OprFact::intConst(0); + LIR_Opr inc = LIR_OprFact::intConst(DataLayout::counter_increment); + __ increment_profile_ctr(inc, counter_addr, dummy, tmp, nullptr); } // emit phi-instruction move after safepoint since this simplifies @@ -3198,23 +3187,10 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, // detect overflows. 
>> exact_log2(ProfileCaptureRatio) << exact_log2(ProfileCaptureRatio) << InvocationCounter::count_shift; - overflow = (ProfileCaptureRatio > 1 && false - ? (new ExtendedCounterOverflowStub - (info, bci, meth, - step, counter, result, tmp, LIR_OprFact::intConst(freq), /*notify*/true)) - : (new CounterOverflowStub - (info, bci, meth))); - + overflow = new CounterOverflowStub (info, bci, meth); __ increment_profile_ctr(step, counter, result, tmp, LIR_OprFact::intConst(freq), step, overflow, info); - } else { - overflow = (ProfileCaptureRatio > 1 && false - ? (new ExtendedCounterOverflowStub - (info, bci, LIR_OprFact::illegalOpr, - step, counter, result, tmp, LIR_OprFact::illegalOpr, /*notify*/false)) - : nullptr); - __ increment_profile_ctr(step, counter, result, tmp, LIR_OprFact::illegalOpr, step, overflow, info); } From 6cafeb7096868c51426abb201b3d481845f2f89a Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Fri, 14 Nov 2025 17:23:02 +0000 Subject: [PATCH 29/48] Cleanup --- src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp | 36 ------------- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 52 ++++++++++--------- src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp | 25 --------- src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp | 1 - src/hotspot/share/c1/c1_CodeStubs.hpp | 9 ++-- src/hotspot/share/c1/c1_LIR.cpp | 2 +- 6 files changed, 33 insertions(+), 92 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp index 0d6463e37aec9..95ce48f34db73 100644 --- a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp +++ b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp @@ -37,42 +37,6 @@ #define __ ce->masm()-> -Address CodeStub::as_Address(LIR_Assembler* ce, LIR_Address* addr, Register tmp) { - if (addr->base()->is_illegal()) { - assert(addr->index()->is_illegal(), "must be illegal too"); - AddressLiteral laddr((address)addr->disp(), relocInfo::none); - if (! 
__ reachable(laddr)) { - __ movptr(tmp, laddr.addr()); - Address res(tmp, 0); - return res; - } else { - return __ as_Address(laddr); - } - } - - Register base = addr->base()->as_pointer_register(); - - if (addr->index()->is_illegal()) { - return Address( base, addr->disp()); - } else if (addr->index()->is_cpu_register()) { - Register index = addr->index()->as_pointer_register(); - return Address(base, index, (Address::ScaleFactor) addr->scale(), addr->disp()); - } else if (addr->index()->is_constant()) { - intptr_t addr_offset = (addr->index()->as_constant_ptr()->as_jint() << addr->scale()) + addr->disp(); - assert(Assembler::is_simm32(addr_offset), "must be"); - - return Address(base, addr_offset); - } else { - Unimplemented(); - return Address(); - } -} - -Address CodeStub::as_Address(LIR_Assembler* ce, LIR_Address* addr) { - return as_Address(ce, addr, rscratch1); -} - - void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); InternalAddress safepoint_pc(ce->masm()->pc() - ce->masm()->offset() + safepoint_offset()); diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 5d19c5f7fed28..43d380e625bb2 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1340,8 +1340,8 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L __ jmp(*obj_is_null); __ bind(not_null); - EmitProfileStub *stub - = profile_capture_ratio > 1 ? new EmitProfileStub() : nullptr; + ProfileStub *stub + = profile_capture_ratio > 1 ? 
new ProfileStub() : nullptr; auto lambda = [stub, md, mdo, data, k_RInfo, obj, tmp_load_klass] (LIR_Assembler* ce, LIR_Op* base_op) { @@ -1383,6 +1383,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L } else { __ jcc(Assembler::equal, *obj_is_null); } + if (!k->is_loaded()) { klass2reg_with_patching(k_RInfo, op->info_for_patch()); } else { @@ -1483,17 +1484,17 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { auto threshold = (1ull << 32) >> ratio_shift; assert(threshold > 0, "must be"); - EmitProfileStub *profiling_stub - = profile_capture_ratio > 1 ? new EmitProfileStub() : nullptr; + ProfileStub *profile_stub + = profile_capture_ratio > 1 ? new ProfileStub() : nullptr; - auto lambda = [profiling_stub, md, data, value, + auto lambda = [profile_stub, md, data, value, k_RInfo, klass_RInfo, tmp_load_klass, success_target] (LIR_Assembler* ce, LIR_Op*) { #undef __ #define __ masm-> auto masm = ce->masm(); - if (profiling_stub != nullptr) __ bind(*profiling_stub->entry()); + if (profile_stub != nullptr) __ bind(*profile_stub->entry()); __ testptr(value, value); @@ -1506,8 +1507,8 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); int header_bits = BitData::null_seen_byte_constant(); __ orb(data_addr, header_bits); - if (profiling_stub != nullptr) { - __ jmp(*profiling_stub->continuation()); + if (profile_stub != nullptr) { + __ jmp(*profile_stub->continuation()); } else { __ jmp(*success_target); } @@ -1527,23 +1528,23 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { __ addptr(counter_addr, DataLayout::counter_increment); __ bind(update_done); - if (profiling_stub != nullptr) __ jmp(*profiling_stub->continuation()); + if (profile_stub != nullptr) __ jmp(*profile_stub->continuation()); #undef __ #define __ _masm-> }; - if (profiling_stub != nullptr) { + if (profile_stub != nullptr) { __ step_random(r_profile_rng, 
rscratch1); __ cmpl(r_profile_rng, threshold); - __ jcc(Assembler::below, *profiling_stub->entry()); - __ bind(*profiling_stub->continuation()); + __ jcc(Assembler::below, *profile_stub->entry()); + __ bind(*profile_stub->continuation()); __ testptr(value, value); __ jcc(Assembler::equal, done); - profiling_stub->set_doit(new ProfileCounterStub(lambda, op)); - profiling_stub->set_name("Typecheck profile stub"); - append_code_stub(profiling_stub); + profile_stub->set_doit(new ProfileCounterStub(lambda, op)); + profile_stub->set_name("Typecheck profile stub"); + append_code_stub(profile_stub); } else { lambda(this, op); } @@ -2873,8 +2874,8 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de assert(threshold > 0, "must be"); - EmitProfileStub *counter_stub - = profile_capture_ratio > 1 ? new EmitProfileStub() : nullptr; + ProfileStub *counter_stub + = profile_capture_ratio > 1 ? new ProfileStub() : nullptr; auto lambda = [counter_stub, overflow_stub, freq_op, ratio_shift, incr, dest, dest_adr, temp] (LIR_Assembler* ce, LIR_Op* op) { @@ -3000,8 +3001,8 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { auto threshold = (1ull << 32) >> ratio_shift; assert(threshold > 0, "must be"); - EmitProfileStub *stub - = profile_capture_ratio > 1 ? new EmitProfileStub() : nullptr; + ProfileStub *stub + = profile_capture_ratio > 1 ? 
new ProfileStub() : nullptr; auto lambda = [op, stub] (LIR_Assembler* ce, LIR_Op* base_op) { #undef __ @@ -3048,8 +3049,8 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ciKlass* receiver = vc_data->receiver(i); if (known_klass->equals(receiver)) { Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); - __ addptr(data_addr, DataLayout::counter_increment * ProfileCaptureRatio); - goto exit; + __ addptr(data_addr, DataLayout::counter_increment); + return; } } @@ -3127,8 +3128,8 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { auto threshold = (1ull << 32) >> ratio_shift; assert(threshold > 0, "must be"); - EmitProfileStub *stub - = profile_capture_ratio > 1 ? new EmitProfileStub() : nullptr; + ProfileStub *stub + = profile_capture_ratio > 1 ? new ProfileStub() : nullptr; auto lambda = [stub, mdo_addr, not_null, exact_klass, current_klass, obj, tmp, tmp_load_klass, no_conflict] (LIR_Assembler* ce, LIR_Op*) { @@ -3145,8 +3146,11 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { assert(do_null || do_update, "why are we here?"); assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); - if (stub != nullptr) __ bind(*stub->entry()); + __ verify_oop(obj); +#ifdef ASSERT + assert_different_registers(obj, tmp, rscratch1, mdo_addr.base(), mdo_addr.index()); +#endif if (do_null) { __ testptr(obj, obj); __ jccb(Assembler::notZero, update); diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index c4335d96ad12e..c5440eddbff3f 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -343,31 +343,6 @@ void C1_MacroAssembler::step_random(Register state, Register temp) { } -void C1_MacroAssembler::step_profile_rng(Register state, Register temp, Label &skip) { - if (ProfileCaptureRatio != 1) { -#ifndef PRODUCT - if (CommentedAssembly) { - 
block_comment("step_profile_rng" " {"); - } -#endif - step_random(state, temp); - - int ratio_shift = exact_log2(ProfileCaptureRatio); - int threshold = (1ull << 32) >> ratio_shift; - - cmpl(state, threshold); - if (! getenv("APH_DISABLE")) { - jcc(Assembler::aboveEqual, skip); - } - -#ifndef PRODUCT - if (CommentedAssembly) { - block_comment("} " "step_profile_rng"); - } -#endif - } -} - void C1_MacroAssembler::save_profile_rng() { if (ProfileCaptureRatio != 1) { movl(Address(r15_thread, JavaThread::profile_rng_offset()), r_profile_rng); diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp index 902e41015bc24..a9ddfe1db64b0 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp @@ -130,7 +130,6 @@ // Randomized profile capture void step_random(Register state, Register temp); - void step_profile_rng(Register state, Register temp, Label &skip); void save_profile_rng(); void restore_profile_rng(); diff --git a/src/hotspot/share/c1/c1_CodeStubs.hpp b/src/hotspot/share/c1/c1_CodeStubs.hpp index 6bd9b925f9c0f..bc7c778dbfa1f 100644 --- a/src/hotspot/share/c1/c1_CodeStubs.hpp +++ b/src/hotspot/share/c1/c1_CodeStubs.hpp @@ -140,21 +140,20 @@ struct ProfileCounterStub : public AbstractProfileCounterStub { T _lambda; LIR_Op* _op; - ProfileCounterStub(T lambda, LIR_Op* op) : _lambda(lambda), _op(op) { - } + ProfileCounterStub(T lambda, LIR_Op* op) : _lambda(lambda), _op(op) { } virtual void operator() (LIR_Assembler* ce) { _lambda(ce, _op); } }; -class EmitProfileStub: public CodeStub { +class ProfileStub: public CodeStub { private: AbstractProfileCounterStub *_doit; const char* _name; public: - EmitProfileStub() { - _name = "EmitProfileStub"; + ProfileStub() { + _name = "ProfileStub"; } void set_doit(AbstractProfileCounterStub *doit) { _doit = doit; } void set_name(const char* name) { _name = name; } diff --git a/src/hotspot/share/c1/c1_LIR.cpp 
b/src/hotspot/share/c1/c1_LIR.cpp index dd406272ac9d3..45cfd5b4bfa42 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -1779,7 +1779,7 @@ const char * LIR_Op::name() const { case lir_profile_call: s = "profile_call"; break; // LIR_OpProfileType case lir_profile_type: s = "profile_type"; break; - case lir_increment_profile_ctr: s = "increment_profile_ctr"; break; + case lir_increment_profile_ctr: s = "increment_profile_ctr"; break; // LIR_OpAssert #ifdef ASSERT case lir_assert: s = "assert"; break; From 5d7425b4029a8e86050864278058d2133c1f3029 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Sun, 16 Nov 2025 10:18:29 +0000 Subject: [PATCH 30/48] Fix bug introduced during cleanup --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 43d380e625bb2..38aa8bbd977df 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -3050,7 +3050,7 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { if (known_klass->equals(receiver)) { Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); __ addptr(data_addr, DataLayout::counter_increment); - return; + goto exit; } } From e9c809cd7ca595fd37c62f915c1ed710dcb5a898 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Mon, 17 Nov 2025 10:39:39 +0000 Subject: [PATCH 31/48] cleanup --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 21 ++++++++++--------- src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp | 17 +++++++++++---- src/hotspot/share/c1/c1_CodeStubs.hpp | 11 +++++----- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 38aa8bbd977df..a54703de5d083 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ 
b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1369,12 +1369,12 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L }; if (stub != nullptr) { - __ step_random(r_profile_rng, rscratch1); __ cmpl(r_profile_rng, threshold); __ jcc(Assembler::below, *stub->entry()); __ bind(*stub->continuation()); + __ step_random(r_profile_rng, rscratch1); - stub->set_doit(new ProfileCounterStub(lambda, op)); + stub->set_doit(new LambdaWrapper(lambda, op)); stub->set_name("Typecheck stub"); append_code_stub(stub); } else { @@ -1535,14 +1535,14 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { }; if (profile_stub != nullptr) { - __ step_random(r_profile_rng, rscratch1); __ cmpl(r_profile_rng, threshold); __ jcc(Assembler::below, *profile_stub->entry()); __ bind(*profile_stub->continuation()); + __ step_random(r_profile_rng, rscratch1); __ testptr(value, value); __ jcc(Assembler::equal, done); - profile_stub->set_doit(new ProfileCounterStub(lambda, op)); + profile_stub->set_doit(new LambdaWrapper(lambda, op)); profile_stub->set_name("Typecheck profile stub"); append_code_stub(profile_stub); } else { @@ -2969,12 +2969,12 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de }; if (counter_stub != nullptr) { - __ step_random(r_profile_rng, temp); __ cmpl(r_profile_rng, threshold); __ jcc(Assembler::below, *counter_stub->entry()); __ bind(*counter_stub->continuation()); + __ step_random(r_profile_rng, temp); - counter_stub->set_doit(new ProfileCounterStub(lambda, nullptr)); + counter_stub->set_doit(new LambdaWrapper(lambda, nullptr)); counter_stub->set_name("IncrementProfileCtr"); append_code_stub(counter_stub); } else { @@ -3092,12 +3092,12 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { }; if (stub != nullptr) { - __ step_random(r_profile_rng, temp); __ cmpl(r_profile_rng, threshold); __ jcc(Assembler::below, *stub->entry()); __ bind(*stub->continuation()); + __ step_random(r_profile_rng, 
temp); - stub->set_doit(new ProfileCounterStub(lambda, op)); + stub->set_doit(new LambdaWrapper(lambda, op)); stub->set_name("ProfileCallStub"); append_code_stub(stub); } else { @@ -3146,6 +3146,7 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { assert(do_null || do_update, "why are we here?"); assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); + if (stub != nullptr) __ bind(*stub->entry()); __ verify_oop(obj); #ifdef ASSERT @@ -3299,12 +3300,12 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { }; if (stub != nullptr) { - __ step_random(r_profile_rng, tmp); __ cmpl(r_profile_rng, threshold); __ jcc(Assembler::below, *stub->entry()); __ bind(*stub->continuation()); + __ step_random(r_profile_rng, tmp); - stub->set_doit(new ProfileCounterStub(lambda, op)); + stub->set_doit(new LambdaWrapper(lambda, op)); stub->set_name("ProfileTypeStub"); append_code_stub(stub); } else { diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index c5440eddbff3f..3b42a59164bce 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -315,10 +315,19 @@ void C1_MacroAssembler::step_random(Register state, Register temp) { // sall(temp, 5); // xorl(state, temp); - /* LCG from glibc. */ - movl(temp, 1103515245); - imull(state, temp); - addl(state, 12345); + if (VM_Version::supports_sse4_2()) { + /* CRC used as a psuedo-random-number generator */ + // From a theoretical point of view a CRC is a poor RNG because + // it's linear. But it's unbeatably fast, and plenty good enough + // for what we need. + movl(temp, 1); + crc32(state, temp, /*sizeInBytes*/2); + } else { + /* LCG from glibc. 
*/ + movl(temp, 1103515245); + imull(state, temp); + addl(state, 12345); + } int ratio_shift = exact_log2(ProfileCaptureRatio); int threshold = (1ull << 32) >> ratio_shift; diff --git a/src/hotspot/share/c1/c1_CodeStubs.hpp b/src/hotspot/share/c1/c1_CodeStubs.hpp index bc7c778dbfa1f..bf97bc2ecadf6 100644 --- a/src/hotspot/share/c1/c1_CodeStubs.hpp +++ b/src/hotspot/share/c1/c1_CodeStubs.hpp @@ -130,17 +130,17 @@ class CounterOverflowStub: public CodeStub { }; -class AbstractProfileCounterStub : public CompilationResourceObj { +class AbstractLambdaWrapper : public CompilationResourceObj { public: virtual void operator() (LIR_Assembler* ce) = 0; }; template -struct ProfileCounterStub : public AbstractProfileCounterStub { +struct LambdaWrapper : public AbstractLambdaWrapper { T _lambda; LIR_Op* _op; - ProfileCounterStub(T lambda, LIR_Op* op) : _lambda(lambda), _op(op) { } + LambdaWrapper(T lambda, LIR_Op* op) : _lambda(lambda), _op(op) { } virtual void operator() (LIR_Assembler* ce) { _lambda(ce, _op); } @@ -148,14 +148,14 @@ struct ProfileCounterStub : public AbstractProfileCounterStub { class ProfileStub: public CodeStub { private: - AbstractProfileCounterStub *_doit; + AbstractLambdaWrapper *_doit; const char* _name; public: ProfileStub() { _name = "ProfileStub"; } - void set_doit(AbstractProfileCounterStub *doit) { _doit = doit; } + void set_doit(AbstractLambdaWrapper *doit) { _doit = doit; } void set_name(const char* name) { _name = name; } virtual void emit_code(LIR_Assembler* ce) { (*_doit)(ce); @@ -166,6 +166,7 @@ class ProfileStub: public CodeStub { virtual void visit(LIR_OpVisitState* visitor) { } }; + class ConversionStub: public CodeStub { private: Bytecodes::Code _bytecode; From 619805b90a1d730cdfb1ee57cbe0d6d541db2f0a Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Mon, 17 Nov 2025 14:47:03 +0000 Subject: [PATCH 32/48] cleanup --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 157 +++++++++--------- src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp | 3 - 
src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp | 6 +- src/hotspot/share/c1/c1_CodeStubs.hpp | 13 +- src/hotspot/share/c1/c1_LIRGenerator.hpp | 2 +- 5 files changed, 88 insertions(+), 93 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index a54703de5d083..4bb43d41267d5 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1374,7 +1374,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L __ bind(*stub->continuation()); __ step_random(r_profile_rng, rscratch1); - stub->set_doit(new LambdaWrapper(lambda, op)); + stub->set_action(lambda, op); stub->set_name("Typecheck stub"); append_code_stub(stub); } else { @@ -1542,7 +1542,7 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { __ testptr(value, value); __ jcc(Assembler::equal, done); - profile_stub->set_doit(new LambdaWrapper(lambda, op)); + profile_stub->set_action(lambda, op); profile_stub->set_name("Typecheck profile stub"); append_code_stub(profile_stub); } else { @@ -2236,7 +2236,7 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op void LIR_Assembler::align_call(LIR_Code code) { - // We do this here in order not affect call site alignment. + // We do this here in order not to affect call site alignment. __ save_profile_rng(); // make sure that the displacement word of the call ends up word aligned int offset = __ offset(); @@ -2856,12 +2856,6 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op, LIR_Opr freq_op, LIR_Opr step_op, CodeStub* overflow_stub) { - // Register temp = temp_op->is_register() ? temp_op->as_register() : noreg; - Register temp = temp_op->is_register() ? temp_op->as_register() : noreg; - // RegisterOrConstant dest_adr = addr->is_address() ? 
as_Address(addr->as_address_ptr()) - - Address dest_adr = as_Address(addr->as_address_ptr()); - #ifndef PRODUCT if (CommentedAssembly) { __ block_comment("increment_profile_ctr" " {"); @@ -2877,8 +2871,11 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de ProfileStub *counter_stub = profile_capture_ratio > 1 ? new ProfileStub() : nullptr; + Register temp = temp_op->is_register() ? temp_op->as_register() : noreg; + Address dest_adr = as_Address(addr->as_address_ptr()); + auto lambda = [counter_stub, overflow_stub, freq_op, ratio_shift, incr, - dest, dest_adr, temp] (LIR_Assembler* ce, LIR_Op* op) { + temp, dest, dest_adr] (LIR_Assembler* ce, LIR_Op* op) { #undef __ #define __ masm-> @@ -2974,7 +2971,7 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de __ bind(*counter_stub->continuation()); __ step_random(r_profile_rng, temp); - counter_stub->set_doit(new LambdaWrapper(lambda, nullptr)); + counter_stub->set_action(lambda, nullptr); counter_stub->set_name("IncrementProfileCtr"); append_code_stub(counter_stub); } else { @@ -3008,84 +3005,84 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { #undef __ #define __ masm-> - auto masm = ce->masm(); - LIR_OpProfileCall* op = base_op->as_OpProfileCall(); - ciMethod* method = op->profiled_method(); - int bci = op->profiled_bci(); - ciMethod* callee = op->profiled_callee(); - Register tmp_load_klass = rscratch1; + auto masm = ce->masm(); + LIR_OpProfileCall* op = base_op->as_OpProfileCall(); + ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); + ciMethod* callee = op->profiled_callee(); + Register tmp_load_klass = rscratch1; - Register temp = op->tmp1()->as_register_lo(); + Register temp = op->tmp1()->as_register_lo(); - if (stub != nullptr) __ bind(*stub->entry()); + if (stub != nullptr) __ bind(*stub->entry()); - // Update counter for all call types - ciMethodData* md = method->method_data_or_null(); - assert(md != 
nullptr, "Sanity"); - ciProfileData* data = md->bci_to_data(bci); - assert(data != nullptr && data->is_CounterData(), "need CounterData for calls"); - assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); - Register mdo = op->mdo()->as_register(); - __ mov_metadata(mdo, md->constant_encoding()); - Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); - // Perform additional virtual call profiling for invokevirtual and - // invokeinterface bytecodes - if (op->should_profile_receiver_type()) { - assert(op->recv()->is_single_cpu(), "recv must be allocated"); - Register recv = op->recv()->as_register(); - assert_different_registers(mdo, recv); - assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); - ciKlass* known_klass = op->known_holder(); - if (C1OptimizeVirtualCallProfiling && known_klass != nullptr) { - // We know the type that will be seen at this call site; we can - // statically update the MethodData* rather than needing to do - // dynamic tests on the receiver type - - // NOTE: we should probably put a lock around this search to - // avoid collisions by concurrent compilations - ciVirtualCallData* vc_data = (ciVirtualCallData*) data; - uint i; - for (i = 0; i < VirtualCallData::row_limit(); i++) { - ciKlass* receiver = vc_data->receiver(i); - if (known_klass->equals(receiver)) { - Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); - __ addptr(data_addr, DataLayout::counter_increment); - goto exit; + // Update counter for all call types + ciMethodData* md = method->method_data_or_null(); + assert(md != nullptr, "Sanity"); + ciProfileData* data = md->bci_to_data(bci); + assert(data != nullptr && data->is_CounterData(), "need CounterData for calls"); + assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); + Register mdo = op->mdo()->as_register(); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr(mdo, 
md->byte_offset_of_slot(data, CounterData::count_offset())); + // Perform additional virtual call profiling for invokevirtual and + // invokeinterface bytecodes + if (op->should_profile_receiver_type()) { + assert(op->recv()->is_single_cpu(), "recv must be allocated"); + Register recv = op->recv()->as_register(); + assert_different_registers(mdo, recv); + assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); + ciKlass* known_klass = op->known_holder(); + if (C1OptimizeVirtualCallProfiling && known_klass != nullptr) { + // We know the type that will be seen at this call site; we can + // statically update the MethodData* rather than needing to do + // dynamic tests on the receiver type + + // NOTE: we should probably put a lock around this search to + // avoid collisions by concurrent compilations + ciVirtualCallData* vc_data = (ciVirtualCallData*) data; + uint i; + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (known_klass->equals(receiver)) { + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); + __ addptr(data_addr, DataLayout::counter_increment); + goto exit; + } } - } - // Receiver type not found in profile data; select an empty slot - - // Note that this is less efficient than it should be because it - // always does a write to the receiver part of the - // VirtualCallData rather than just the first time - for (i = 0; i < VirtualCallData::row_limit(); i++) { - ciKlass* receiver = vc_data->receiver(i); - if (receiver == nullptr) { - Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); - __ mov_metadata(recv_addr, known_klass->constant_encoding(), rscratch1); - Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); - __ addptr(data_addr, DataLayout::counter_increment * ProfileCaptureRatio); - goto exit; + // Receiver type not found in profile data; select 
an empty slot + + // Note that this is less efficient than it should be because it + // always does a write to the receiver part of the + // VirtualCallData rather than just the first time + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (receiver == nullptr) { + Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); + __ mov_metadata(recv_addr, known_klass->constant_encoding(), rscratch1); + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); + __ addptr(data_addr, DataLayout::counter_increment * ProfileCaptureRatio); + goto exit; + } } + } else { + __ load_klass(recv, recv, tmp_load_klass); + Label update_done; + ce->type_profile_helper(mdo, md, data, recv, &update_done); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. + __ addptr(counter_addr, DataLayout::counter_increment * ProfileCaptureRatio); + + __ bind(update_done); } + exit: {} } else { - __ load_klass(recv, recv, tmp_load_klass); - Label update_done; - ce->type_profile_helper(mdo, md, data, recv, &update_done); - // Receiver did not match any saved receiver and there is no empty row for it. - // Increment total counter to indicate polymorphic case. 
+ // Static call __ addptr(counter_addr, DataLayout::counter_increment * ProfileCaptureRatio); - - __ bind(update_done); } - exit: {} - } else { - // Static call - __ addptr(counter_addr, DataLayout::counter_increment * ProfileCaptureRatio); - } - if (stub != nullptr) __ jmp(*stub->continuation()); + if (stub != nullptr) __ jmp(*stub->continuation()); #undef __ #define __ _masm-> @@ -3097,7 +3094,7 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { __ bind(*stub->continuation()); __ step_random(r_profile_rng, temp); - stub->set_doit(new LambdaWrapper(lambda, op)); + stub->set_action(lambda, op); stub->set_name("ProfileCallStub"); append_code_stub(stub); } else { @@ -3305,7 +3302,7 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { __ bind(*stub->continuation()); __ step_random(r_profile_rng, tmp); - stub->set_doit(new LambdaWrapper(lambda, op)); + stub->set_action(lambda, op); stub->set_name("ProfileTypeStub"); append_code_stub(stub); } else { diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp index b2ca0dd1a168f..739c84f44f701 100644 --- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp @@ -1321,9 +1321,6 @@ void LIRGenerator::do_InstanceOf(InstanceOf* x) { x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); } -void LIRGenerator::do_IncProfileCtr(ProfileInvoke* x) { -} - // Intrinsic for Class::isInstance address LIRGenerator::isInstance_entry() { return Runtime1::entry_for(StubId::c1_is_instance_of_id); diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index 3b42a59164bce..b631769b5df37 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -317,9 +317,9 @@ void C1_MacroAssembler::step_random(Register state, Register temp) { if (VM_Version::supports_sse4_2()) { /* CRC used as a psuedo-random-number 
generator */ - // From a theoretical point of view a CRC is a poor RNG because - // it's linear. But it's unbeatably fast, and plenty good enough - // for what we need. + // In effect, the CRC instruction is being used here for its + // linear feedback shift register. It's unbeatably fast, and + // plenty good enough for what we need. movl(temp, 1); crc32(state, temp, /*sizeInBytes*/2); } else { diff --git a/src/hotspot/share/c1/c1_CodeStubs.hpp b/src/hotspot/share/c1/c1_CodeStubs.hpp index bf97bc2ecadf6..b0a581f6bc66c 100644 --- a/src/hotspot/share/c1/c1_CodeStubs.hpp +++ b/src/hotspot/share/c1/c1_CodeStubs.hpp @@ -105,7 +105,7 @@ class C1SafepointPollStub: public CodeStub { }; class CounterOverflowStub: public CodeStub { - protected: + private: CodeEmitInfo* _info; int _bci; LIR_Opr _method; @@ -119,8 +119,8 @@ class CounterOverflowStub: public CodeStub { virtual void emit_code(LIR_Assembler* e); virtual void visit(LIR_OpVisitState* visitor) { - if (_info) visitor->do_slow_case(_info); - if (_method->is_valid()) visitor->do_input(_method); + visitor->do_slow_case(_info); + visitor->do_input(_method); } #ifndef PRODUCT @@ -148,17 +148,18 @@ struct LambdaWrapper : public AbstractLambdaWrapper { class ProfileStub: public CodeStub { private: - AbstractLambdaWrapper *_doit; + AbstractLambdaWrapper *_action; const char* _name; public: ProfileStub() { _name = "ProfileStub"; } - void set_doit(AbstractLambdaWrapper *doit) { _doit = doit; } + template + void set_action(U action, LIR_Op *op) { _action = new LambdaWrapper(action, op); } void set_name(const char* name) { _name = name; } virtual void emit_code(LIR_Assembler* ce) { - (*_doit)(ce); + (*_action)(ce); } #ifndef PRODUCT virtual void print_name(outputStream* out) const { out->print("%s", _name); } diff --git a/src/hotspot/share/c1/c1_LIRGenerator.hpp b/src/hotspot/share/c1/c1_LIRGenerator.hpp index f85d6e2309dd0..d864f8cca2540 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.hpp +++ 
b/src/hotspot/share/c1/c1_LIRGenerator.hpp @@ -584,13 +584,13 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { virtual void do_ProfileCall (ProfileCall* x); virtual void do_ProfileReturnType (ProfileReturnType* x); virtual void do_ProfileInvoke (ProfileInvoke* x); - virtual void do_IncProfileCtr (ProfileInvoke* x); virtual void do_RuntimeCall (RuntimeCall* x); virtual void do_MemBar (MemBar* x); virtual void do_RangeCheckPredicate(RangeCheckPredicate* x); #ifdef ASSERT virtual void do_Assert (Assert* x); #endif + virtual void do_IncProfileCtr (ProfileInvoke* x) { } #ifdef C1_LIRGENERATOR_MD_HPP #include C1_LIRGENERATOR_MD_HPP From 2c249934d081595de2573fd4cecfc6178842643d Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Tue, 18 Nov 2025 09:03:32 +0000 Subject: [PATCH 33/48] So far, so good. --- src/hotspot/cpu/aarch64/assembler_aarch64.hpp | 3 + .../cpu/aarch64/c1_FrameMap_aarch64.cpp | 9 +- .../cpu/aarch64/c1_FrameMap_aarch64.hpp | 5 + .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 136 ++++++++++++++++++ .../cpu/aarch64/c1_LIRGenerator_aarch64.cpp | 4 + .../cpu/aarch64/c1_MacroAssembler_aarch64.cpp | 72 ++++++++++ .../cpu/aarch64/c1_MacroAssembler_aarch64.hpp | 5 + 7 files changed, 233 insertions(+), 1 deletion(-) diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp index a8f378e524fc3..cb8344d5749e5 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp @@ -148,6 +148,9 @@ constexpr Register rdispatch = r21; // dispatch table base constexpr Register esp = r20; // Java expression stack pointer constexpr Register r19_sender_sp = r19; // sender's SP while in interpreter +// State for randomized profile counters. Used by C1. +extern Register r_profile_rng; + // Preserved predicate register with all elements set TRUE. 
constexpr PRegister ptrue = p7; diff --git a/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp index 83d0952dcb487..f03c0fb1d696a 100644 --- a/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp @@ -200,7 +200,14 @@ void FrameMap::initialize() { map_register(i, r27); r27_opr = LIR_OprFact::single_cpu(i); i++; // rheapbase } - if(!PreserveFramePointer) { + // r_profile_rng is allocated conditionally. It is used to hold the random + // generator for profile counters. + r_profile_rng + = (UseCompressedOops && ProfileCaptureRatio > 1) ? r26 + : (ProfileCaptureRatio > 1) ? r27 + : noreg; + + if(!PreserveFramePointer) { map_register(i, r29); r29_opr = LIR_OprFact::single_cpu(i); i++; } diff --git a/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.hpp b/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.hpp index 4d783418429cb..7091f3bdb6be1 100644 --- a/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.hpp @@ -147,6 +147,11 @@ range -= 1; } + // Use r26 for randomized profile captures. 
+ if (ProfileCaptureRatio > 1) { + range -= 1; + } + // r29 is not allocatable when PreserveFramePointer is on, // but fp saving is handled in MacroAssembler::build_frame()/remove_frame() if (exclude_fp) { diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index 9ab463125fe11..4088ec75b1319 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -1983,6 +1983,7 @@ void LIR_Assembler::align_call(LIR_Code code) { } void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { + __ save_profile_rng(); address call = __ trampoline_call(Address(op->addr(), rtype)); if (call == nullptr) { bailout("trampoline stub overflow"); @@ -1990,10 +1991,12 @@ void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { } add_call_info(code_offset(), op->info()); __ post_call_nop(); + __ restore_profile_rng(); } void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { + __ save_profile_rng(); address call = __ ic_call(op->addr()); if (call == nullptr) { bailout("trampoline stub overflow"); @@ -2001,6 +2004,7 @@ void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { } add_call_info(code_offset(), op->info()); __ post_call_nop(); + __ restore_profile_rng(); } void LIR_Assembler::emit_static_call_stub() { @@ -2507,6 +2511,138 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { __ load_klass(result, obj); } +void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op, + LIR_Opr freq_op, LIR_Opr step_op, + CodeStub* overflow_stub) { +#ifndef PRODUCT + if (CommentedAssembly) { + __ block_comment("increment_profile_ctr" " {"); + } +#endif + + int profile_capture_ratio = ProfileCaptureRatio; + int ratio_shift = exact_log2(profile_capture_ratio); + unsigned long threshold = (UCONST64(1) << 32) >> ratio_shift; + + assert(threshold > 0, "must be"); + + ProfileStub *counter_stub + = 
profile_capture_ratio > 1 ? new ProfileStub() : nullptr; + + Register temp = temp_op->is_register() ? temp_op->as_register() : noreg; + Address dest_adr = as_Address(addr->as_address_ptr()); + + auto lambda = [counter_stub, overflow_stub, freq_op, ratio_shift, incr, + temp, dest, dest_adr] (LIR_Assembler* ce, LIR_Op* op) { + +#undef __ +#define __ masm-> + + auto masm = ce->masm(); + + if (counter_stub != nullptr) __ bind(*counter_stub->entry()); + + if (incr->is_register()) { + Register inc = incr->as_register(); + auto ptr = __ legitimize_address(dest_adr, sizeof (jint), rscratch1); + __ ldr(temp, ptr); + if (ProfileCaptureRatio > 1) { + __ lsl(inc, inc, ratio_shift); + } + __ add(temp, temp, inc); + __ str(temp, ptr); + if (dest->is_register()) __ mov(dest->as_register(), temp); + if (ProfileCaptureRatio > 1) { + __ lsr(inc, inc, ratio_shift); + } + } else { + jint inc = incr->as_constant_ptr()->as_jint_bits(); + switch (dest->type()) { + case T_INT: { + inc *= ProfileCaptureRatio; + auto ptr = __ legitimize_address(dest_adr, sizeof (jint), rscratch1); + __ ldrw(temp, ptr); + __ addw(temp, temp, inc); + __ strw(temp, ptr); + if (dest->is_register()) __ movw(dest->as_register(), temp); + + break; + } + case T_LONG: { + inc *= ProfileCaptureRatio; + auto ptr = __ legitimize_address(dest_adr, sizeof (jlong), rscratch1); + __ ldr(temp, ptr); + __ add(temp, temp, inc); + __ str(temp, ptr); + if (dest->is_register()) __ mov(dest->as_register_lo(), temp); + + break; + } + default: + ShouldNotReachHere(); + } + + if (incr->is_valid() && overflow_stub) { + if (!freq_op->is_valid()) { + if (!incr->is_constant()) { + __ cmp(incr->as_register(), (u1)0); + __ br(__ EQ, *overflow_stub->entry()); + } else { + __ b(*overflow_stub->entry()); + goto exit; + } + } else { + Register result = + dest->type() == T_INT ? dest->as_register() : + dest->type() == T_LONG ? 
dest->as_register_lo() : + noreg; + if (!incr->is_constant()) { + // If step is 0, make sure the stub check below always fails + __ cmp(incr->as_register(), (u1)0); + __ mov(temp, InvocationCounter::count_increment * ProfileCaptureRatio); + __ csel(result, temp, result, __ EQ); + } + __ ands(zr, result, freq_op->as_jint()); + __ br(__ EQ, *overflow_stub->entry()); + } + } + } + + if (counter_stub != nullptr) { + __ b(*counter_stub->continuation()); + } + + exit: { } + +#undef __ +#define __ _masm-> + }; + + if (counter_stub != nullptr) { + __ mov(rscratch1, threshold); + __ cmpw(r_profile_rng, rscratch1); + __ br(__ LO, *counter_stub->entry()); + __ bind(*counter_stub->continuation()); + __ step_random(r_profile_rng, temp); + + counter_stub->set_action(lambda, nullptr); + counter_stub->set_name("IncrementProfileCtr"); + append_code_stub(counter_stub); + } else { + lambda(this, nullptr); + } + + if (overflow_stub != nullptr) { + __ bind(*overflow_stub->continuation()); + } + +#ifndef PRODUCT + if (CommentedAssembly) { + __ block_comment("} increment_profile_ctr"); + } +#endif +} + void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ciMethod* method = op->profiled_method(); int bci = op->profiled_bci(); diff --git a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp index ad26d494b2d42..b182c12db5679 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp @@ -1379,6 +1379,10 @@ void LIRGenerator::do_If(If* x) { __ cmp(lir_cond(cond), left, right); // Generate branch profiling. Profiling code doesn't kill flags. 
profile_branch(x, cond); + // If we're subsampling counter updates, then profiling code kills flags + // if (ProfileCaptureRatio != 1) { + __ cmp(lir_cond(cond), left, right); + // } move_to_phi(x->state()); if (x->x()->type()->is_float_kind()) { __ branch(lir_cond(cond), x->tsux(), x->usux()); diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp index 31c36e749c596..5bca852ac52ba 100644 --- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp @@ -36,6 +36,8 @@ #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" +Register r_profile_rng; + void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, FloatRegister f0, FloatRegister f1, Register result) @@ -247,6 +249,7 @@ void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { // Note that we do this before creating a frame. generate_stack_overflow_check(bang_size_in_bytes); MacroAssembler::build_frame(framesize); + restore_profile_rng(); // Insert nmethod entry barrier into frame. BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); @@ -255,6 +258,7 @@ void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { void C1_MacroAssembler::remove_frame(int framesize) { MacroAssembler::remove_frame(framesize); + save_profile_rng(); } @@ -276,6 +280,74 @@ void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { ldr(reg, Address(rfp, (offset_in_words + 2) * BytesPerWord)); } +int baz, barf; + +// Randomized profile capture. + +void C1_MacroAssembler::step_random(Register state, Register temp) { + // One of these will be the best for a particular CPU. + + /* Algorithm "xor" from p. 
4 of Marsaglia, "Xorshift RNGs" */ + // movl(temp, state); + // sall(temp, 13); + // xorl(state, temp); + // movl(temp, state); + // shrl(temp, 7); + // xorl(state, temp); + // movl(temp, state); + // sall(temp, 5); + // xorl(state, temp); + + if (VM_Version::supports_crc32()) { + /* CRC used as a pseudo-random-number generator */ + // In effect, the CRC instruction is being used here for its + // linear feedback shift register. It's unbeatably fast, and + // plenty good enough for what we need. + mov(temp, 1); + crc32h(state, state, temp); + } else { + /* LCG from glibc. */ + mov(temp, 1103515245); + mulw(state, state, temp); + addw(state, state, 12345); + } + + int ratio_shift = exact_log2(ProfileCaptureRatio); + unsigned int threshold = (1ull << 32) >> ratio_shift; + + if (getenv("APH_BAZ_BARF")) { + Label big, done; + push(RegSet::of(temp), sp); + movw(rscratch1, threshold); + cmp(state, rscratch1); + br(HS, big); + + // lea(temp, ExternalAddress((address)&baz)); + // incrementw(Address(temp)); + // b(done); + + bind(big); + lea(temp, ExternalAddress((address)&barf)); + incrementw(Address(temp)); + + bind(done); + pop(RegSet::of(temp), sp); + } + +} + +void C1_MacroAssembler::save_profile_rng() { + if (ProfileCaptureRatio != 1) { + strw(r_profile_rng, Address(rthread, JavaThread::profile_rng_offset())); + } +} + +void C1_MacroAssembler::restore_profile_rng() { + if (ProfileCaptureRatio != 1) { + ldrw(r_profile_rng, Address(rthread, JavaThread::profile_rng_offset())); + } +} + #ifndef PRODUCT void C1_MacroAssembler::verify_stack_oop(int stack_offset) { diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp index 7b181b104c10f..706d114e19e75 100644 --- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp @@ -113,4 +113,9 @@ using MacroAssembler::null_check; void load_parameter(int offset_in_words, Register reg); + // Randomized
profile capture + void step_random(Register state, Register temp); + void save_profile_rng(); + void restore_profile_rng(); + #endif // CPU_AARCH64_C1_MACROASSEMBLER_AARCH64_HPP From 2ae02161d86ce57547d321306da0f65ee755875a Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Tue, 18 Nov 2025 11:50:18 +0000 Subject: [PATCH 34/48] AArch64 --- .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 316 ++++++++++++------ .../cpu/aarch64/c1_MacroAssembler_aarch64.cpp | 6 +- .../cpu/aarch64/compiledIC_aarch64.cpp | 5 + 3 files changed, 226 insertions(+), 101 deletions(-) diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index 4088ec75b1319..b0954efea3363 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -1230,7 +1230,7 @@ void LIR_Assembler::type_profile_helper(Register mdo, __ cmp(recv, rscratch1); __ br(Assembler::NE, next_test); __ addptr(slot_at(ReceiverTypeData::receiver_count_offset(i)), - DataLayout::counter_increment); + DataLayout::counter_increment * ProfileCaptureRatio); __ b(*update_done); __ bind(next_test); } @@ -1242,7 +1242,7 @@ void LIR_Assembler::type_profile_helper(Register mdo, __ ldr(rscratch1, recv_addr); __ cbnz(rscratch1, next_test); __ str(recv, recv_addr); - __ mov(rscratch1, DataLayout::counter_increment); + __ mov(rscratch1, DataLayout::counter_increment * ProfileCaptureRatio); __ str(rscratch1, slot_at(ReceiverTypeData::receiver_count_offset(i))); __ b(*update_done); __ bind(next_test); @@ -1259,6 +1259,11 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L ciKlass* k = op->klass(); Register Rtmp1 = noreg; + int profile_capture_ratio = ProfileCaptureRatio; + int ratio_shift = exact_log2(profile_capture_ratio); + auto threshold = (1ull << 32) >> ratio_shift; + assert(threshold > 0, "must be"); + // check if it needs to be profiled ciMethodData* md; ciProfileData* data; @@ -1308,14 
+1313,44 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L __ b(*obj_is_null); __ bind(not_null); - Label update_done; - Register recv = k_RInfo; - __ load_klass(recv, obj); - type_profile_helper(mdo, md, data, recv, &update_done); - Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); - __ addptr(counter_addr, DataLayout::counter_increment); + ProfileStub *stub + = profile_capture_ratio > 1 ? new ProfileStub() : nullptr; + + auto lambda = [stub, md, mdo, data, k_RInfo, obj] (LIR_Assembler* ce, LIR_Op* base_op) { + +#undef __ +#define __ masm-> + + auto masm = ce->masm(); + if (stub != nullptr) __ bind(*stub->entry()); + + Label update_done; + Register recv = k_RInfo; + __ load_klass(recv, obj); + ce->type_profile_helper(mdo, md, data, recv, &update_done); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + __ addptr(counter_addr, DataLayout::counter_increment * ProfileCaptureRatio); + + __ bind(update_done); + + if (stub != nullptr) __ b(*stub->continuation()); - __ bind(update_done); +#undef __ +#define __ _masm-> + }; + + if (stub != nullptr) { + __ ubfx(rscratch1, r_profile_rng, 32-ratio_shift, ratio_shift); + __ cbz(rscratch1, *stub->entry()); + __ bind(*stub->continuation()); + __ step_random(r_profile_rng, rscratch2); + + stub->set_action(lambda, op); + stub->set_name("Typecheck stub"); + append_code_stub(stub); + } else { + lambda(this, op); + } } else { __ cbz(obj, *obj_is_null); } @@ -1408,27 +1443,68 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { Label* failure_target = stub->entry(); if (should_profile) { - Label not_null; - Register mdo = klass_RInfo; - __ mov_metadata(mdo, md->constant_encoding()); - __ cbnz(value, not_null); - // Object is null; update MDO and exit - Address data_addr - = __ form_address(rscratch2, mdo, - md->byte_offset_of_slot(data, DataLayout::flags_offset()), 0); - __ ldrb(rscratch1, data_addr); - __ 
orr(rscratch1, rscratch1, BitData::null_seen_byte_constant()); - __ strb(rscratch1, data_addr); - __ b(done); - __ bind(not_null); + int profile_capture_ratio = ProfileCaptureRatio; + int ratio_shift = exact_log2(profile_capture_ratio); + auto threshold = (1ull << 32) >> ratio_shift; + assert(threshold > 0, "must be"); - Label update_done; - Register recv = k_RInfo; - __ load_klass(recv, value); - type_profile_helper(mdo, md, data, recv, &update_done); - Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); - __ addptr(counter_addr, DataLayout::counter_increment); - __ bind(update_done); + ProfileStub *profile_stub + = profile_capture_ratio > 1 ? new ProfileStub() : nullptr; + + auto lambda = [profile_stub, md, data, value, + k_RInfo, klass_RInfo, success_target] (LIR_Assembler* ce, LIR_Op*) { +#undef __ +#define __ masm-> + + auto masm = ce->masm(); + + if (profile_stub != nullptr) __ bind(*profile_stub->entry()); + + Label not_null; + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); + __ cbnz(value, not_null); + // Object is null; update MDO and exit + Address data_addr + = __ form_address(rscratch2, mdo, + md->byte_offset_of_slot(data, DataLayout::flags_offset()), 0); + __ ldrb(rscratch1, data_addr); + __ orr(rscratch1, rscratch1, BitData::null_seen_byte_constant()); + __ strb(rscratch1, data_addr); + if (profile_stub != nullptr) { + __ b(*profile_stub->continuation()); + } else { + __ b(*success_target); + } + __ bind(not_null); + + Label update_done; + Register recv = k_RInfo; + __ load_klass(recv, value); + ce->type_profile_helper(mdo, md, data, recv, &update_done); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + __ addptr(counter_addr, DataLayout::counter_increment * ProfileCaptureRatio); + __ bind(update_done); + + if (profile_stub != nullptr) __ b(*profile_stub->continuation()); + +#undef __ +#define __ _masm-> + }; + + if (profile_stub != nullptr) { + 
__ ubfx(rscratch1, r_profile_rng, 32-ratio_shift, ratio_shift); + __ cbz(rscratch1, *stub->entry()); + __ bind(*profile_stub->continuation()); + __ step_random(r_profile_rng, rscratch2); + __ cbz(value, done); + + profile_stub->set_action(lambda, op); + profile_stub->set_name("Typecheck profile stub"); + append_code_stub(profile_stub); + } else { + lambda(this, op); + } } else { __ cbz(value, done); } @@ -1979,11 +2055,11 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op } -void LIR_Assembler::align_call(LIR_Code code) { } - +void LIR_Assembler::align_call(LIR_Code code) { + __ save_profile_rng(); +} void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { - __ save_profile_rng(); address call = __ trampoline_call(Address(op->addr(), rtype)); if (call == nullptr) { bailout("trampoline stub overflow"); @@ -1996,7 +2072,6 @@ void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { - __ save_profile_rng(); address call = __ ic_call(op->addr()); if (call == nullptr) { bailout("trampoline stub overflow"); @@ -2608,20 +2683,19 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de } } + exit: + if (counter_stub != nullptr) { __ b(*counter_stub->continuation()); } - exit: { } - #undef __ #define __ _masm-> }; if (counter_stub != nullptr) { - __ mov(rscratch1, threshold); - __ cmpw(r_profile_rng, rscratch1); - __ br(__ LO, *counter_stub->entry()); + __ ubfx(rscratch1, r_profile_rng, 32-ratio_shift, ratio_shift); + __ cbz(rscratch1, *counter_stub->entry()); __ bind(*counter_stub->continuation()); __ step_random(r_profile_rng, temp); @@ -2648,73 +2722,119 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { int bci = op->profiled_bci(); ciMethod* callee = op->profiled_callee(); - // Update counter for all call types - ciMethodData* md = method->method_data_or_null(); - assert(md != nullptr, "Sanity"); - 
ciProfileData* data = md->bci_to_data(bci); - assert(data != nullptr && data->is_CounterData(), "need CounterData for calls"); - assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); - Register mdo = op->mdo()->as_register(); - __ mov_metadata(mdo, md->constant_encoding()); - Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); - // Perform additional virtual call profiling for invokevirtual and - // invokeinterface bytecodes - if (op->should_profile_receiver_type()) { - assert(op->recv()->is_single_cpu(), "recv must be allocated"); - Register recv = op->recv()->as_register(); - assert_different_registers(mdo, recv); - assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); - ciKlass* known_klass = op->known_holder(); - if (C1OptimizeVirtualCallProfiling && known_klass != nullptr) { - // We know the type that will be seen at this call site; we can - // statically update the MethodData* rather than needing to do - // dynamic tests on the receiver type - - // NOTE: we should probably put a lock around this search to - // avoid collisions by concurrent compilations - ciVirtualCallData* vc_data = (ciVirtualCallData*) data; - uint i; - for (i = 0; i < VirtualCallData::row_limit(); i++) { - ciKlass* receiver = vc_data->receiver(i); - if (known_klass->equals(receiver)) { - Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); - __ addptr(data_addr, DataLayout::counter_increment); - return; + + Register temp = op->tmp1()->as_register_lo(); + + int profile_capture_ratio = ProfileCaptureRatio; + int ratio_shift = exact_log2(profile_capture_ratio); + auto threshold = (1ull << 32) >> ratio_shift; + assert(threshold > 0, "must be"); + + ProfileStub *stub + = profile_capture_ratio > 1 ? 
new ProfileStub() : nullptr; + + auto lambda = [op, stub] (LIR_Assembler* ce, LIR_Op* base_op) { +#undef __ +#define __ masm-> + + auto masm = ce->masm(); + LIR_OpProfileCall* op = base_op->as_OpProfileCall(); + ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); + ciMethod* callee = op->profiled_callee(); + Register tmp_load_klass = rscratch1; + + Register temp = op->tmp1()->as_register_lo(); + + if (stub != nullptr) __ bind(*stub->entry()); + + // Update counter for all call types + ciMethodData* md = method->method_data_or_null(); + assert(md != nullptr, "Sanity"); + ciProfileData* data = md->bci_to_data(bci); + assert(data != nullptr && data->is_CounterData(), "need CounterData for calls"); + assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); + Register mdo = op->mdo()->as_register(); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + // Perform additional virtual call profiling for invokevirtual and + // invokeinterface bytecodes + if (op->should_profile_receiver_type()) { + assert(op->recv()->is_single_cpu(), "recv must be allocated"); + Register recv = op->recv()->as_register(); + assert_different_registers(mdo, recv); + assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); + ciKlass* known_klass = op->known_holder(); + if (C1OptimizeVirtualCallProfiling && known_klass != nullptr) { + // We know the type that will be seen at this call site; we can + // statically update the MethodData* rather than needing to do + // dynamic tests on the receiver type + + // NOTE: we should probably put a lock around this search to + // avoid collisions by concurrent compilations + ciVirtualCallData* vc_data = (ciVirtualCallData*) data; + uint i; + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (known_klass->equals(receiver)) { + Address data_addr(mdo, 
md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); + __ addptr(data_addr, DataLayout::counter_increment * ProfileCaptureRatio); + goto exit; + } } - } - // Receiver type not found in profile data; select an empty slot - - // Note that this is less efficient than it should be because it - // always does a write to the receiver part of the - // VirtualCallData rather than just the first time - for (i = 0; i < VirtualCallData::row_limit(); i++) { - ciKlass* receiver = vc_data->receiver(i); - if (receiver == nullptr) { - __ mov_metadata(rscratch1, known_klass->constant_encoding()); - Address recv_addr = - __ form_address(rscratch2, mdo, - md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)), - LogBytesPerWord); - __ str(rscratch1, recv_addr); - Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); - __ addptr(data_addr, DataLayout::counter_increment); - return; + // Receiver type not found in profile data; select an empty slot + + // Note that this is less efficient than it should be because it + // always does a write to the receiver part of the + // VirtualCallData rather than just the first time + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (receiver == nullptr) { + __ mov_metadata(rscratch1, known_klass->constant_encoding()); + Address recv_addr = + __ form_address(rscratch2, mdo, + md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)), + LogBytesPerWord); + __ str(rscratch1, recv_addr); + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); + __ addptr(data_addr, DataLayout::counter_increment * ProfileCaptureRatio); + goto exit; + } } + } else { + __ load_klass(recv, recv); + Label update_done; + ce->type_profile_helper(mdo, md, data, recv, &update_done); + // Receiver did not match any saved receiver and there is no empty row for it. 
+ // Increment total counter to indicate polymorphic case. + __ addptr(counter_addr, DataLayout::counter_increment * ProfileCaptureRatio); + + __ bind(update_done); } + exit: {} } else { - __ load_klass(recv, recv); - Label update_done; - type_profile_helper(mdo, md, data, recv, &update_done); - // Receiver did not match any saved receiver and there is no empty row for it. - // Increment total counter to indicate polymorphic case. - __ addptr(counter_addr, DataLayout::counter_increment); - - __ bind(update_done); + // Static call + __ addptr(counter_addr, DataLayout::counter_increment * ProfileCaptureRatio); } + + if (stub != nullptr) __ b(*stub->continuation()); + +#undef __ +#define __ _masm-> + }; + + if (stub != nullptr) { + __ ubfx(rscratch1, r_profile_rng, 32-ratio_shift, ratio_shift); + __ cbz(rscratch1, *stub->entry()); + __ bind(*stub->continuation()); + __ step_random(r_profile_rng, temp); + + stub->set_action(lambda, op); + stub->set_name("ProfileCallStub"); + append_code_stub(stub); } else { - // Static call - __ addptr(counter_addr, DataLayout::counter_increment); + lambda(this, op); } } diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp index 5bca852ac52ba..26e4bd574b797 100644 --- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp @@ -322,9 +322,9 @@ void C1_MacroAssembler::step_random(Register state, Register temp) { cmp(state, rscratch1); br(HS, big); - // lea(temp, ExternalAddress((address)&baz)); - // incrementw(Address(temp)); - // b(done); + lea(temp, ExternalAddress((address)&baz)); + incrementw(Address(temp)); + b(done); bind(big); lea(temp, ExternalAddress((address)&barf)); diff --git a/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp b/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp index 6fe3315014b4e..3426f84954d76 100644 --- a/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp +++ 
b/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp @@ -128,6 +128,11 @@ void CompiledDirectCall::verify() { // Verify stub. address stub = find_stub(); + if (! stub) { + asm("nop"); + for(;;) + find_stub(); + } assert(stub != nullptr, "no stub found for static call"); // Creation also verifies the object. NativeMovConstReg* method_holder From b9f8a623cc53564ca3a25a9aad6d65f422bf3d21 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Tue, 18 Nov 2025 13:38:55 +0000 Subject: [PATCH 35/48] Oops --- src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index b0954efea3363..e8e6d66ad4580 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -1343,6 +1343,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L __ ubfx(rscratch1, r_profile_rng, 32-ratio_shift, ratio_shift); __ cbz(rscratch1, *stub->entry()); __ bind(*stub->continuation()); + __ block_comment("L1346"); __ step_random(r_profile_rng, rscratch2); stub->set_action(lambda, op); @@ -1494,8 +1495,9 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { if (profile_stub != nullptr) { __ ubfx(rscratch1, r_profile_rng, 32-ratio_shift, ratio_shift); - __ cbz(rscratch1, *stub->entry()); + __ cbz(rscratch1, *profile_stub->entry()); __ bind(*profile_stub->continuation()); + __ block_comment("L1508"); __ step_random(r_profile_rng, rscratch2); __ cbz(value, done); @@ -2697,6 +2699,7 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de __ ubfx(rscratch1, r_profile_rng, 32-ratio_shift, ratio_shift); __ cbz(rscratch1, *counter_stub->entry()); __ bind(*counter_stub->continuation()); + __ block_comment("L2702"); __ step_random(r_profile_rng, temp); counter_stub->set_action(lambda, nullptr); @@ -2828,6 +2831,7 @@ void 
LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { __ ubfx(rscratch1, r_profile_rng, 32-ratio_shift, ratio_shift); __ cbz(rscratch1, *stub->entry()); __ bind(*stub->continuation()); + __ block_comment("L2834"); __ step_random(r_profile_rng, temp); stub->set_action(lambda, op); From b907e6d6b264ae7dd295303926cdfc0c4cdd28b7 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Tue, 18 Nov 2025 14:36:24 +0000 Subject: [PATCH 36/48] Delete debug code --- src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp b/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp index 3426f84954d76..6fe3315014b4e 100644 --- a/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp @@ -128,11 +128,6 @@ void CompiledDirectCall::verify() { // Verify stub. address stub = find_stub(); - if (! stub) { - asm("nop"); - for(;;) - find_stub(); - } assert(stub != nullptr, "no stub found for static call"); // Creation also verifies the object. 
NativeMovConstReg* method_holder From 49df925c50757edc0ba04f44aa37f02c4fd21c55 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Wed, 19 Nov 2025 17:17:13 +0000 Subject: [PATCH 37/48] Foo --- .../cpu/aarch64/c1_FrameMap_aarch64.cpp | 34 +++++++-------- .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 41 +++++++++---------- .../cpu/aarch64/macroAssembler_aarch64.hpp | 19 +++++++++ src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 4 +- src/hotspot/share/c1/c1_LIR.hpp | 2 +- src/hotspot/share/c1/c1_LIRGenerator.cpp | 17 ++++---- .../share/compiler/compiler_globals.hpp | 2 +- 7 files changed, 67 insertions(+), 52 deletions(-) diff --git a/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp index f03c0fb1d696a..7006fc8daf4d9 100644 --- a/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp @@ -191,31 +191,31 @@ void FrameMap::initialize() { map_register(i, r23); r23_opr = LIR_OprFact::single_cpu(i); i++; map_register(i, r24); r24_opr = LIR_OprFact::single_cpu(i); i++; map_register(i, r25); r25_opr = LIR_OprFact::single_cpu(i); i++; - map_register(i, r26); r26_opr = LIR_OprFact::single_cpu(i); i++; - // r27 is allocated conditionally. With compressed oops it holds - // the heapbase value and is not visible to the allocator. - bool preserve_rheapbase = i >= nof_caller_save_cpu_regs(); - if (!preserve_rheapbase) { - map_register(i, r27); r27_opr = LIR_OprFact::single_cpu(i); i++; // rheapbase + if (UseCompressedOops && (CompressedOops::base() != nullptr)) { + // r27 is allocated conditionally. With compressed oops it holds + // the heapbase value and is not visible to the allocator. 
+ if (ProfileCaptureRatio > 1) { + r_profile_rng = r26; + } else { + map_register(i, r26); r26_opr = LIR_OprFact::single_cpu(i); i++; + } + } else { // r27 is free + map_register(i, r26); r26_opr = LIR_OprFact::single_cpu(i); i++; + if (ProfileCaptureRatio > 1) { + r_profile_rng = r27; + } else { + // push r27 into the allocation pool + map_register(i, r27); r27_opr = LIR_OprFact::single_cpu(i); i++; + } } - // r_profile_rng is allocated conditionally. It is used to hold the random - // generator for profile counters. - r_profile_rng - = (UseCompressedOops && ProfileCaptureRatio > 1) ? r26 - : (ProfileCaptureRatio > 1) ? r27 - : noreg; - - if(!PreserveFramePointer) { + if(!PreserveFramePointer) { map_register(i, r29); r29_opr = LIR_OprFact::single_cpu(i); i++; } // The unallocatable registers are at the end - if (preserve_rheapbase) { - map_register(i, r27); r27_opr = LIR_OprFact::single_cpu(i); i++; // rheapbase - } map_register(i, r28); r28_opr = LIR_OprFact::single_cpu(i); i++; // rthread if(PreserveFramePointer) { map_register(i, r29); r29_opr = LIR_OprFact::single_cpu(i); i++; // rfp diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index e8e6d66ad4580..cf2d122a3d732 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -2588,12 +2588,12 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { __ load_klass(result, obj); } -void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op, +void LIR_Assembler::increment_profile_ctr(LIR_Opr step, LIR_Opr counter_addr, LIR_Opr dest, LIR_Opr temp_op, LIR_Opr freq_op, LIR_Opr step_op, CodeStub* overflow_stub) { #ifndef PRODUCT if (CommentedAssembly) { - __ block_comment("increment_profile_ctr" " {"); + __ block_comment("increment_event_counter {"); } #endif @@ -2607,9 +2607,9 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, 
LIR_Opr addr, LIR_Opr de = profile_capture_ratio > 1 ? new ProfileStub() : nullptr; Register temp = temp_op->is_register() ? temp_op->as_register() : noreg; - Address dest_adr = as_Address(addr->as_address_ptr()); + Address dest_adr = as_Address(counter_addr->as_address_ptr()); - auto lambda = [counter_stub, overflow_stub, freq_op, ratio_shift, incr, + auto lambda = [counter_stub, overflow_stub, freq_op, ratio_shift, step, temp, dest, dest_adr] (LIR_Assembler* ce, LIR_Op* op) { #undef __ @@ -2619,8 +2619,8 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de if (counter_stub != nullptr) __ bind(*counter_stub->entry()); - if (incr->is_register()) { - Register inc = incr->as_register(); + if (step->is_register()) { + Register inc = step->as_register(); auto ptr = __ legitimize_address(dest_adr, sizeof (jint), rscratch1); __ ldr(temp, ptr); if (ProfileCaptureRatio > 1) { @@ -2633,7 +2633,7 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de __ lsr(inc, inc, ratio_shift); } } else { - jint inc = incr->as_constant_ptr()->as_jint_bits(); + jint inc = step->as_constant_ptr()->as_jint_bits(); switch (dest->type()) { case T_INT: { inc *= ProfileCaptureRatio; @@ -2659,34 +2659,33 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de ShouldNotReachHere(); } - if (incr->is_valid() && overflow_stub) { + if (step->is_valid() && overflow_stub) { if (!freq_op->is_valid()) { - if (!incr->is_constant()) { - __ cmp(incr->as_register(), (u1)0); + if (!step->is_constant()) { + __ cmp(step->as_register(), (u1)0); __ br(__ EQ, *overflow_stub->entry()); } else { __ b(*overflow_stub->entry()); - goto exit; + return; } } else { Register result = dest->type() == T_INT ? dest->as_register() : dest->type() == T_LONG ? 
dest->as_register_lo() : noreg; - if (!incr->is_constant()) { + if (!step->is_constant()) { // If step is 0, make sure the stub check below always fails - __ cmp(incr->as_register(), (u1)0); + __ cmp(step->as_register(), (u1)0); __ mov(temp, InvocationCounter::count_increment * ProfileCaptureRatio); - __ csel(result, temp, result, __ EQ); + __ csel(result, result, temp, __ NE); } - __ ands(zr, result, freq_op->as_jint()); - __ br(__ EQ, *overflow_stub->entry()); + juint mask = freq_op->as_jint(); + __ andw(rscratch1, result, mask); + __ cbzw(rscratch1, *overflow_stub->entry()); } } } - exit: - if (counter_stub != nullptr) { __ b(*counter_stub->continuation()); } @@ -2696,14 +2695,14 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de }; if (counter_stub != nullptr) { - __ ubfx(rscratch1, r_profile_rng, 32-ratio_shift, ratio_shift); + __ ubfx(rscratch1, r_profile_rng, 32 - ratio_shift, ratio_shift); __ cbz(rscratch1, *counter_stub->entry()); __ bind(*counter_stub->continuation()); __ block_comment("L2702"); __ step_random(r_profile_rng, temp); counter_stub->set_action(lambda, nullptr); - counter_stub->set_name("IncrementProfileCtr"); + counter_stub->set_name("IncrementEventCounter"); append_code_stub(counter_stub); } else { lambda(this, nullptr); @@ -2715,7 +2714,7 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de #ifndef PRODUCT if (CommentedAssembly) { - __ block_comment("} increment_profile_ctr"); + __ block_comment("} increment_event_counter"); } #endif } diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index d5a16e424e428..67ac1ec79e7b6 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -480,6 +480,25 @@ class MacroAssembler: public Assembler { WRAP(smaddl) WRAP(smsubl) WRAP(umaddl) WRAP(umsubl) #undef WRAP + using Assembler::andw, Assembler::andr; + void 
andw(Register Rd, Register Rn, uint64_t imm) { + if (operand_valid_for_logical_immediate(/*is32*/true, imm)) { + Assembler::andw(Rd, Rn, imm); + } else { + assert(Rd != Rn, "must be"); + movw(Rd, imm); + andw(Rd, Rn, Rd); + } + } + void andr(Register Rd, Register Rn, uint64_t imm) { + if (operand_valid_for_logical_immediate(/*is32*/false, imm)) { + Assembler::andr(Rd, Rn, imm); + } else { + assert(Rd != Rn, "must be"); + mov(Rd, imm); + andr(Rd, Rn, Rd); + } + } // macro assembly operations needed for aarch64 diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 4bb43d41267d5..f756314d7e796 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -2947,7 +2947,7 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de // If step is 0, make sure the stub check below always fails __ cmpl(incr->as_register(), 0); __ movl(temp, InvocationCounter::count_increment * ProfileCaptureRatio); - __ cmovl(Assembler::equal, result, temp); + __ cmovl(Assembler::notEqual, result, temp); } __ andl(result, freq_op->as_jint()); __ jcc(Assembler::equal, *overflow_stub->entry()); @@ -3375,13 +3375,11 @@ void LIR_Assembler::leal(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, Co void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) { assert(!tmp->is_valid(), "don't need temporary"); - __ save_profile_rng(); __ call(RuntimeAddress(dest)); if (info != nullptr) { add_call_info_here(info); } __ post_call_nop(); - __ restore_profile_rng(); } diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index 85622baeae6f6..5dce174aa1ee3 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -1620,7 +1620,7 @@ class LIR_Op2: public LIR_Op { } LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = 
LIR_OprFact::illegalOpr, - LIR_Opr tmp3 = LIR_OprFact::illegalOpr, LIR_Opr tmp4 = LIR_OprFact::illegalOpr, LIR_Opr tmp5 = LIR_OprFact::illegalOpr, int profile_limit = 0) + LIR_Opr tmp3 = LIR_OprFact::illegalOpr, LIR_Opr tmp4 = LIR_OprFact::illegalOpr, LIR_Opr tmp5 = LIR_OprFact::illegalOpr) : LIR_Op(code, result, nullptr) , _opr1(opr1) , _opr2(opr2) diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index a13719905de64..077dbb726879c 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -3177,22 +3177,21 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, LIR_Opr result = notify ? new_register(T_INT) : LIR_OprFact::intConst(0); LIR_Opr tmp = new_register(T_INT); - CodeStub* overflow = nullptr; - if (notify && (!backedge || UseOnStackReplacement)) { + int ratio_shift = exact_log2(ProfileCaptureRatio); LIR_Opr meth = LIR_OprFact::metadataConst(method->constant_encoding()); // The bci for info can point to cmp for if's we want the if bci - int freq = frequency - // Clear the bottom bit based on capture ratio, such that we - // detect overflows. - >> exact_log2(ProfileCaptureRatio) << exact_log2(ProfileCaptureRatio) - << InvocationCounter::count_shift; - overflow = new CounterOverflowStub (info, bci, meth); + CodeStub* overflow = new CounterOverflowStub (info, bci, meth); + // Zero the low-order bits of the frequency, otherwise we'll miss + // overflows when using randomized profile counters. 
+ unsigned int freq = (unsigned int)frequency + >> ratio_shift << ratio_shift + << InvocationCounter::count_shift; __ increment_profile_ctr(step, counter, result, tmp, LIR_OprFact::intConst(freq), step, overflow, info); } else { __ increment_profile_ctr(step, counter, result, tmp, - LIR_OprFact::illegalOpr, step, overflow, info); + LIR_OprFact::illegalOpr, step, nullptr, info); } } diff --git a/src/hotspot/share/compiler/compiler_globals.hpp b/src/hotspot/share/compiler/compiler_globals.hpp index c39d839132970..bc7e1fa2f2323 100644 --- a/src/hotspot/share/compiler/compiler_globals.hpp +++ b/src/hotspot/share/compiler/compiler_globals.hpp @@ -395,7 +395,7 @@ "If compilation is stopped with an error, capture diagnostic " \ "information at the bailout point") \ \ - product(int, ProfileCaptureRatio, 1, EXPERIMENTAL, \ + product(int, ProfileCaptureRatio, 64, EXPERIMENTAL, \ "Reduce and randomize tiered-compilation profile captures " \ "in order to reduce cache contention on shared method data. " \ "Must be a power of 2.") \ From 8f5f66cfee33cabcf71d514ab3b4855f46322e4b Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Thu, 20 Nov 2025 10:22:00 +0000 Subject: [PATCH 38/48] Fix SNAFU --- .../cpu/aarch64/c1_FrameMap_aarch64.cpp | 27 ++++++++++--------- src/hotspot/cpu/aarch64/register_aarch64.hpp | 7 +++++ 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp index 7006fc8daf4d9..9b73cd4a356cd 100644 --- a/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp @@ -192,22 +192,25 @@ void FrameMap::initialize() { map_register(i, r24); r24_opr = LIR_OprFact::single_cpu(i); i++; map_register(i, r25); r25_opr = LIR_OprFact::single_cpu(i); i++; + auto remaining = RegSet::of(r26, r27); + if (UseCompressedOops && (CompressedOops::base() != nullptr)) { // r27 is allocated conditionally. 
With compressed oops it holds // the heapbase value and is not visible to the allocator. - if (ProfileCaptureRatio > 1) { - r_profile_rng = r26; - } else { - map_register(i, r26); r26_opr = LIR_OprFact::single_cpu(i); i++; - } - } else { // r27 is free + remaining -= r27; + } + + if (ProfileCaptureRatio > 1) { + // Use the highest remaining register for r_profile_rng. + r_profile_rng = *remaining.rbegin(); + remaining -= r_profile_rng; + } + + if (remaining.contains(r26)) { map_register(i, r26); r26_opr = LIR_OprFact::single_cpu(i); i++; - if (ProfileCaptureRatio > 1) { - r_profile_rng = r27; - } else { - // push r27 into the allocation pool - map_register(i, r27); r27_opr = LIR_OprFact::single_cpu(i); i++; - } + } + if (remaining.contains(r27)) { + map_register(i, r27); r27_opr = LIR_OprFact::single_cpu(i); i++; } if(!PreserveFramePointer) { diff --git a/src/hotspot/cpu/aarch64/register_aarch64.hpp b/src/hotspot/cpu/aarch64/register_aarch64.hpp index 108f0f34140b4..a3e1bbac44ac1 100644 --- a/src/hotspot/cpu/aarch64/register_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/register_aarch64.hpp @@ -399,6 +399,13 @@ inline Register AbstractRegSet::first() { return as_Register(count_trailing_zeros(_bitset)); } +template <> +inline Register AbstractRegSet::last() { + if (_bitset == 0) { return noreg; } + int last = max_size() - 1 - count_leading_zeros(_bitset); + return as_Register(last); +} + template <> inline FloatRegister AbstractRegSet::first() { if (_bitset == 0) { return fnoreg; } From 55ea06addc9b9fc55f8a058f8deba17b6d8c99a9 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Thu, 20 Nov 2025 16:48:53 +0000 Subject: [PATCH 39/48] More --- .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index cf2d122a3d732..8f4df5618cd31 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp 
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -2607,10 +2607,10 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr step, LIR_Opr counter_addr, LI = profile_capture_ratio > 1 ? new ProfileStub() : nullptr; Register temp = temp_op->is_register() ? temp_op->as_register() : noreg; - Address dest_adr = as_Address(counter_addr->as_address_ptr()); + Address raw_dest_adr = as_Address(counter_addr->as_address_ptr()); auto lambda = [counter_stub, overflow_stub, freq_op, ratio_shift, step, - temp, dest, dest_adr] (LIR_Assembler* ce, LIR_Op* op) { + temp, dest, raw_dest_adr] (LIR_Assembler* ce, LIR_Op* op) { #undef __ #define __ masm-> @@ -2620,37 +2620,34 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr step, LIR_Opr counter_addr, LI if (counter_stub != nullptr) __ bind(*counter_stub->entry()); if (step->is_register()) { + Address dest_adr = __ legitimize_address(raw_dest_adr, sizeof (jint), rscratch2); Register inc = step->as_register(); - auto ptr = __ legitimize_address(dest_adr, sizeof (jint), rscratch1); - __ ldr(temp, ptr); + __ ldrw(temp, dest_adr); if (ProfileCaptureRatio > 1) { __ lsl(inc, inc, ratio_shift); } - __ add(temp, temp, inc); - __ str(temp, ptr); + __ addw(temp, temp, inc); + __ strw(temp, dest_adr); if (dest->is_register()) __ mov(dest->as_register(), temp); if (ProfileCaptureRatio > 1) { __ lsr(inc, inc, ratio_shift); } + if (dest->is_register()) __ mov(dest->as_register(), temp); } else { jint inc = step->as_constant_ptr()->as_jint_bits(); switch (dest->type()) { case T_INT: { + Address dest_adr = __ legitimize_address(raw_dest_adr, sizeof (jint), rscratch2); inc *= ProfileCaptureRatio; - auto ptr = __ legitimize_address(dest_adr, sizeof (jint), rscratch1); - __ ldrw(temp, ptr); - __ addw(temp, temp, inc); - __ strw(temp, ptr); + __ incrementw(dest_adr, inc); if (dest->is_register()) __ movw(dest->as_register(), temp); break; } case T_LONG: { + Address dest_adr = __ legitimize_address(raw_dest_adr, sizeof (jlong), rscratch2); inc *= 
ProfileCaptureRatio; - auto ptr = __ legitimize_address(dest_adr, sizeof (jlong), rscratch1); - __ ldr(temp, ptr); - __ add(temp, temp, inc); - __ str(temp, ptr); + __ increment(dest_adr, inc); if (dest->is_register()) __ mov(dest->as_register_lo(), temp); break; From cd9abc1c1a39c2e13ccb44d3f812f6e82f770309 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Fri, 21 Nov 2025 15:15:02 +0000 Subject: [PATCH 40/48] More --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 2 +- src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp | 55 ++++++++++--------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index f756314d7e796..85990944806db 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1358,7 +1358,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L ce->type_profile_helper(mdo, md, data, recv, &update_done); Address nonprofiled_receiver_count_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); - __ addptr(nonprofiled_receiver_count_addr, DataLayout::counter_increment); + __ addptr(nonprofiled_receiver_count_addr, DataLayout::counter_increment * ProfileCaptureRatio); __ bind(update_done); diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index b631769b5df37..f9fc046fc02e9 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -269,33 +269,6 @@ void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { movptr(reg, Address(rbp, (offset_in_words + 2) * BytesPerWord)); } -#ifndef PRODUCT - -void C1_MacroAssembler::verify_stack_oop(int stack_offset) { - if (!VerifyOops) return; - verify_oop_addr(Address(rsp, stack_offset)); -} - -void C1_MacroAssembler::verify_not_null_oop(Register r) { - if (!VerifyOops) return; - Label not_null; - 
testptr(r, r); - jcc(Assembler::notZero, not_null); - stop("non-null oop required"); - bind(not_null); - verify_oop(r); -} - -void C1_MacroAssembler::invalidate_registers(bool inv_rax, bool inv_rbx, bool inv_rcx, bool inv_rdx, bool inv_rsi, bool inv_rdi) { -#ifdef ASSERT - if (inv_rax) movptr(rax, 0xDEAD); - if (inv_rbx) movptr(rbx, 0xDEAD); - if (inv_rcx) movptr(rcx, 0xDEAD); - if (inv_rdx) movptr(rdx, 0xDEAD); - if (inv_rsi) movptr(rsi, 0xDEAD); - if (inv_rdi) movptr(rdi, 0xDEAD); -#endif -} int baz, barf; @@ -364,4 +337,32 @@ void C1_MacroAssembler::restore_profile_rng() { } } +#ifndef PRODUCT + +void C1_MacroAssembler::verify_stack_oop(int stack_offset) { + if (!VerifyOops) return; + verify_oop_addr(Address(rsp, stack_offset)); +} + +void C1_MacroAssembler::verify_not_null_oop(Register r) { + if (!VerifyOops) return; + Label not_null; + testptr(r, r); + jcc(Assembler::notZero, not_null); + stop("non-null oop required"); + bind(not_null); + verify_oop(r); +} + +void C1_MacroAssembler::invalidate_registers(bool inv_rax, bool inv_rbx, bool inv_rcx, bool inv_rdx, bool inv_rsi, bool inv_rdi) { +#ifdef ASSERT + if (inv_rax) movptr(rax, 0xDEAD); + if (inv_rbx) movptr(rbx, 0xDEAD); + if (inv_rcx) movptr(rcx, 0xDEAD); + if (inv_rdx) movptr(rdx, 0xDEAD); + if (inv_rsi) movptr(rsi, 0xDEAD); + if (inv_rdi) movptr(rdi, 0xDEAD); +#endif +} + #endif // ifndef PRODUCT From d3b393f6a1a7a69ef658af6d808605e9f124ddd1 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Fri, 21 Nov 2025 17:38:52 +0000 Subject: [PATCH 41/48] D'oh --- .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 4 ++-- .../cpu/aarch64/macroAssembler_aarch64.cpp | 20 +++++++++---------- .../cpu/aarch64/macroAssembler_aarch64.hpp | 4 ++-- src/hotspot/share/c1/c1_CodeStubs.hpp | 1 - 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index 8f4df5618cd31..43a8258570ef4 100644 --- 
a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -2639,7 +2639,7 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr step, LIR_Opr counter_addr, LI case T_INT: { Address dest_adr = __ legitimize_address(raw_dest_adr, sizeof (jint), rscratch2); inc *= ProfileCaptureRatio; - __ incrementw(dest_adr, inc); + __ incrementw(dest_adr, inc, temp); if (dest->is_register()) __ movw(dest->as_register(), temp); break; @@ -2647,7 +2647,7 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr step, LIR_Opr counter_addr, LI case T_LONG: { Address dest_adr = __ legitimize_address(raw_dest_adr, sizeof (jlong), rscratch2); inc *= ProfileCaptureRatio; - __ increment(dest_adr, inc); + __ increment(dest_adr, inc, temp); if (dest->is_register()) __ mov(dest->as_register_lo(), temp); break; diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 1400978931986..1638dad9b6b20 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -2747,30 +2747,30 @@ void MacroAssembler::increment(Register reg, int value) } } -void MacroAssembler::incrementw(Address dst, int value) +void MacroAssembler::incrementw(Address dst, int value, Register result) { - assert(!dst.uses(rscratch1), "invalid dst for address increment"); + assert(!dst.uses(result), "invalid dst for address increment"); if (dst.getMode() == Address::literal) { assert(abs(value) < (1 << 12), "invalid value and address mode combination"); lea(rscratch2, dst); dst = Address(rscratch2); } - ldrw(rscratch1, dst); - incrementw(rscratch1, value); - strw(rscratch1, dst); + ldrw(result, dst); + incrementw(result, value); + strw(result, dst); } -void MacroAssembler::increment(Address dst, int value) +void MacroAssembler::increment(Address dst, int value, Register result) { - assert(!dst.uses(rscratch1), "invalid dst for address increment"); + 
assert(!dst.uses(result), "invalid dst for address increment"); if (dst.getMode() == Address::literal) { assert(abs(value) < (1 << 12), "invalid value and address mode combination"); lea(rscratch2, dst); dst = Address(rscratch2); } - ldr(rscratch1, dst); - increment(rscratch1, value); - str(rscratch1, dst); + ldr(result, dst); + increment(result, value); + str(result, dst); } // Push lots of registers in the bit set supplied. Don't push sp. diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index 67ac1ec79e7b6..2af83e9177b04 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -771,11 +771,11 @@ class MacroAssembler: public Assembler { void decrement(Register reg, int value = 1); void decrement(Address dst, int value = 1); - void incrementw(Address dst, int value = 1); + void incrementw(Address dst, int value = 1, Register result = rscratch1); void incrementw(Register reg, int value = 1); void increment(Register reg, int value = 1); - void increment(Address dst, int value = 1); + void increment(Address dst, int value = 1, Register result = rscratch1); // Alignment diff --git a/src/hotspot/share/c1/c1_CodeStubs.hpp b/src/hotspot/share/c1/c1_CodeStubs.hpp index b0a581f6bc66c..ec1d52440b3a4 100644 --- a/src/hotspot/share/c1/c1_CodeStubs.hpp +++ b/src/hotspot/share/c1/c1_CodeStubs.hpp @@ -179,7 +179,6 @@ class ConversionStub: public CodeStub { public: ConversionStub(Bytecodes::Code bytecode, LIR_Opr input, LIR_Opr result) : _bytecode(bytecode), _input(input), _result(result) { - NOT_IA32( ShouldNotReachHere(); ) // used only on x86-32 } Bytecodes::Code bytecode() { return _bytecode; } From bfa7b61dc64b3aa98fa36a626079b64636bba0dd Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Tue, 25 Nov 2025 16:24:45 +0000 Subject: [PATCH 42/48] Cleanup --- .../cpu/aarch64/c1_FrameMap_aarch64.cpp | 3 ++ .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp 
| 6 +-- .../cpu/aarch64/c1_LIRGenerator_aarch64.cpp | 4 +- .../cpu/aarch64/c1_MacroAssembler_aarch64.cpp | 38 ------------------- .../cpu/aarch64/macroAssembler_aarch64.cpp | 4 ++ src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 34 +++++------------ src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp | 4 +- src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp | 25 ------------ src/hotspot/cpu/x86/c1_Runtime1_x86.cpp | 3 -- src/hotspot/cpu/x86/macroAssembler_x86.hpp | 2 - src/hotspot/share/c1/c1_Runtime1.cpp | 3 -- .../share/compiler/abstractDisassembler.cpp | 2 - src/hotspot/share/runtime/javaThread.cpp | 2 +- .../libawt/java2d/pipe/ShapeSpanIterator.c | 2 +- 14 files changed, 24 insertions(+), 108 deletions(-) diff --git a/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp index 9b73cd4a356cd..3dfbdbeaf876a 100644 --- a/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp @@ -219,6 +219,9 @@ void FrameMap::initialize() { // The unallocatable registers are at the end + if (preserve_rheapbase) { + map_register(i, r27); r27_opr = LIR_OprFact::single_cpu(i); i++; // rheapbase + } map_register(i, r28); r28_opr = LIR_OprFact::single_cpu(i); i++; // rthread if(PreserveFramePointer) { map_register(i, r29); r29_opr = LIR_OprFact::single_cpu(i); i++; // rfp diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index 43a8258570ef4..0d6ef4e951650 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -1497,7 +1497,6 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { __ ubfx(rscratch1, r_profile_rng, 32-ratio_shift, ratio_shift); __ cbz(rscratch1, *profile_stub->entry()); __ bind(*profile_stub->continuation()); - __ block_comment("L1508"); __ step_random(r_profile_rng, rscratch2); __ cbz(value, done); @@ -2659,8 +2658,7 @@ void 
LIR_Assembler::increment_profile_ctr(LIR_Opr step, LIR_Opr counter_addr, LI if (step->is_valid() && overflow_stub) { if (!freq_op->is_valid()) { if (!step->is_constant()) { - __ cmp(step->as_register(), (u1)0); - __ br(__ EQ, *overflow_stub->entry()); + __ cbz(step->as_register(), *overflow_stub->entry()); } else { __ b(*overflow_stub->entry()); return; @@ -2695,7 +2693,6 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr step, LIR_Opr counter_addr, LI __ ubfx(rscratch1, r_profile_rng, 32 - ratio_shift, ratio_shift); __ cbz(rscratch1, *counter_stub->entry()); __ bind(*counter_stub->continuation()); - __ block_comment("L2702"); __ step_random(r_profile_rng, temp); counter_stub->set_action(lambda, nullptr); @@ -2827,7 +2824,6 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { __ ubfx(rscratch1, r_profile_rng, 32-ratio_shift, ratio_shift); __ cbz(rscratch1, *stub->entry()); __ bind(*stub->continuation()); - __ block_comment("L2834"); __ step_random(r_profile_rng, temp); stub->set_action(lambda, op); diff --git a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp index b182c12db5679..cd96e95d088aa 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp @@ -1380,9 +1380,9 @@ void LIRGenerator::do_If(If* x) { // Generate branch profiling. Profiling code doesn't kill flags. 
profile_branch(x, cond); // If we're subsampling counter updates, then profiling code kills flags - // if (ProfileCaptureRatio != 1) { + if (ProfileCaptureRatio != 1) { __ cmp(lir_cond(cond), left, right); - // } + } move_to_phi(x->state()); if (x->x()->type()->is_float_kind()) { __ branch(lir_cond(cond), x->tsux(), x->usux()); } diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp index 26e4bd574b797..9b55a52180032 100644 --- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp @@ -280,24 +280,9 @@ void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { ldr(reg, Address(rfp, (offset_in_words + 2) * BytesPerWord)); } -int baz, barf; - // Randomized profile capture. void C1_MacroAssembler::step_random(Register state, Register temp) { - // One of these will be the best for a particular CPU. - - /* Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" */ - // movl(temp, state); - // sall(temp, 13); - // xorl(state, temp); - // movl(temp, state); - // shrl(temp, 7); - // xorl(state, temp); - // movl(temp, state); - // sall(temp, 5); - // xorl(state, temp); - if (VM_Version::supports_crc32()) { /* CRC used as a pseudo-random-number generator */ // In effect, the CRC instruction is being used here for its @@ -311,29 +296,6 @@ void C1_MacroAssembler::step_random(Register state, Register temp) { mulw(state, state, temp); addw(state, state, 12345); } - - int ratio_shift = exact_log2(ProfileCaptureRatio); - unsigned int threshold = (1ull << 32) >> ratio_shift; - - if (getenv("APH_BAZ_BARF")) { - Label big, done; - push(RegSet::of(temp), sp); - movw(rscratch1, threshold); - cmp(state, rscratch1); - br(HS, big); - - lea(temp, ExternalAddress((address)&baz)); - incrementw(Address(temp)); - b(done); - - bind(big); - lea(temp, ExternalAddress((address)&barf)); - incrementw(Address(temp)); - - bind(done); - pop(RegSet::of(temp), 
sp); - } - } void C1_MacroAssembler::save_profile_rng() { diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 1638dad9b6b20..927ce7ac00e03 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -2750,6 +2750,8 @@ void MacroAssembler::increment(Register reg, int value) void MacroAssembler::incrementw(Address dst, int value, Register result) { assert(!dst.uses(result), "invalid dst for address increment"); + assert(result->is_valid(), "must be"); + assert_different_registers(result, rscratch2); if (dst.getMode() == Address::literal) { assert(abs(value) < (1 << 12), "invalid value and address mode combination"); lea(rscratch2, dst); @@ -2763,6 +2765,8 @@ void MacroAssembler::incrementw(Address dst, int value, Register result) void MacroAssembler::increment(Address dst, int value, Register result) { assert(!dst.uses(result), "invalid dst for address increment"); + assert(result->is_valid(), "must be"); + assert_different_registers(result, rscratch2); if (dst.getMode() == Address::literal) { assert(abs(value) < (1 << 12), "invalid value and address mode combination"); lea(rscratch2, dst); diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 85990944806db..1f8317f1e2e80 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1278,8 +1278,6 @@ void LIR_Assembler::type_profile_helper(Register mdo, } } -long blooper; - void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) { // we always need a stub for the failure case. 
CodeStub* stub = op->stub(); @@ -1514,11 +1512,6 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { } __ bind(not_null); - __ push(rax); - __ lea(rax, ExternalAddress((address)&blooper)); - __ addl(Address(rax), 1); - __ pop(rax); - Label update_done; Register recv = k_RInfo; __ load_klass(recv, value, tmp_load_klass); @@ -2851,10 +2844,8 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { __ load_klass(result, obj, rscratch1); } -// int ploopy; - void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op, - LIR_Opr freq_op, LIR_Opr step_op, + LIR_Opr freq_op, LIR_Opr, CodeStub* overflow_stub) { #ifndef PRODUCT if (CommentedAssembly) { @@ -2890,7 +2881,7 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de if (ProfileCaptureRatio > 1) { __ shll(inc, ratio_shift); } - __ addl(temp, inc); + __ lea(temp, Address(temp, inc, Address::times_1)); __ movl(dest_adr, temp); __ movl(dest->as_register(), temp); if (ProfileCaptureRatio > 1) { @@ -2901,26 +2892,23 @@ void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr de switch (dest->type()) { case T_INT: { inc *= ProfileCaptureRatio; + __ movl(temp, dest_adr); + // Use lea instead of add to avoid destroying condition codes on x86 + __ lea(temp, Address(temp, inc, Address::times_1)); + __ movl(dest_adr, temp); if (dest->is_register()) { - __ movl(temp, dest_adr); - __ addl(temp, inc); - __ movl(dest_adr, temp); __ movl(dest->as_register(), temp); - } else { - __ addl(dest_adr, inc); } - break; } case T_LONG: { inc *= ProfileCaptureRatio; + __ movq(temp, dest_adr); + // Use lea instead of add to avoid destroying condition codes on x86 + __ lea(temp, Address(temp, inc, Address::times_1)); + __ movq(dest_adr, temp); if (dest->is_register()) { - __ movq(temp, dest_adr); - __ addq(temp, inc); - __ movq(dest_adr, temp); __ movq(dest->as_register_lo(), temp); - } else { - __ addq(dest_adr, inc); } break; @@ -3102,8 
+3090,6 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { } } -int kludge; - void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { Register obj = op->obj()->as_register(); Register tmp = op->tmp()->as_pointer_register(); diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp index 739c84f44f701..2bb7248803616 100644 --- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp @@ -1377,9 +1377,9 @@ void LIRGenerator::do_If(If* x) { // Generate branch profiling. Profiling code doesn't kill flags. profile_branch(x, cond); // If we're subsampling counter updates, then profiling code kills flags - // if (ProfileCaptureRatio != 1) { + if (ProfileCaptureRatio != 1) { __ cmp(lir_cond(cond), left, right); - // } + } move_to_phi(x->state()); if (x->x()->type()->is_float_kind()) { __ branch(lir_cond(cond), x->tsux(), x->usux()); diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index f9fc046fc02e9..97f918097b440 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -269,9 +269,6 @@ void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { movptr(reg, Address(rbp, (offset_in_words + 2) * BytesPerWord)); } - -int baz, barf; - // Randomized profile capture. 
void C1_MacroAssembler::step_random(Register state, Register temp) { @@ -301,28 +298,6 @@ void C1_MacroAssembler::step_random(Register state, Register temp) { imull(state, temp); addl(state, 12345); } - - int ratio_shift = exact_log2(ProfileCaptureRatio); - int threshold = (1ull << 32) >> ratio_shift; - - if (getenv("APH_BAZ_BARF")) { - Label big, done; - push(temp); - cmpl(state, threshold); - jcc(Assembler::aboveEqual, big); - - lea(temp, ExternalAddress((address)&baz)); - addl(Address(temp), 1); - jmp(done); - - bind(big); - lea(temp, ExternalAddress((address)&barf)); - addl(Address(temp), 1); - - bind(done); - pop(temp); - } - } void C1_MacroAssembler::save_profile_rng() { diff --git a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp index 8c1de238c68db..96439c719907e 100644 --- a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp +++ b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp @@ -810,7 +810,6 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { return oop_maps; } -int foo; OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { @@ -868,8 +867,6 @@ OopMapSet* Runtime1::generate_code_for(StubId id, StubAssembler* sasm) { Register bci = rax, method = rbx; __ enter(); OopMap* map = save_live_registers(sasm, 3); - __ lea(rbx, ExternalAddress((address)&foo)); - __ addl(Address(rbx), 1); // Retrieve bci __ movl(bci, Address(rbp, 2*BytesPerWord)); // And a pointer to the Method* diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index b2bb47165b482..ed1343d9c8ce4 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -38,8 +38,6 @@ // Instructions for which a 'better' code sequence exists depending // on arguments should also go in here. 
-extern int baz, barf; - class MacroAssembler: public Assembler { friend class LIR_Assembler; friend class Runtime1; // as_Address() diff --git a/src/hotspot/share/c1/c1_Runtime1.cpp b/src/hotspot/share/c1/c1_Runtime1.cpp index b2e78cbec57a6..a4c956ff5bea1 100644 --- a/src/hotspot/share/c1/c1_Runtime1.cpp +++ b/src/hotspot/share/c1/c1_Runtime1.cpp @@ -506,12 +506,9 @@ static nmethod* counter_overflow_helper(JavaThread* current, int branch_bci, Met return osr_nm; } -long c1_overflows; - JRT_BLOCK_ENTRY(address, Runtime1::counter_overflow(JavaThread* current, int bci, Method* method)) nmethod* osr_nm; JRT_BLOCK_NO_ASYNC - c1_overflows++; osr_nm = counter_overflow_helper(current, bci, method); if (osr_nm != nullptr) { RegisterMap map(current, diff --git a/src/hotspot/share/compiler/abstractDisassembler.cpp b/src/hotspot/share/compiler/abstractDisassembler.cpp index 26342fc8187ce..df7781e93d5db 100644 --- a/src/hotspot/share/compiler/abstractDisassembler.cpp +++ b/src/hotspot/share/compiler/abstractDisassembler.cpp @@ -338,7 +338,6 @@ void AbstractDisassembler::decode_range_abstract(address range_start, address ra } } -int barfism; // Decodes all instructions in the given range [start..end). // The output is enclosed in [MachCode] and [/MachCode] tags for later recognition. @@ -351,7 +350,6 @@ void AbstractDisassembler::decode_abstract(address start, address end, outputStr outputStream* st = (ost == nullptr) ? 
tty : ost; - barfism++; st->bol(); st->cr(); st->print_cr("Loading hsdis library failed, undisassembled code is shown in MachCode section"); diff --git a/src/hotspot/share/runtime/javaThread.cpp b/src/hotspot/share/runtime/javaThread.cpp index 33b5bbe41c676..f356ad6addb10 100644 --- a/src/hotspot/share/runtime/javaThread.cpp +++ b/src/hotspot/share/runtime/javaThread.cpp @@ -519,7 +519,7 @@ JavaThread::JavaThread(MemTag mem_tag) : _lock_stack(this), _om_cache(this), - _profile_rng(0) { + _profile_rng(-1) { set_jni_functions(jni_functions()); #if INCLUDE_JVMCI diff --git a/src/java.desktop/share/native/libawt/java2d/pipe/ShapeSpanIterator.c b/src/java.desktop/share/native/libawt/java2d/pipe/ShapeSpanIterator.c index d8ab84abb7bcd..5b34ac9fbaa64 100644 --- a/src/java.desktop/share/native/libawt/java2d/pipe/ShapeSpanIterator.c +++ b/src/java.desktop/share/native/libawt/java2d/pipe/ShapeSpanIterator.c @@ -580,7 +580,7 @@ Java_sun_java2d_pipe_ShapeSpanIterator_appendPoly { pathData *pd; int i; - jint *xPoints = 0, *yPoints = 0; + jint *xPoints, *yPoints; jboolean oom = JNI_FALSE; jfloat xoff = (jfloat) ixoff, yoff = (jfloat) iyoff; From eeaadc80c17683588a64e5933078bb6df399cdc5 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Tue, 25 Nov 2025 17:01:19 +0000 Subject: [PATCH 43/48] Cleanup --- src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 3 +-- src/hotspot/share/c1/c1_LIR.cpp | 2 +- src/hotspot/share/c1/c1_LIR.hpp | 4 ++-- src/hotspot/share/c1/c1_LIRAssembler.cpp | 3 +-- src/hotspot/share/c1/c1_LIRAssembler.hpp | 2 +- src/hotspot/share/c1/c1_LIRGenerator.cpp | 4 ++-- 6 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 1f8317f1e2e80..bb5b6bed7b952 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -2845,8 +2845,7 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { } void 
LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op, - LIR_Opr freq_op, LIR_Opr, - CodeStub* overflow_stub) { + LIR_Opr freq_op, CodeStub* overflow_stub) { #ifndef PRODUCT if (CommentedAssembly) { __ block_comment("increment_profile_ctr" " {"); diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index 45cfd5b4bfa42..d173a44ccf36f 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -1273,7 +1273,7 @@ void LIR_List::volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offs void LIR_List::increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, - LIR_Opr freq, LIR_Opr step, + LIR_Opr freq, CodeStub* overflow, CodeEmitInfo* info) { append(new LIR_Op2( lir_increment_profile_ctr, diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index 5dce174aa1ee3..2253cb0566540 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -2268,9 +2268,9 @@ class LIR_List: public CompilationResourceObj { void volatile_store_mem_reg(LIR_Opr src, LIR_Address* address, CodeEmitInfo* info, LIR_PatchCode patch_code = lir_patch_none); void volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offset, BasicType type, CodeEmitInfo* info, LIR_PatchCode patch_code); - void increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, LIR_Opr freq, LIR_Opr step, CodeStub* overflow, CodeEmitInfo* info); + void increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, LIR_Opr freq, CodeStub* overflow, CodeEmitInfo* info); void increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, CodeStub* overflow = nullptr) { - increment_profile_ctr(src, addr, res, tmp, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, overflow, nullptr); + increment_profile_ctr(src, addr, res, tmp, LIR_OprFact::illegalOpr, overflow, nullptr); } void idiv(LIR_Opr left, 
LIR_Opr right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp index 5a4ae5fc53a8e..de782a5229778 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -726,8 +726,7 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { case lir_increment_profile_ctr: increment_profile_ctr(op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr(), - op->freq_opr(), op->step_opr(), - op->overflow()); + op->freq_opr(), op->overflow()); break; default: diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp index d3624ec3a76a6..452ff903d10bf 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp @@ -236,7 +236,7 @@ class LIR_Assembler: public CompilationResourceObj { void align_call(LIR_Code code); void increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp, - LIR_Opr freq_op, LIR_Opr step_op, + LIR_Opr freq_op, CodeStub *overflow); void negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp = LIR_OprFact::illegalOpr); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 077dbb726879c..399ea086ed993 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -3188,10 +3188,10 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, >> ratio_shift << ratio_shift << InvocationCounter::count_shift; __ increment_profile_ctr(step, counter, result, tmp, - LIR_OprFact::intConst(freq), step, overflow, info); + LIR_OprFact::intConst(freq), overflow, info); } else { __ increment_profile_ctr(step, counter, result, tmp, - LIR_OprFact::illegalOpr, step, nullptr, info); + LIR_OprFact::illegalOpr, nullptr, info); } } From 126cd0ed922c16d3378e0ab384b5c55f45891266 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Wed, 26 Nov 2025 15:53:21 +0000 Subject: 
[PATCH 44/48] Inter --- src/hotspot/share/c1/c1_LIR.cpp | 73 +++++++++++++++++++----- src/hotspot/share/c1/c1_LIR.hpp | 45 +++++++++++++++ src/hotspot/share/c1/c1_LIRGenerator.cpp | 6 +- 3 files changed, 107 insertions(+), 17 deletions(-) diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index d173a44ccf36f..e92a858645a30 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -576,16 +576,16 @@ void LIR_OpVisitState::visit(LIR_Op* op) { assert(op->as_Op2() != nullptr, "must be"); if (op2->_info) do_info(op2->_info); - if (op2->_opr1->is_valid()) do_input(op2->_opr1); - if (op2->_opr2->is_valid()) do_input(op2->_opr2); - if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1); - if (op2->_result->is_valid()) do_output(op2->_result); + if (op2->_opr1->is_valid()) do_input(op2->_opr1); // step + if (op2->_opr2->is_valid()) do_input(op2->_opr2); // ctr_address + if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1); // tmp + if (op2->_result->is_valid()) do_output(op2->_result);// res if (op->code() == lir_xchg || op->code() == lir_xadd || op->code() == lir_increment_profile_ctr) { // on ARM and PPC, return value is loaded first so could // destroy inputs. On other platforms that implement those // (x86, sparc), the extra constrainsts are harmless. 
- if (op2->_opr1->is_valid()) do_temp(op2->_opr1); - if (op2->_opr2->is_valid()) do_temp(op2->_opr2); + if (op2->_opr1->is_valid()) do_temp(op2->_opr1); // step + if (op2->_opr2->is_valid()) do_temp(op2->_opr2); // ctr_address } if (op2->overflow() != nullptr) do_stub(op2->overflow()); @@ -899,7 +899,23 @@ void LIR_OpVisitState::visit(LIR_Op* op) { do_temp(opProfileType->_tmp); break; } - default: + + case lir_increment_counter: + { + LIR_OpIncrementCounter* opr = op->as_OpIncrementCounter(); + assert(opr != nullptr, "must be"); + + if (opr->_info) do_info(opr->_info); + do_input(opr->_counter_addr); do_temp(opr->_counter_addr); + do_input(opr->_step); do_temp(opr->_step); + if (opr->_dest->is_valid()) { do_output(opr->_dest); } + if (opr->_temp_op->is_valid()) do_temp(opr->_temp_op); + if (opr->overflow_stub() != nullptr) do_stub(opr->overflow_stub()); + + break; + } + + default: op->visit(this); } } @@ -1026,6 +1042,14 @@ void LIR_Op2::emit_code(LIR_Assembler* masm) { } } +void LIR_OpIncrementCounter::emit_code(LIR_Assembler* masm) { + masm->increment_profile_ctr + (_step, _counter_addr, _dest, _temp_op, _freq_op, _overflow_stub); + if (overflow_stub()) { + masm->append_code_stub(overflow_stub()); + } +} + void LIR_OpAllocArray::emit_code(LIR_Assembler* masm) { masm->emit_alloc_array(this); masm->append_code_stub(stub()); @@ -1272,18 +1296,31 @@ void LIR_List::volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offs } +void LIR_List::increment_counter(LIR_Opr step, LIR_Address* addr, LIR_Opr dest, LIR_Opr tmp, + LIR_Opr freq, CodeStub* overflow, CodeEmitInfo* info) { + append(new LIR_OpIncrementCounter ( + step, + LIR_OprFact::address(addr), + dest, + tmp, + freq, + overflow, + info)); +} + + void LIR_List::increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, LIR_Opr freq, CodeStub* overflow, CodeEmitInfo* info) { append(new LIR_Op2( lir_increment_profile_ctr, - src, - LIR_OprFact::address(addr), - res, - tmp, - freq, - tmp, - 
overflow, + src, // opr1 + LIR_OprFact::address(addr), // opr2 + res, // result + tmp, // tmp1 + freq, // tmp2 + tmp, // tmp3 + overflow, // overflow info)); } @@ -1780,6 +1817,7 @@ const char * LIR_Op::name() const { // LIR_OpProfileType case lir_profile_type: s = "profile_type"; break; case lir_increment_profile_ctr: s = "increment_profile_ctr"; break; + case lir_increment_counter: s = "increment_counter"; break; // LIR_OpAssert #ifdef ASSERT case lir_assert: s = "assert"; break; @@ -2075,6 +2113,13 @@ void LIR_OpProfileType::print_instr(outputStream* out) const { tmp()->print(out); out->print(" "); } +void LIR_OpIncrementCounter::print_instr(outputStream* out) const { + step()->print(out); out->print(" "); + counter_addr()->print(out); out->print(" "); + dest()->print(out); out->print(" "); + temp_op()->print(out); out->print(" "); + freq_op()->print(out); out->print(" "); +} #endif // PRODUCT // Implementation of LIR_InsertionBuffer diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index 2253cb0566540..d1737a1ac181d 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -893,6 +893,7 @@ class LIR_OpCompareAndSwap; class LIR_OpLoadKlass; class LIR_OpProfileCall; class LIR_OpProfileType; +class LIR_OpIncrementCounter; #ifdef ASSERT class LIR_OpAssert; #endif @@ -998,6 +999,7 @@ enum LIR_Code { , begin_opMDOProfile , lir_profile_call , lir_profile_type + , lir_increment_counter , end_opMDOProfile , begin_opAssert , lir_assert @@ -1140,6 +1142,7 @@ class LIR_Op: public CompilationResourceObj { virtual LIR_OpLoadKlass* as_OpLoadKlass() { return nullptr; } virtual LIR_OpProfileCall* as_OpProfileCall() { return nullptr; } virtual LIR_OpProfileType* as_OpProfileType() { return nullptr; } + virtual LIR_OpIncrementCounter* as_OpIncrementCounter() { return nullptr; } #ifdef ASSERT virtual LIR_OpAssert* as_OpAssert() { return nullptr; } #endif @@ -1966,6 +1969,42 @@ class LIR_OpCompareAndSwap : public LIR_Op { 
virtual void print_instr(outputStream* out) const PRODUCT_RETURN; }; +// LIR_OpIncrementCounter +class LIR_OpIncrementCounter : public LIR_Op { + friend class LIR_OpVisitState; + + private: + LIR_Opr _step; + LIR_Opr _counter_addr; + LIR_Opr _dest; + LIR_Opr _temp_op; + LIR_Opr _freq_op; + CodeStub* _overflow_stub; + + public: + // Destroys recv + LIR_OpIncrementCounter(LIR_Opr step, LIR_Opr counter_addr, LIR_Opr dest, LIR_Opr temp_op, + LIR_Opr freq_op, CodeStub* overflow_stub, CodeEmitInfo *info) + : LIR_Op(lir_increment_counter, LIR_OprFact::illegalOpr, info) + , _step(step) + , _counter_addr(counter_addr) + , _dest(dest) + , _temp_op(temp_op) + , _freq_op(freq_op) + , _overflow_stub(overflow_stub) { } + + LIR_Opr step() const { return _step; } + LIR_Opr counter_addr() const { return _counter_addr; } + LIR_Opr dest() const { return _dest; } + LIR_Opr temp_op() const { return _temp_op; } + LIR_Opr freq_op() const { return _freq_op; } + CodeStub* overflow_stub() const { return _overflow_stub; }; + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_OpIncrementCounter* as_OpIncrementCounter() { return this; } + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +}; + // LIR_OpProfileCall class LIR_OpProfileCall : public LIR_Op { friend class LIR_OpVisitState; @@ -2273,6 +2312,12 @@ class LIR_List: public CompilationResourceObj { increment_profile_ctr(src, addr, res, tmp, LIR_OprFact::illegalOpr, overflow, nullptr); } + void increment_counter(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, LIR_Opr freq, CodeStub* overflow, CodeEmitInfo* info); + void increment_counter(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, CodeStub* overflow = nullptr) { + increment_counter(src, addr, res, tmp, LIR_OprFact::illegalOpr, overflow, nullptr); + } + + void idiv(LIR_Opr left, LIR_Opr right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); void idiv(LIR_Opr left, int right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); void irem(LIR_Opr 
left, LIR_Opr right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 399ea086ed993..ddd31913c74eb 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -3187,10 +3187,10 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, unsigned int freq = (unsigned int)frequency >> ratio_shift << ratio_shift << InvocationCounter::count_shift; - __ increment_profile_ctr(step, counter, result, tmp, - LIR_OprFact::intConst(freq), overflow, info); + __ increment_counter(step, counter, result, tmp, + LIR_OprFact::intConst(freq), overflow, info); } else { - __ increment_profile_ctr(step, counter, result, tmp, + __ increment_counter(step, counter, result, tmp, LIR_OprFact::illegalOpr, nullptr, info); } } From 026db53538d37f1492e7337b35eaa6a59504d31a Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Wed, 26 Nov 2025 16:58:58 +0000 Subject: [PATCH 45/48] Better --- src/hotspot/share/c1/c1_LIRGenerator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index ddd31913c74eb..24a1691803b81 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -929,7 +929,7 @@ void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { LIR_Address* data_addr = new LIR_Address(md_reg, data_offset_reg, data_reg->type()); LIR_Opr tmp = new_register(T_INT); LIR_Opr step = LIR_OprFact::intConst(DataLayout::counter_increment); - __ increment_profile_ctr(step, data_addr, LIR_OprFact::intConst(0), tmp, nullptr); + __ increment_counter(step, data_addr, LIR_OprFact::intConst(0), tmp, nullptr); } } @@ -2376,7 +2376,7 @@ void LIRGenerator::do_Goto(Goto* x) { LIR_Opr tmp = new_register(T_INT); LIR_Opr dummy = LIR_OprFact::intConst(0); LIR_Opr inc = 
LIR_OprFact::intConst(DataLayout::counter_increment); - __ increment_profile_ctr(inc, counter_addr, dummy, tmp, nullptr); + __ increment_counter(inc, counter_addr, dummy, tmp, nullptr); } // emit phi-instruction move after safepoint since this simplifies From 785369d8ac32a214389b2aacfe0cc3def1e53a77 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Wed, 26 Nov 2025 17:18:19 +0000 Subject: [PATCH 46/48] Minimize deltas to master --- src/hotspot/share/c1/c1_LIR.cpp | 61 +++++++----------------- src/hotspot/share/c1/c1_LIR.hpp | 46 ++---------------- src/hotspot/share/c1/c1_LIRAssembler.cpp | 6 +-- src/hotspot/share/c1/c1_LIRGenerator.hpp | 1 - 4 files changed, 23 insertions(+), 91 deletions(-) diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index e92a858645a30..faf4a96224f6f 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -564,30 +564,25 @@ void LIR_OpVisitState::visit(LIR_Op* op) { case lir_ushr: case lir_xadd: case lir_xchg: - case lir_assert: { + case lir_assert: + { + assert(op->as_Op2() != nullptr, "must be"); LIR_Op2* op2 = (LIR_Op2*)op; assert(op2->_tmp2->is_illegal() && op2->_tmp3->is_illegal() && op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used"); - } - // fallthrough - case lir_increment_profile_ctr: - { - LIR_Op2* op2 = (LIR_Op2*)op; - assert(op->as_Op2() != nullptr, "must be"); if (op2->_info) do_info(op2->_info); - if (op2->_opr1->is_valid()) do_input(op2->_opr1); // step - if (op2->_opr2->is_valid()) do_input(op2->_opr2); // ctr_address - if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1); // tmp - if (op2->_result->is_valid()) do_output(op2->_result);// res - if (op->code() == lir_xchg || op->code() == lir_xadd || op->code() == lir_increment_profile_ctr) { + if (op2->_opr1->is_valid()) do_input(op2->_opr1); + if (op2->_opr2->is_valid()) do_input(op2->_opr2); + if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1); + if (op2->_result->is_valid()) do_output(op2->_result); + if 
(op->code() == lir_xchg || op->code() == lir_xadd) { // on ARM and PPC, return value is loaded first so could // destroy inputs. On other platforms that implement those // (x86, sparc), the extra constrainsts are harmless. - if (op2->_opr1->is_valid()) do_temp(op2->_opr1); // step - if (op2->_opr2->is_valid()) do_temp(op2->_opr2); // ctr_address + if (op2->_opr1->is_valid()) do_temp(op2->_opr1); + if (op2->_opr2->is_valid()) do_temp(op2->_opr2); } - if (op2->overflow() != nullptr) do_stub(op2->overflow()); break; } @@ -1037,17 +1032,6 @@ void LIR_OpConvert::emit_code(LIR_Assembler* masm) { void LIR_Op2::emit_code(LIR_Assembler* masm) { masm->emit_op2(this); - if (overflow()) { - masm->append_code_stub(overflow()); - } -} - -void LIR_OpIncrementCounter::emit_code(LIR_Assembler* masm) { - masm->increment_profile_ctr - (_step, _counter_addr, _dest, _temp_op, _freq_op, _overflow_stub); - if (overflow_stub()) { - masm->append_code_stub(overflow_stub()); - } } void LIR_OpAllocArray::emit_code(LIR_Assembler* masm) { @@ -1091,6 +1075,14 @@ void LIR_OpAssert::emit_code(LIR_Assembler* masm) { } #endif +void LIR_OpIncrementCounter::emit_code(LIR_Assembler* masm) { + masm->increment_profile_ctr + (_step, _counter_addr, _dest, _temp_op, _freq_op, _overflow_stub); + if (overflow_stub()) { + masm->append_code_stub(overflow_stub()); + } +} + void LIR_OpProfileCall::emit_code(LIR_Assembler* masm) { masm->emit_profile_call(this); } @@ -1309,22 +1301,6 @@ void LIR_List::increment_counter(LIR_Opr step, LIR_Address* addr, LIR_Opr dest, } -void LIR_List::increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, - LIR_Opr freq, - CodeStub* overflow, CodeEmitInfo* info) { - append(new LIR_Op2( - lir_increment_profile_ctr, - src, // opr1 - LIR_OprFact::address(addr), // opr2 - res, // result - tmp, // tmp1 - freq, // tmp2 - tmp, // tmp3 - overflow, // overflow - info)); -} - - void LIR_List::idiv(LIR_Opr left, LIR_Opr right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* 
info) { append(new LIR_Op3( lir_idiv, @@ -1816,7 +1792,6 @@ const char * LIR_Op::name() const { case lir_profile_call: s = "profile_call"; break; // LIR_OpProfileType case lir_profile_type: s = "profile_type"; break; - case lir_increment_profile_ctr: s = "increment_profile_ctr"; break; case lir_increment_counter: s = "increment_counter"; break; // LIR_OpAssert #ifdef ASSERT diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index d1737a1ac181d..3f0fd8bc0090a 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -959,7 +959,6 @@ enum LIR_Code { , lir_throw , lir_xadd , lir_xchg - , lir_increment_profile_ctr , end_op2 , begin_op3 , lir_idiv @@ -1570,7 +1569,6 @@ class LIR_Op2: public LIR_Op { LIR_Opr _tmp5; LIR_Condition _condition; BasicType _type; - CodeStub* _overflow; void verify() const; @@ -1585,8 +1583,7 @@ class LIR_Op2: public LIR_Op { , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) , _condition(condition) - , _type(type) - , _overflow(nullptr) { + , _type(type) { assert(code == lir_cmp || code == lir_branch || code == lir_cond_float_branch || code == lir_assert, "code check"); } @@ -1600,8 +1597,7 @@ class LIR_Op2: public LIR_Op { , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) , _condition(condition) - , _type(type) - , _overflow(nullptr) { + , _type(type) { assert(code == lir_cmove, "code check"); assert(type != T_ILLEGAL, "cmove should have type"); } @@ -1617,8 +1613,7 @@ class LIR_Op2: public LIR_Op { , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) , _condition(lir_cond_unknown) - , _type(type) - , _overflow(nullptr) { + , _type(type) { assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); } @@ -1633,33 +1628,10 @@ class LIR_Op2: public LIR_Op { , _tmp4(tmp4) , _tmp5(tmp5) , _condition(lir_cond_unknown) - , _type(T_ILLEGAL) - , _overflow(nullptr) { + , 
_type(T_ILLEGAL) { assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); } - LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, - LIR_Opr freq, LIR_Opr step, CodeStub* overflow, CodeEmitInfo *info) - : LIR_Op(code, result, info) - , _opr1(opr1) - , _opr2(opr2) - , _tmp1(tmp1) - , _tmp2(freq) - , _tmp3(step) - , _tmp4(LIR_OprFact::illegalOpr) - , _tmp5(LIR_OprFact::illegalOpr) - , _condition(lir_cond_unknown) - , _type(T_ILLEGAL) - , _overflow(overflow) { - assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); - } - - LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, - CodeStub* overflow) { - LIR_Op2(code, opr1, opr2, result, tmp1, - LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr); - } - LIR_Opr in_opr1() const { return _opr1; } LIR_Opr in_opr2() const { return _opr2; } BasicType type() const { return _type; } @@ -1668,12 +1640,9 @@ class LIR_Op2: public LIR_Op { LIR_Opr tmp3_opr() const { return _tmp3; } LIR_Opr tmp4_opr() const { return _tmp4; } LIR_Opr tmp5_opr() const { return _tmp5; } - LIR_Opr freq_opr() const { return _tmp2; } - LIR_Opr step_opr() const { return _tmp3; } LIR_Condition condition() const { assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition; } - CodeStub *overflow() const { return _overflow; } void set_condition(LIR_Condition condition) { assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch, "only valid for branch"); _condition = condition; } @@ -2307,11 +2276,6 @@ class LIR_List: public CompilationResourceObj { void volatile_store_mem_reg(LIR_Opr src, LIR_Address* address, CodeEmitInfo* info, LIR_PatchCode patch_code = lir_patch_none); void volatile_store_unsafe_reg(LIR_Opr src, 
LIR_Opr base, LIR_Opr offset, BasicType type, CodeEmitInfo* info, LIR_PatchCode patch_code); - void increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, LIR_Opr freq, CodeStub* overflow, CodeEmitInfo* info); - void increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, CodeStub* overflow = nullptr) { - increment_profile_ctr(src, addr, res, tmp, LIR_OprFact::illegalOpr, overflow, nullptr); - } - void increment_counter(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, LIR_Opr freq, CodeStub* overflow, CodeEmitInfo* info); void increment_counter(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, CodeStub* overflow = nullptr) { increment_counter(src, addr, res, tmp, LIR_OprFact::illegalOpr, overflow, nullptr); @@ -2448,8 +2412,6 @@ class LIR_InsertionBuffer : public CompilationResourceObj { // instruction void move(int index, LIR_Opr src, LIR_Opr dst, CodeEmitInfo* info = nullptr) { append(index, new LIR_Op1(lir_move, src, dst, dst->type(), lir_patch_none, info)); } - - void increment_profile_ctr(LIR_Opr src, LIR_Address* addr, LIR_Opr res, CodeEmitInfo* info = nullptr); }; diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp index de782a5229778..e6963c00c6a64 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -724,11 +724,6 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { atomic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr()); break; - case lir_increment_profile_ctr: - increment_profile_ctr(op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr(), - op->freq_opr(), op->overflow()); - break; - default: Unimplemented(); break; @@ -796,6 +791,7 @@ void LIR_Assembler::move_op(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch } } + void LIR_Assembler::verify_oop_map(CodeEmitInfo* info) { #ifndef PRODUCT if (VerifyOops) { diff --git a/src/hotspot/share/c1/c1_LIRGenerator.hpp 
b/src/hotspot/share/c1/c1_LIRGenerator.hpp index d864f8cca2540..ec0ea5dc047d3 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.hpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.hpp @@ -590,7 +590,6 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { #ifdef ASSERT virtual void do_Assert (Assert* x); #endif - virtual void do_IncProfileCtr (ProfileInvoke* x) { } #ifdef C1_LIRGENERATOR_MD_HPP #include C1_LIRGENERATOR_MD_HPP From 2f0e50f6b115a376a2aaeebe891cd8ca2b9f130a Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Thu, 27 Nov 2025 16:10:56 +0000 Subject: [PATCH 47/48] AArch64 --- src/hotspot/cpu/aarch64/assembler_aarch64.hpp | 10 +++++----- src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp | 4 ---- src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 3 +-- src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp | 5 ++--- src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp | 2 +- 5 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp index cb8344d5749e5..70912313dc4e2 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp @@ -3230,11 +3230,11 @@ template #undef INSN // CRC32 instructions -#define INSN(NAME, c, sf, sz) \ - void NAME(Register Rd, Register Rn, Register Rm) { \ - starti; \ - f(sf, 31), f(0b0011010110, 30, 21), f(0b010, 15, 13), f(c, 12); \ - f(sz, 11, 10), rf(Rm, 16), rf(Rn, 5), rf(Rd, 0); \ +#define INSN(NAME, c, sf, sz) \ + void NAME(Register Rd, Register Rn, Register Rm) { \ + starti; \ + f(sf, 31), f(0b0011010110, 30, 21), f(0b010, 15, 13), f(c, 12); \ + f(sz, 11, 10), zrf(Rm, 16), rf(Rn, 5), rf(Rd, 0); \ } INSN(crc32b, 0, 0, 0b00); diff --git a/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp index 3dfbdbeaf876a..6bdd88fbd567c 100644 --- a/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp +++ 
b/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp @@ -218,10 +218,6 @@ void FrameMap::initialize() { } // The unallocatable registers are at the end - - if (preserve_rheapbase) { - map_register(i, r27); r27_opr = LIR_OprFact::single_cpu(i); i++; // rheapbase - } map_register(i, r28); r28_opr = LIR_OprFact::single_cpu(i); i++; // rthread if(PreserveFramePointer) { map_register(i, r29); r29_opr = LIR_OprFact::single_cpu(i); i++; // rfp diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index 0d6ef4e951650..3298723a98178 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -1343,7 +1343,6 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L __ ubfx(rscratch1, r_profile_rng, 32-ratio_shift, ratio_shift); __ cbz(rscratch1, *stub->entry()); __ bind(*stub->continuation()); - __ block_comment("L1346"); __ step_random(r_profile_rng, rscratch2); stub->set_action(lambda, op); @@ -2588,7 +2587,7 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { } void LIR_Assembler::increment_profile_ctr(LIR_Opr step, LIR_Opr counter_addr, LIR_Opr dest, LIR_Opr temp_op, - LIR_Opr freq_op, LIR_Opr step_op, + LIR_Opr freq_op, CodeStub* overflow_stub) { #ifndef PRODUCT if (CommentedAssembly) { diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp index 9b55a52180032..64ef09eb08da1 100644 --- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp @@ -282,14 +282,13 @@ void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { // Randomized profile capture. 
-void C1_MacroAssembler::step_random(Register state, Register temp) { +void C1_MacroAssembler::step_random(Register state, Register temp, Register data) { if (VM_Version::supports_crc32()) { /* CRC used as a psuedo-random-number generator */ // In effect, the CRC instruction is being used here for its // linear feedback shift register. It's unbeatably fast, and // plenty good enough for what we need. - mov(temp, 1); - crc32h(state, state, temp); + crc32h(state, state, data); } else { /* LCG from glibc. */ mov(temp, 1103515245); diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp index 706d114e19e75..ed557045fce09 100644 --- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp @@ -114,7 +114,7 @@ using MacroAssembler::null_check; void load_parameter(int offset_in_words, Register reg); // Randomized profile capture - void step_random(Register state, Register temp); + void step_random(Register state, Register temp, Register data = rthread); void save_profile_rng(); void restore_profile_rng(); From 66ea58723abc17e473e16504076d8bef459b8151 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Thu, 27 Nov 2025 16:22:07 +0000 Subject: [PATCH 48/48] whitespace --- src/hotspot/share/c1/c1_LIR.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index 3f0fd8bc0090a..386fe39d626b0 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -1967,7 +1967,7 @@ class LIR_OpIncrementCounter : public LIR_Op { LIR_Opr dest() const { return _dest; } LIR_Opr temp_op() const { return _temp_op; } LIR_Opr freq_op() const { return _freq_op; } - CodeStub* overflow_stub() const { return _overflow_stub; }; + CodeStub* overflow_stub() const { return _overflow_stub; }; virtual void emit_code(LIR_Assembler* masm); virtual LIR_OpIncrementCounter* 
as_OpIncrementCounter() { return this; }