diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp index 18807c667e391..d8b9deb1b4210 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp @@ -148,6 +148,9 @@ constexpr Register rdispatch = r21; // dispatch table base constexpr Register esp = r20; // Java expression stack pointer constexpr Register r19_sender_sp = r19; // sender's SP while in interpreter +// State for randomized profile counters. Used by C1. +extern Register r_profile_rng; + // Preserved predicate register with all elements set TRUE. constexpr PRegister ptrue = p7; @@ -3227,11 +3230,11 @@ template #undef INSN // CRC32 instructions -#define INSN(NAME, c, sf, sz) \ - void NAME(Register Rd, Register Rn, Register Rm) { \ - starti; \ - f(sf, 31), f(0b0011010110, 30, 21), f(0b010, 15, 13), f(c, 12); \ - f(sz, 11, 10), rf(Rm, 16), rf(Rn, 5), rf(Rd, 0); \ +#define INSN(NAME, c, sf, sz) \ + void NAME(Register Rd, Register Rn, Register Rm) { \ + starti; \ + f(sf, 31), f(0b0011010110, 30, 21), f(0b010, 15, 13), f(c, 12); \ + f(sz, 11, 10), zrf(Rm, 16), rf(Rn, 5), rf(Rd, 0); \ } INSN(crc32b, 0, 0, 0b00); diff --git a/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp index 83d0952dcb487..6bdd88fbd567c 100644 --- a/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp @@ -191,13 +191,26 @@ void FrameMap::initialize() { map_register(i, r23); r23_opr = LIR_OprFact::single_cpu(i); i++; map_register(i, r24); r24_opr = LIR_OprFact::single_cpu(i); i++; map_register(i, r25); r25_opr = LIR_OprFact::single_cpu(i); i++; - map_register(i, r26); r26_opr = LIR_OprFact::single_cpu(i); i++; - // r27 is allocated conditionally. With compressed oops it holds - // the heapbase value and is not visible to the allocator. - bool preserve_rheapbase = i >= nof_caller_save_cpu_regs(); - if (!preserve_rheapbase) { - map_register(i, r27); r27_opr = LIR_OprFact::single_cpu(i); i++; // rheapbase + auto remaining = RegSet::of(r26, r27); + + if (UseCompressedOops && (CompressedOops::base() != nullptr)) { + // r27 is allocated conditionally. With compressed oops it holds + // the heapbase value and is not visible to the allocator. + remaining -= r27; + } + + if (ProfileCaptureRatio > 1) { + // Use the highest remaining register for r_profile_rng. + r_profile_rng = *remaining.rbegin(); + remaining -= r_profile_rng; + } + + if (remaining.contains(r26)) { + map_register(i, r26); r26_opr = LIR_OprFact::single_cpu(i); i++; + } + if (remaining.contains(r27)) { + map_register(i, r27); r27_opr = LIR_OprFact::single_cpu(i); i++; } if(!PreserveFramePointer) { @@ -205,10 +218,6 @@ void FrameMap::initialize() { } // The unallocatable registers are at the end - - if (preserve_rheapbase) { - map_register(i, r27); r27_opr = LIR_OprFact::single_cpu(i); i++; // rheapbase - } map_register(i, r28); r28_opr = LIR_OprFact::single_cpu(i); i++; // rthread if(PreserveFramePointer) { map_register(i, r29); r29_opr = LIR_OprFact::single_cpu(i); i++; // rfp diff --git a/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.hpp b/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.hpp index 4d783418429cb..7091f3bdb6be1 100644 --- a/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.hpp @@ -147,6 +147,11 @@ range -= 1; } + // Use r26 for randomized profile captures. 
+ if (ProfileCaptureRatio > 1) { + range -= 1; + } + // r29 is not allocatable when PreserveFramePointer is on, // but fp saving is handled in MacroAssembler::build_frame()/remove_frame() if (exclude_fp) { diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index 37a6a130e0dd0..396aa6afe3c55 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -1238,7 +1238,7 @@ void LIR_Assembler::type_profile_helper(Register mdo, __ cmp(recv, rscratch1); __ br(Assembler::NE, next_test); __ addptr(slot_at(ReceiverTypeData::receiver_count_offset(i)), - DataLayout::counter_increment); + DataLayout::counter_increment * ProfileCaptureRatio); __ b(*update_done); __ bind(next_test); } @@ -1250,7 +1250,7 @@ void LIR_Assembler::type_profile_helper(Register mdo, __ ldr(rscratch1, recv_addr); __ cbnz(rscratch1, next_test); __ str(recv, recv_addr); - __ mov(rscratch1, DataLayout::counter_increment); + __ mov(rscratch1, DataLayout::counter_increment * ProfileCaptureRatio); __ str(rscratch1, slot_at(ReceiverTypeData::receiver_count_offset(i))); __ b(*update_done); __ bind(next_test); @@ -1267,6 +1267,11 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L ciKlass* k = op->klass(); Register Rtmp1 = noreg; + int profile_capture_ratio = ProfileCaptureRatio; + int ratio_shift = exact_log2(profile_capture_ratio); + auto threshold = (1ull << 32) >> ratio_shift; + assert(threshold > 0, "must be"); + // check if it needs to be profiled ciMethodData* md; ciProfileData* data; @@ -1316,14 +1321,44 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L __ b(*obj_is_null); __ bind(not_null); - Label update_done; - Register recv = k_RInfo; - __ load_klass(recv, obj); - type_profile_helper(mdo, md, data, recv, &update_done); - Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); - __ addptr(counter_addr, DataLayout::counter_increment); + ProfileStub *stub + = profile_capture_ratio > 1 ? 
new ProfileStub() : nullptr; + + auto lambda = [stub, md, mdo, data, k_RInfo, obj] (LIR_Assembler* ce, LIR_Op* base_op) { + +#undef __ +#define __ masm-> + + auto masm = ce->masm(); + if (stub != nullptr) __ bind(*stub->entry()); + + Label update_done; + Register recv = k_RInfo; + __ load_klass(recv, obj); + ce->type_profile_helper(mdo, md, data, recv, &update_done); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + __ addptr(counter_addr, DataLayout::counter_increment * ProfileCaptureRatio); + + __ bind(update_done); + + if (stub != nullptr) __ b(*stub->continuation()); + +#undef __ +#define __ _masm-> + }; + + if (stub != nullptr) { + __ ubfx(rscratch1, r_profile_rng, 32-ratio_shift, ratio_shift); + __ cbz(rscratch1, *stub->entry()); + __ bind(*stub->continuation()); + __ step_random(r_profile_rng, rscratch2); - __ bind(update_done); + stub->set_action(lambda, op); + stub->set_name("Typecheck stub"); + append_code_stub(stub); + } else { + lambda(this, op); + } } else { __ cbz(obj, *obj_is_null); } @@ -1416,27 +1451,68 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { Label* failure_target = stub->entry(); if (should_profile) { - Label not_null; - Register mdo = klass_RInfo; - __ mov_metadata(mdo, md->constant_encoding()); - __ cbnz(value, not_null); - // Object is null; update MDO and exit - Address data_addr - = __ form_address(rscratch2, mdo, - md->byte_offset_of_slot(data, DataLayout::flags_offset()), 0); - __ ldrb(rscratch1, data_addr); - __ orr(rscratch1, rscratch1, BitData::null_seen_byte_constant()); - __ strb(rscratch1, data_addr); - __ b(done); - __ bind(not_null); + int profile_capture_ratio = ProfileCaptureRatio; + int ratio_shift = exact_log2(profile_capture_ratio); + auto threshold = (1ull << 32) >> ratio_shift; + assert(threshold > 0, "must be"); - Label update_done; - Register recv = k_RInfo; - __ load_klass(recv, value); - type_profile_helper(mdo, md, data, recv, &update_done); - Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); - __ addptr(counter_addr, DataLayout::counter_increment); - __ bind(update_done); + ProfileStub *profile_stub + = profile_capture_ratio > 1 ? 
new ProfileStub() : nullptr; + + auto lambda = [profile_stub, md, data, value, + k_RInfo, klass_RInfo, success_target] (LIR_Assembler* ce, LIR_Op*) { +#undef __ +#define __ masm-> + + auto masm = ce->masm(); + + if (profile_stub != nullptr) __ bind(*profile_stub->entry()); + + Label not_null; + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); + __ cbnz(value, not_null); + // Object is null; update MDO and exit + Address data_addr + = __ form_address(rscratch2, mdo, + md->byte_offset_of_slot(data, DataLayout::flags_offset()), 0); + __ ldrb(rscratch1, data_addr); + __ orr(rscratch1, rscratch1, BitData::null_seen_byte_constant()); + __ strb(rscratch1, data_addr); + if (profile_stub != nullptr) { + __ b(*profile_stub->continuation()); + } else { + __ b(*success_target); + } + __ bind(not_null); + + Label update_done; + Register recv = k_RInfo; + __ load_klass(recv, value); + ce->type_profile_helper(mdo, md, data, recv, &update_done); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + __ addptr(counter_addr, DataLayout::counter_increment * ProfileCaptureRatio); + __ bind(update_done); + + if (profile_stub != nullptr) __ b(*profile_stub->continuation()); + +#undef __ +#define __ _masm-> + }; + + if (profile_stub != nullptr) { + __ ubfx(rscratch1, r_profile_rng, 32-ratio_shift, ratio_shift); + __ cbz(rscratch1, *profile_stub->entry()); + __ bind(*profile_stub->continuation()); + __ step_random(r_profile_rng, rscratch2); + __ cbz(value, done); + + profile_stub->set_action(lambda, op); + profile_stub->set_name("Typecheck profile stub"); + append_code_stub(profile_stub); + } else { + lambda(this, op); + } } else { __ cbz(value, done); } @@ -1987,8 +2063,9 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op } -void LIR_Assembler::align_call(LIR_Code code) { } - +void LIR_Assembler::align_call(LIR_Code code) { + __ save_profile_rng(); +} void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { address call = __ trampoline_call(Address(op->addr(), rtype)); @@ -1998,6 +2075,7 @@ void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { } add_call_info(code_offset(), op->info()); __ post_call_nop(); + __ restore_profile_rng(); } @@ -2009,6 +2087,7 @@ void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { } add_call_info(code_offset(), op->info()); __ post_call_nop(); + __ restore_profile_rng(); } void LIR_Assembler::emit_static_call_stub() { @@ -2515,78 +2594,250 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { __ load_klass(result, obj); } +void LIR_Assembler::increment_profile_ctr(LIR_Opr step, LIR_Opr counter_addr, LIR_Opr dest, LIR_Opr temp_op, + LIR_Opr freq_op, + CodeStub* overflow_stub) { +#ifndef PRODUCT + if (CommentedAssembly) { + __ block_comment("increment_event_counter {"); + } +#endif + + int profile_capture_ratio = ProfileCaptureRatio; + int ratio_shift = exact_log2(profile_capture_ratio); + unsigned long threshold = (UCONST64(1) << 32) >> ratio_shift; + + assert(threshold > 0, "must be"); + + ProfileStub *counter_stub + = profile_capture_ratio > 1 ? new ProfileStub() : nullptr; + + Register temp = temp_op->is_register() ? 
temp_op->as_register() : noreg; + Address raw_dest_adr = as_Address(counter_addr->as_address_ptr()); + + auto lambda = [counter_stub, overflow_stub, freq_op, ratio_shift, step, + temp, dest, raw_dest_adr] (LIR_Assembler* ce, LIR_Op* op) { + +#undef __ +#define __ masm-> + + auto masm = ce->masm(); + + if (counter_stub != nullptr) __ bind(*counter_stub->entry()); + + if (step->is_register()) { + Address dest_adr = __ legitimize_address(raw_dest_adr, sizeof (jint), rscratch2); + Register inc = step->as_register(); + __ ldrw(temp, dest_adr); + if (ProfileCaptureRatio > 1) { + __ lsl(inc, inc, ratio_shift); + } + __ addw(temp, temp, inc); + __ strw(temp, dest_adr); + if (dest->is_register()) __ mov(dest->as_register(), temp); + if (ProfileCaptureRatio > 1) { + __ lsr(inc, inc, ratio_shift); + } + if (dest->is_register()) __ mov(dest->as_register(), temp); + } else { + jint inc = step->as_constant_ptr()->as_jint_bits(); + switch (dest->type()) { + case T_INT: { + Address dest_adr = __ legitimize_address(raw_dest_adr, sizeof (jint), rscratch2); + inc *= ProfileCaptureRatio; + __ incrementw(dest_adr, inc, temp); + if (dest->is_register()) __ movw(dest->as_register(), temp); + + break; + } + case T_LONG: { + Address dest_adr = __ legitimize_address(raw_dest_adr, sizeof (jlong), rscratch2); + inc *= ProfileCaptureRatio; + __ increment(dest_adr, inc, temp); + if (dest->is_register()) __ mov(dest->as_register_lo(), temp); + + break; + } + default: + ShouldNotReachHere(); + } + + if (step->is_valid() && overflow_stub) { + if (!freq_op->is_valid()) { + if (!step->is_constant()) { + __ cbz(step->as_register(), *overflow_stub->entry()); + } else { + __ b(*overflow_stub->entry()); + return; + } + } else { + Register result = + dest->type() == T_INT ? dest->as_register() : + dest->type() == T_LONG ? 
dest->as_register_lo() : + noreg; + if (!step->is_constant()) { + // If step is 0, make sure the stub check below always fails + __ cmp(step->as_register(), (u1)0); + __ mov(temp, InvocationCounter::count_increment * ProfileCaptureRatio); + __ csel(result, result, temp, __ NE); + } + juint mask = freq_op->as_jint(); + __ andw(rscratch1, result, mask); + __ cbzw(rscratch1, *overflow_stub->entry()); + } + } + } + + if (counter_stub != nullptr) { + __ b(*counter_stub->continuation()); + } + +#undef __ +#define __ _masm-> + }; + + if (counter_stub != nullptr) { + __ ubfx(rscratch1, r_profile_rng, 32 - ratio_shift, ratio_shift); + __ cbz(rscratch1, *counter_stub->entry()); + __ bind(*counter_stub->continuation()); + __ step_random(r_profile_rng, temp); + + counter_stub->set_action(lambda, nullptr); + counter_stub->set_name("IncrementEventCounter"); + append_code_stub(counter_stub); + } else { + lambda(this, nullptr); + } + + if (overflow_stub != nullptr) { + __ bind(*overflow_stub->continuation()); + } + +#ifndef PRODUCT + if (CommentedAssembly) { + __ block_comment("} increment_event_counter"); + } +#endif +} + void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ciMethod* method = op->profiled_method(); int bci = op->profiled_bci(); ciMethod* callee = op->profiled_callee(); - // Update counter for all call types - ciMethodData* md = method->method_data_or_null(); - assert(md != nullptr, "Sanity"); - ciProfileData* data = md->bci_to_data(bci); - assert(data != nullptr && data->is_CounterData(), "need CounterData for calls"); - assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); - Register mdo = op->mdo()->as_register(); - __ mov_metadata(mdo, md->constant_encoding()); - Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); - // Perform additional virtual call profiling for invokevirtual and - // invokeinterface bytecodes - if (op->should_profile_receiver_type()) { - assert(op->recv()->is_single_cpu(), "recv must be allocated"); - Register recv = op->recv()->as_register(); - assert_different_registers(mdo, recv); - assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); - ciKlass* known_klass = op->known_holder(); - if (C1OptimizeVirtualCallProfiling && known_klass != nullptr) { - // We know the type that will be seen at this call site; we can - // statically update the MethodData* rather than needing to do - // dynamic tests on the receiver type - - // NOTE: we should probably put a lock around this search to - // avoid collisions by concurrent compilations - ciVirtualCallData* vc_data = (ciVirtualCallData*) data; - uint i; - for (i = 0; i < VirtualCallData::row_limit(); i++) { - ciKlass* receiver = vc_data->receiver(i); - if (known_klass->equals(receiver)) { - Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); - __ addptr(data_addr, DataLayout::counter_increment); - return; + + Register temp = op->tmp1()->as_register_lo(); + + int profile_capture_ratio = ProfileCaptureRatio; + int ratio_shift = exact_log2(profile_capture_ratio); + auto threshold = (1ull << 32) >> ratio_shift; + assert(threshold > 0, "must be"); + + ProfileStub *stub + = profile_capture_ratio > 1 ? 
new ProfileStub() : nullptr; + + auto lambda = [op, stub] (LIR_Assembler* ce, LIR_Op* base_op) { +#undef __ +#define __ masm-> + + auto masm = ce->masm(); + LIR_OpProfileCall* op = base_op->as_OpProfileCall(); + ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); + ciMethod* callee = op->profiled_callee(); + Register tmp_load_klass = rscratch1; + + Register temp = op->tmp1()->as_register_lo(); + + if (stub != nullptr) __ bind(*stub->entry()); + + // Update counter for all call types + ciMethodData* md = method->method_data_or_null(); + assert(md != nullptr, "Sanity"); + ciProfileData* data = md->bci_to_data(bci); + assert(data != nullptr && data->is_CounterData(), "need CounterData for calls"); + assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); + Register mdo = op->mdo()->as_register(); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + // Perform additional virtual call profiling for invokevirtual and + // invokeinterface bytecodes + if (op->should_profile_receiver_type()) { + assert(op->recv()->is_single_cpu(), "recv must be allocated"); + Register recv = op->recv()->as_register(); + assert_different_registers(mdo, recv); + assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); + ciKlass* known_klass = op->known_holder(); + if (C1OptimizeVirtualCallProfiling && known_klass != nullptr) { + // We know the type that will be seen at this call site; we can + // statically update the MethodData* rather than needing to do + // dynamic tests on the receiver type + + // NOTE: we should probably put a lock around this search to + // avoid collisions by concurrent compilations + ciVirtualCallData* vc_data = (ciVirtualCallData*) data; + uint i; + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (known_klass->equals(receiver)) { + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); + __ addptr(data_addr, DataLayout::counter_increment * ProfileCaptureRatio); + goto exit; + } } - } - // Receiver type not found in profile data; select an empty slot - - // Note that this is less efficient than it should be because it - // always does a write to the receiver part of the - // VirtualCallData rather than just the first time - for (i = 0; i < VirtualCallData::row_limit(); i++) { - ciKlass* receiver = vc_data->receiver(i); - if (receiver == nullptr) { - __ mov_metadata(rscratch1, known_klass->constant_encoding()); - Address recv_addr = - __ form_address(rscratch2, mdo, - md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)), - LogBytesPerWord); - __ str(rscratch1, recv_addr); - Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); - __ addptr(data_addr, DataLayout::counter_increment); - return; + // Receiver type not found in profile data; select an empty slot + + // Note that this is less efficient than it should be because it + // always does a write to the receiver part of the + // VirtualCallData rather than just the first time + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (receiver == nullptr) { + __ mov_metadata(rscratch1, known_klass->constant_encoding()); + Address recv_addr = + __ form_address(rscratch2, mdo, + md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)), + LogBytesPerWord); + __ str(rscratch1, recv_addr); + Address 
data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); + __ addptr(data_addr, DataLayout::counter_increment * ProfileCaptureRatio); + goto exit; + } } + } else { + __ load_klass(recv, recv); + Label update_done; + ce->type_profile_helper(mdo, md, data, recv, &update_done); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. + __ addptr(counter_addr, DataLayout::counter_increment * ProfileCaptureRatio); + + __ bind(update_done); } + exit: {} } else { - __ load_klass(recv, recv); - Label update_done; - type_profile_helper(mdo, md, data, recv, &update_done); - // Receiver did not match any saved receiver and there is no empty row for it. - // Increment total counter to indicate polymorphic case. - __ addptr(counter_addr, DataLayout::counter_increment); - - __ bind(update_done); + // Static call + __ addptr(counter_addr, DataLayout::counter_increment * ProfileCaptureRatio); } + + if (stub != nullptr) __ b(*stub->continuation()); + +#undef __ +#define __ _masm-> + }; + + if (stub != nullptr) { + __ ubfx(rscratch1, r_profile_rng, 32-ratio_shift, ratio_shift); + __ cbz(rscratch1, *stub->entry()); + __ bind(*stub->continuation()); + __ step_random(r_profile_rng, temp); + + stub->set_action(lambda, op); + stub->set_name("ProfileCallStub"); + append_code_stub(stub); } else { - // Static call - __ addptr(counter_addr, DataLayout::counter_increment); + lambda(this, op); } } diff --git a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp index ad26d494b2d42..cd96e95d088aa 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp @@ -1379,6 +1379,10 @@ void LIRGenerator::do_If(If* x) { __ cmp(lir_cond(cond), left, right); // Generate branch profiling. Profiling code doesn't kill flags. profile_branch(x, cond); + // If we're subsampling counter updates, then profiling code kills flags + if (ProfileCaptureRatio != 1) { + __ cmp(lir_cond(cond), left, right); + } move_to_phi(x->state()); if (x->x()->type()->is_float_kind()) { __ branch(lir_cond(cond), x->tsux(), x->usux()); diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp index e934632715c9a..245957f926c2f 100644 --- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp @@ -36,6 +36,8 @@ #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" +Register r_profile_rng; + void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, FloatRegister f0, FloatRegister f1, Register result) @@ -247,6 +249,7 @@ void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { // Note that we do this before creating a frame. generate_stack_overflow_check(bang_size_in_bytes); MacroAssembler::build_frame(framesize); + restore_profile_rng(); // Insert nmethod entry barrier into frame. BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); @@ -255,6 +258,7 @@ void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { void C1_MacroAssembler::remove_frame(int framesize) { MacroAssembler::remove_frame(framesize); + save_profile_rng(); } @@ -276,6 +280,35 @@ void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { ldr(reg, Address(rfp, (offset_in_words + 2) * BytesPerWord)); } +// Randomized profile capture. 
+ +void C1_MacroAssembler::step_random(Register state, Register temp, Register data) { + if (VM_Version::supports_crc32()) { + /* CRC used as a pseudo-random-number generator */ + // In effect, the CRC instruction is being used here for its + // linear feedback shift register. It's unbeatably fast, and + // plenty good enough for what we need. + crc32h(state, state, data); + } else { + /* LCG from glibc. */ + mov(temp, 1103515245); + mulw(state, state, temp); + addw(state, state, 12345); + } +} + +void C1_MacroAssembler::save_profile_rng() { + if (ProfileCaptureRatio != 1) { + strw(r_profile_rng, Address(rthread, JavaThread::profile_rng_offset())); + } +} + +void C1_MacroAssembler::restore_profile_rng() { + if (ProfileCaptureRatio != 1) { + ldrw(r_profile_rng, Address(rthread, JavaThread::profile_rng_offset())); + } +} + #ifndef PRODUCT void C1_MacroAssembler::verify_stack_oop(int stack_offset) { diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp index 7b181b104c10f..ed557045fce09 100644 --- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp @@ -113,4 +113,9 @@ using MacroAssembler::null_check; void load_parameter(int offset_in_words, Register reg); + // Randomized profile capture + void step_random(Register state, Register temp, Register data = rthread); + void save_profile_rng(); + void restore_profile_rng(); + #endif // CPU_AARCH64_C1_MACROASSEMBLER_AARCH64_HPP diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index ceedb4f10632a..c3e1e3f20957a 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -2739,30 +2739,34 @@ void MacroAssembler::increment(Register reg, int value) } } -void MacroAssembler::incrementw(Address dst, int value) +void MacroAssembler::incrementw(Address dst, int value, Register result) { - assert(!dst.uses(rscratch1), "invalid dst for address increment"); + assert(!dst.uses(result), "invalid dst for address increment"); + assert(result->is_valid(), "must be"); + assert_different_registers(result, rscratch2); if (dst.getMode() == Address::literal) { assert(abs(value) < (1 << 12), "invalid value and address mode combination"); lea(rscratch2, dst); dst = Address(rscratch2); } - ldrw(rscratch1, dst); - incrementw(rscratch1, value); - strw(rscratch1, dst); + ldrw(result, dst); + incrementw(result, value); + strw(result, dst); } -void MacroAssembler::increment(Address dst, int value) +void MacroAssembler::increment(Address dst, int value, Register result) { - assert(!dst.uses(rscratch1), "invalid dst for address increment"); + assert(!dst.uses(result), "invalid dst for address increment"); + assert(result->is_valid(), "must be"); + assert_different_registers(result, rscratch2); if (dst.getMode() == Address::literal) { assert(abs(value) < (1 << 12), "invalid value and address mode combination"); lea(rscratch2, dst); dst = Address(rscratch2); } - ldr(rscratch1, dst); - increment(rscratch1, value); - str(rscratch1, dst); + ldr(result, dst); + increment(result, value); + str(result, dst); } // Push lots of registers in the bit set supplied. Don't push sp.
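As an illustration of the sampling arithmetic used throughout this change (a stand-alone sketch, not HotSpot code): the out-of-line profile update is taken only when the top exact_log2(ProfileCaptureRatio) bits of the per-thread RNG state are zero -- the aarch64 ubfx/cbz test above, which is equivalent to the x86 cmpl against (1ull << 32) >> ratio_shift -- and each taken update is scaled by ProfileCaptureRatio, so the counter's expected value matches an unsampled counter. The constants below are assumptions for illustration only: a ratio of 8, the glibc-style LCG constants from the non-CRC fallback, and DataLayout::counter_increment treated as 1.

#include <cstdint>
#include <cstdio>

// glibc-style LCG step, same constants as the fallback path in step_random() above.
static uint32_t next_state(uint32_t state) {
  return state * 1103515245u + 12345u;
}

int main() {
  const int ratio       = 8;                                    // stands in for ProfileCaptureRatio
  const int ratio_shift = 3;                                    // exact_log2(ratio)
  const uint64_t threshold = (UINT64_C(1) << 32) >> ratio_shift;

  uint32_t rng     = 1;   // per-thread state (profile_rng), saved/restored around calls
  uint64_t exact   = 0;   // what a conventional, unsampled counter would hold
  uint64_t sampled = 0;   // counter that is only touched on the sampled path

  for (long i = 0; i < 10000000; i++) {
    exact += 1;                       // counter_increment, taken as 1 here
    if (rng < threshold) {            // same as "top ratio_shift bits of rng are zero"
      sampled += ratio;               // counter_increment * ProfileCaptureRatio
    }
    rng = next_state(rng);
  }
  printf("exact = %llu, sampled = %llu\n",
         (unsigned long long)exact, (unsigned long long)sampled);
  return 0;
}

Over ten million iterations the two totals should agree to within roughly a percent, which is the property the scaled increments in the stubs above rely on.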
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index 4baa07d7d4992..17e3fb80a31ef 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -481,6 +481,25 @@ class MacroAssembler: public Assembler { WRAP(smaddl) WRAP(smsubl) WRAP(umaddl) WRAP(umsubl) #undef WRAP + using Assembler::andw, Assembler::andr; + void andw(Register Rd, Register Rn, uint64_t imm) { + if (operand_valid_for_logical_immediate(/*is32*/true, imm)) { + Assembler::andw(Rd, Rn, imm); + } else { + assert(Rd != Rn, "must be"); + movw(Rd, imm); + andw(Rd, Rn, Rd); + } + } + void andr(Register Rd, Register Rn, uint64_t imm) { + if (operand_valid_for_logical_immediate(/*is32*/false, imm)) { + Assembler::andr(Rd, Rn, imm); + } else { + assert(Rd != Rn, "must be"); + mov(Rd, imm); + andr(Rd, Rn, Rd); + } + } // macro assembly operations needed for aarch64 @@ -753,11 +772,11 @@ class MacroAssembler: public Assembler { void decrement(Register reg, int value = 1); void decrement(Address dst, int value = 1); - void incrementw(Address dst, int value = 1); + void incrementw(Address dst, int value = 1, Register result = rscratch1); void incrementw(Register reg, int value = 1); void increment(Register reg, int value = 1); - void increment(Address dst, int value = 1); + void increment(Address dst, int value = 1, Register result = rscratch1); // Alignment diff --git a/src/hotspot/cpu/aarch64/register_aarch64.hpp b/src/hotspot/cpu/aarch64/register_aarch64.hpp index 108f0f34140b4..a3e1bbac44ac1 100644 --- a/src/hotspot/cpu/aarch64/register_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/register_aarch64.hpp @@ -399,6 +399,13 @@ inline Register AbstractRegSet::first() { return as_Register(count_trailing_zeros(_bitset)); } +template <> +inline Register AbstractRegSet::last() { + if (_bitset == 0) { return noreg; } + int last = max_size() - 1 - count_leading_zeros(_bitset); + return as_Register(last); +} + template <> inline FloatRegister AbstractRegSet::first() { if (_bitset == 0) { return fnoreg; } diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 43471a883919d..11ef6af737048 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -136,6 +136,9 @@ constexpr Register r15_thread = r15; // callee-saved // or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg. constexpr Register rbp_mh_SP_save = noreg; +// State for randomized profile counters. Used by C1. +extern Register r_profile_rng; + // Address is an abstraction used to represent a memory location // using any of the amd64 addressing modes with one object. // @@ -460,6 +463,7 @@ class Assembler : public AbstractAssembler { friend class AbstractAssembler; // for the non-virtual hack friend class LIR_Assembler; // as_Address() friend class StubGenerator; + friend class CodeStub; // as_Address() public: enum Condition { // The x86 condition codes used for conditional jumps/moves. diff --git a/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp b/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp index 68c9814fd201c..a81281d43f003 100644 --- a/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp +++ b/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp @@ -167,6 +167,12 @@ void FrameMap::initialize() { map_register(13, r15); r15_opr = LIR_OprFact::single_cpu(13); map_register(14, rsp); map_register(15, rbp); + // r_profile_rng is allocated conditionally. 
It is used to hold the random + // generator for profile counters. + r_profile_rng + = (UseCompressedOops && ProfileCaptureRatio > 1) ? r14 + : (ProfileCaptureRatio > 1) ? r12 + : noreg; long0_opr = LIR_OprFact::double_cpu(3 /*eax*/, 3 /*eax*/); long1_opr = LIR_OprFact::double_cpu(2 /*ebx*/, 2 /*ebx*/); diff --git a/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp b/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp index 08b872cb0951d..8d848288eda18 100644 --- a/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp +++ b/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp @@ -127,8 +127,10 @@ } static int adjust_reg_range(int range) { - // Reduce the number of available regs (to free r12) in case of compressed oops - if (UseCompressedOops) return range - 1; + // Reduce the number of available regs (to free r12 or r14) in + // case of compressed oops and randomized profile captures. + if (UseCompressedOops && ProfileCaptureRatio > 1) return range - 2; + if (UseCompressedOops || ProfileCaptureRatio > 1) return range - 1; return range; } diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index a2ea7af606d4f..4c5d1f3004e37 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1268,7 +1268,7 @@ void LIR_Assembler::type_profile_helper(Register mdo, __ cmpptr(recv, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); __ jccb(Assembler::notEqual, next_test); Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); - __ addptr(data_addr, DataLayout::counter_increment); + __ addptr(data_addr, DataLayout::counter_increment * ProfileCaptureRatio); __ jmp(*update_done); __ bind(next_test); } @@ -1280,7 +1280,7 @@ void LIR_Assembler::type_profile_helper(Register mdo, __ cmpptr(recv_addr, NULL_WORD); __ jccb(Assembler::notEqual, next_test); __ movptr(recv_addr, recv); - __ movptr(Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))), DataLayout::counter_increment); + __ movptr(Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))), DataLayout::counter_increment * ProfileCaptureRatio); __ jmp(*update_done); __ bind(next_test); } @@ -1330,6 +1330,11 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L __ testptr(obj, obj); if (op->should_profile()) { + int profile_capture_ratio = ProfileCaptureRatio; + int ratio_shift = exact_log2(profile_capture_ratio); + auto threshold = (1ull << 32) >> ratio_shift; + assert(threshold > 0, "must be"); + Label not_null; Register mdo = klass_RInfo; __ mov_metadata(mdo, md->constant_encoding()); @@ -1341,15 +1346,46 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L __ jmp(*obj_is_null); __ bind(not_null); + ProfileStub *stub + = profile_capture_ratio > 1 ? 
new ProfileStub() : nullptr; + + auto lambda = [stub, md, mdo, data, k_RInfo, obj, tmp_load_klass] (LIR_Assembler* ce, LIR_Op* base_op) { + +#undef __ +#define __ masm-> + + auto masm = ce->masm(); + if (stub != nullptr) __ bind(*stub->entry()); + Label update_done; + Register recv = k_RInfo; __ load_klass(recv, obj, tmp_load_klass); - type_profile_helper(mdo, md, data, recv, &update_done); + ce->type_profile_helper(mdo, md, data, recv, &update_done); Address nonprofiled_receiver_count_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); - __ addptr(nonprofiled_receiver_count_addr, DataLayout::counter_increment); + __ addptr(nonprofiled_receiver_count_addr, DataLayout::counter_increment * ProfileCaptureRatio); __ bind(update_done); + + if (stub != nullptr) __ jmp(*stub->continuation()); + +#undef __ +#define __ _masm-> + }; + + if (stub != nullptr) { + __ cmpl(r_profile_rng, threshold); + __ jcc(Assembler::below, *stub->entry()); + __ bind(*stub->continuation()); + __ step_random(r_profile_rng, rscratch1); + + stub->set_action(lambda, op); + stub->set_name("Typecheck stub"); + append_code_stub(stub); + } else { + lambda(this, op); + } } else { __ jcc(Assembler::equal, *obj_is_null); } @@ -1448,28 +1484,74 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { Label* success_target = &done; Label* failure_target = stub->entry(); - __ testptr(value, value); if (op->should_profile()) { + int profile_capture_ratio = ProfileCaptureRatio; + int ratio_shift = exact_log2(profile_capture_ratio); + auto threshold = (1ull << 32) >> ratio_shift; + assert(threshold > 0, "must be"); + + ProfileStub *profile_stub + = profile_capture_ratio > 1 ? new ProfileStub() : nullptr; + + auto lambda = [profile_stub, md, data, value, + k_RInfo, klass_RInfo, tmp_load_klass, success_target] (LIR_Assembler* ce, LIR_Op*) { +#undef __ +#define __ masm-> + + auto masm = ce->masm(); + + if (profile_stub != nullptr) __ bind(*profile_stub->entry()); + + __ testptr(value, value); + Label not_null; Register mdo = klass_RInfo; __ mov_metadata(mdo, md->constant_encoding()); __ jccb(Assembler::notEqual, not_null); + // Object is null; update MDO and exit Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); int header_bits = BitData::null_seen_byte_constant(); __ orb(data_addr, header_bits); - __ jmp(done); + if (profile_stub != nullptr) { + __ jmp(*profile_stub->continuation()); + } else { + __ jmp(*success_target); + } __ bind(not_null); Label update_done; Register recv = k_RInfo; __ load_klass(recv, value, tmp_load_klass); - type_profile_helper(mdo, md, data, recv, &update_done); + ce->type_profile_helper(mdo, md, data, recv, &update_done); Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); __ addptr(counter_addr, DataLayout::counter_increment); __ bind(update_done); + + if (profile_stub != nullptr) __ jmp(*profile_stub->continuation()); + +#undef __ +#define __ _masm-> + }; + + if (profile_stub != nullptr) { + __ cmpl(r_profile_rng, threshold); + __ jcc(Assembler::below, *profile_stub->entry()); + __ bind(*profile_stub->continuation()); + __ step_random(r_profile_rng, rscratch1); + __ testptr(value, value); + __ jcc(Assembler::equal, done); + + profile_stub->set_action(lambda, op); + profile_stub->set_name("Typecheck profile stub"); + append_code_stub(profile_stub); + } else { + lambda(this, op); + } + } else { + __ testptr(value, value); __ jcc(Assembler::equal, done); } @@ -2155,6 +2237,8 @@ void LIR_Assembler::comp_fl2i(LIR_Code 
code, LIR_Opr left, LIR_Opr right, LIR_Op void LIR_Assembler::align_call(LIR_Code code) { + // We do this here in order not to affect call site alignment. + __ save_profile_rng(); // make sure that the displacement word of the call ends up word aligned int offset = __ offset(); switch (code) { @@ -2178,6 +2262,8 @@ void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { __ call(AddressLiteral(op->addr(), rtype)); add_call_info(code_offset(), op->info()); __ post_call_nop(); + + __ restore_profile_rng(); } @@ -2187,6 +2273,7 @@ void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { assert((__ offset() - NativeCall::instruction_size + NativeCall::displacement_offset) % BytesPerWord == 0, "must be aligned"); __ post_call_nop(); + __ restore_profile_rng(); } @@ -2765,75 +2852,248 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { __ load_klass(result, obj, rscratch1); } -void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { - ciMethod* method = op->profiled_method(); - int bci = op->profiled_bci(); - ciMethod* callee = op->profiled_callee(); - Register tmp_load_klass = rscratch1; +void LIR_Assembler::increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp_op, + LIR_Opr freq_op, CodeStub* overflow_stub) { +#ifndef PRODUCT + if (CommentedAssembly) { + __ block_comment("increment_profile_ctr" " {"); + } +#endif + + int profile_capture_ratio = ProfileCaptureRatio; + int ratio_shift = exact_log2(profile_capture_ratio); + auto threshold = (UCONST64(1) << 32) >> ratio_shift; + + assert(threshold > 0, "must be"); + + ProfileStub *counter_stub + = profile_capture_ratio > 1 ? new ProfileStub() : nullptr; + + Register temp = temp_op->is_register() ? temp_op->as_register() : noreg; + Address dest_adr = as_Address(addr->as_address_ptr()); + + auto lambda = [counter_stub, overflow_stub, freq_op, ratio_shift, incr, + temp, dest, dest_adr] (LIR_Assembler* ce, LIR_Op* op) { + +#undef __ +#define __ masm-> + + auto masm = ce->masm(); + + if (counter_stub != nullptr) __ bind(*counter_stub->entry()); + + if (incr->is_register()) { + Register inc = incr->as_register(); + __ movl(temp, dest_adr); + if (ProfileCaptureRatio > 1) { + __ shll(inc, ratio_shift); + } + __ lea(temp, Address(temp, inc, Address::times_1)); + __ movl(dest_adr, temp); + __ movl(dest->as_register(), temp); + if (ProfileCaptureRatio > 1) { + __ shrl(inc, ratio_shift); + } + } else { + jint inc = incr->as_constant_ptr()->as_jint_bits(); + switch (dest->type()) { + case T_INT: { + inc *= ProfileCaptureRatio; + __ movl(temp, dest_adr); + // Use lea instead of add to avoid destroying condition codes on x86 + __ lea(temp, Address(temp, inc, Address::times_1)); + __ movl(dest_adr, temp); + if (dest->is_register()) { + __ movl(dest->as_register(), temp); + } + break; + } + case T_LONG: { + inc *= ProfileCaptureRatio; + __ movq(temp, dest_adr); + // Use lea instead of add to avoid destroying condition codes on x86 + __ lea(temp, Address(temp, inc, Address::times_1)); + __ movq(dest_adr, temp); + if (dest->is_register()) { + __ movq(dest->as_register_lo(), temp); + } - // Update counter for all call types - ciMethodData* md = method->method_data_or_null(); - assert(md != nullptr, "Sanity"); - ciProfileData* data = md->bci_to_data(bci); - assert(data != nullptr && data->is_CounterData(), "need CounterData for calls"); - assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); - Register mdo = op->mdo()->as_register(); - __ mov_metadata(mdo, md->constant_encoding()); - Address counter_addr(mdo, 
md->byte_offset_of_slot(data, CounterData::count_offset())); - // Perform additional virtual call profiling for invokevirtual and - // invokeinterface bytecodes - if (op->should_profile_receiver_type()) { - assert(op->recv()->is_single_cpu(), "recv must be allocated"); - Register recv = op->recv()->as_register(); - assert_different_registers(mdo, recv); - assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); - ciKlass* known_klass = op->known_holder(); - if (C1OptimizeVirtualCallProfiling && known_klass != nullptr) { - // We know the type that will be seen at this call site; we can - // statically update the MethodData* rather than needing to do - // dynamic tests on the receiver type - - // NOTE: we should probably put a lock around this search to - // avoid collisions by concurrent compilations - ciVirtualCallData* vc_data = (ciVirtualCallData*) data; - uint i; - for (i = 0; i < VirtualCallData::row_limit(); i++) { - ciKlass* receiver = vc_data->receiver(i); - if (known_klass->equals(receiver)) { - Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); - __ addptr(data_addr, DataLayout::counter_increment); - return; + break; } + default: + ShouldNotReachHere(); } - // Receiver type not found in profile data; select an empty slot - - // Note that this is less efficient than it should be because it - // always does a write to the receiver part of the - // VirtualCallData rather than just the first time - for (i = 0; i < VirtualCallData::row_limit(); i++) { - ciKlass* receiver = vc_data->receiver(i); - if (receiver == nullptr) { - Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); - __ mov_metadata(recv_addr, known_klass->constant_encoding(), rscratch1); - Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); - __ addptr(data_addr, DataLayout::counter_increment); - return; + if (incr->is_valid() && overflow_stub) { + if (!freq_op->is_valid()) { + if (!incr->is_constant()) { + __ cmpl(incr->as_register(), 0); + __ jcc(Assembler::equal, *overflow_stub->entry()); + } else { + __ jmp(*overflow_stub->entry()); + goto exit; + } + } else { + Register result = + dest->type() == T_INT ? dest->as_register() : + dest->type() == T_LONG ? dest->as_register_lo() : + noreg; + if (!incr->is_constant()) { + // If step is 0, make sure the stub check below always fails + __ cmpl(incr->as_register(), 0); + __ movl(temp, InvocationCounter::count_increment * ProfileCaptureRatio); + __ cmovl(Assembler::notEqual, result, temp); + } + __ andl(result, freq_op->as_jint()); + __ jcc(Assembler::equal, *overflow_stub->entry()); } } - } else { - __ load_klass(recv, recv, tmp_load_klass); - Label update_done; - type_profile_helper(mdo, md, data, recv, &update_done); - // Receiver did not match any saved receiver and there is no empty row for it. - // Increment total counter to indicate polymorphic case. 
- __ addptr(counter_addr, DataLayout::counter_increment); + } - __ bind(update_done); + if (counter_stub != nullptr) { + __ jmp(*counter_stub->continuation()); } + + exit: { } + +#undef __ +#define __ _masm-> + }; + + if (counter_stub != nullptr) { + __ cmpl(r_profile_rng, threshold); + __ jcc(Assembler::below, *counter_stub->entry()); + __ bind(*counter_stub->continuation()); + __ step_random(r_profile_rng, temp); + + counter_stub->set_action(lambda, nullptr); + counter_stub->set_name("IncrementProfileCtr"); + append_code_stub(counter_stub); } else { - // Static call - __ addptr(counter_addr, DataLayout::counter_increment); + lambda(this, nullptr); + } + + if (overflow_stub != nullptr) { + __ bind(*overflow_stub->continuation()); + } + +#ifndef PRODUCT + if (CommentedAssembly) { + __ block_comment("} increment_profile_ctr"); + } +#endif +} + +void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { + + Register temp = op->tmp1()->as_register_lo(); + + int profile_capture_ratio = ProfileCaptureRatio; + int ratio_shift = exact_log2(profile_capture_ratio); + auto threshold = (1ull << 32) >> ratio_shift; + assert(threshold > 0, "must be"); + + ProfileStub *stub + = profile_capture_ratio > 1 ? new ProfileStub() : nullptr; + + auto lambda = [op, stub] (LIR_Assembler* ce, LIR_Op* base_op) { +#undef __ +#define __ masm-> + + auto masm = ce->masm(); + LIR_OpProfileCall* op = base_op->as_OpProfileCall(); + ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); + ciMethod* callee = op->profiled_callee(); + Register tmp_load_klass = rscratch1; + + Register temp = op->tmp1()->as_register_lo(); + + if (stub != nullptr) __ bind(*stub->entry()); + + // Update counter for all call types + ciMethodData* md = method->method_data_or_null(); + assert(md != nullptr, "Sanity"); + ciProfileData* data = md->bci_to_data(bci); + assert(data != nullptr && data->is_CounterData(), "need CounterData for calls"); + assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); + Register mdo = op->mdo()->as_register(); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + // Perform additional virtual call profiling for invokevirtual and + // invokeinterface bytecodes + if (op->should_profile_receiver_type()) { + assert(op->recv()->is_single_cpu(), "recv must be allocated"); + Register recv = op->recv()->as_register(); + assert_different_registers(mdo, recv); + assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); + ciKlass* known_klass = op->known_holder(); + if (C1OptimizeVirtualCallProfiling && known_klass != nullptr) { + // We know the type that will be seen at this call site; we can + // statically update the MethodData* rather than needing to do + // dynamic tests on the receiver type + + // NOTE: we should probably put a lock around this search to + // avoid collisions by concurrent compilations + ciVirtualCallData* vc_data = (ciVirtualCallData*) data; + uint i; + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (known_klass->equals(receiver)) { + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); + __ addptr(data_addr, DataLayout::counter_increment); + goto exit; + } + } + + // Receiver type not found in profile data; select an empty slot + + // Note that this is less efficient than it should be because it + // always does a write to the receiver part of the + // VirtualCallData 
rather than just the first time + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (receiver == nullptr) { + Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); + __ mov_metadata(recv_addr, known_klass->constant_encoding(), rscratch1); + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); + __ addptr(data_addr, DataLayout::counter_increment * ProfileCaptureRatio); + goto exit; + } + } + } else { + __ load_klass(recv, recv, tmp_load_klass); + Label update_done; + ce->type_profile_helper(mdo, md, data, recv, &update_done); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. + __ addptr(counter_addr, DataLayout::counter_increment * ProfileCaptureRatio); + + __ bind(update_done); + } + exit: {} + } else { + // Static call + __ addptr(counter_addr, DataLayout::counter_increment * ProfileCaptureRatio); + } + + if (stub != nullptr) __ jmp(*stub->continuation()); + +#undef __ +#define __ _masm-> + }; + + if (stub != nullptr) { + __ cmpl(r_profile_rng, threshold); + __ jcc(Assembler::below, *stub->entry()); + __ bind(*stub->continuation()); + __ step_random(r_profile_rng, temp); + + stub->set_action(lambda, op); + stub->set_name("ProfileCallStub"); + append_code_stub(stub); + } else { + lambda(this, op); } } @@ -2847,6 +3107,26 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { bool not_null = op->not_null(); bool no_conflict = op->no_conflict(); + __ verify_oop(obj); + +#ifdef ASSERT + assert_different_registers(obj, tmp, rscratch1, mdo_addr.base(), mdo_addr.index()); +#endif + + int profile_capture_ratio = ProfileCaptureRatio; + int ratio_shift = exact_log2(profile_capture_ratio); + auto threshold = (1ull << 32) >> ratio_shift; + assert(threshold > 0, "must be"); + + ProfileStub *stub + = profile_capture_ratio > 1 ? 
new ProfileStub() : nullptr; + + auto lambda = [stub, mdo_addr, not_null, exact_klass, current_klass, + obj, tmp, tmp_load_klass, no_conflict] (LIR_Assembler* ce, LIR_Op*) { +#undef __ +#define __ masm-> + + auto masm = ce->masm(); Label update, next, none; bool do_null = !not_null; @@ -2856,14 +3136,11 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { assert(do_null || do_update, "why are we here?"); assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); + if (stub != nullptr) __ bind(*stub->entry()); __ verify_oop(obj); #ifdef ASSERT - if (obj == tmp) { - assert_different_registers(obj, rscratch1, mdo_addr.base(), mdo_addr.index()); - } else { - assert_different_registers(obj, tmp, rscratch1, mdo_addr.base(), mdo_addr.index()); - } + assert_different_registers(obj, tmp, rscratch1, mdo_addr.base(), mdo_addr.index()); #endif if (do_null) { __ testptr(obj, obj); @@ -3003,8 +3280,27 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { __ orptr(mdo_addr, TypeEntries::type_unknown); } } - } + } // do_update + __ bind(next); + if (stub != nullptr) __ jmp(*stub->continuation()); + +#undef __ +#define __ _masm-> + }; + + if (stub != nullptr) { + __ cmpl(r_profile_rng, threshold); + __ jcc(Assembler::below, *stub->entry()); + __ bind(*stub->continuation()); + __ step_random(r_profile_rng, tmp); + + stub->set_action(lambda, op); + stub->set_name("ProfileTypeStub"); + append_code_stub(stub); + } else { + lambda(this, op); + } } void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp index 5459e8df22932..2bb7248803616 100644 --- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp @@ -1376,6 +1376,10 @@ void LIRGenerator::do_If(If* x) { __ cmp(lir_cond(cond), left, right); // Generate branch profiling. Profiling code doesn't kill flags. 
profile_branch(x, cond); + // If we're subsampling counter updates, then profiling code kills flags + if (ProfileCaptureRatio != 1) { + __ cmp(lir_cond(cond), left, right); + } move_to_phi(x->state()); if (x->x()->type()->is_float_kind()) { __ branch(lir_cond(cond), x->tsux(), x->usux()); diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index 88e2e6c8ba953..aa9d1867bed7c 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -41,6 +41,8 @@ #include "utilities/checkedCast.hpp" #include "utilities/globalDefinitions.hpp" +Register r_profile_rng; + int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register basic_lock, Register tmp, Label& slow_case) { assert(hdr == rax, "hdr must be rax, for the cmpxchg instruction"); assert_different_registers(hdr, obj, basic_lock, tmp); @@ -237,6 +239,8 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by } decrement(rsp, frame_size_in_bytes); // does not emit code for frame_size == 0 + restore_profile_rng(); + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); // C1 code is not hot enough to micro optimize the nmethod entry barrier with an out-of-line stub bs->nmethod_entry_barrier(this, nullptr /* slow_path */, nullptr /* continuation */); @@ -244,6 +248,7 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by void C1_MacroAssembler::remove_frame(int frame_size_in_bytes) { + save_profile_rng(); increment(rsp, frame_size_in_bytes); // Does not emit code for frame_size == 0 pop(rbp); } @@ -264,6 +269,49 @@ void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { movptr(reg, Address(rbp, (offset_in_words + 2) * BytesPerWord)); } +// Randomized profile capture. + +void C1_MacroAssembler::step_random(Register state, Register temp) { + // One of these will be the best for a particular CPU. + + /* Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" */ + // movl(temp, state); + // sall(temp, 13); + // xorl(state, temp); + // movl(temp, state); + // shrl(temp, 7); + // xorl(state, temp); + // movl(temp, state); + // sall(temp, 5); + // xorl(state, temp); + + if (VM_Version::supports_sse4_2()) { + /* CRC used as a pseudo-random-number generator */ + // In effect, the CRC instruction is being used here for its + // linear feedback shift register. It's unbeatably fast, and + // plenty good enough for what we need. + movl(temp, 1); + crc32(state, temp, /*sizeInBytes*/2); + } else { + /* LCG from glibc.
*/ + movl(temp, 1103515245); + imull(state, temp); + addl(state, 12345); + } +} + +void C1_MacroAssembler::save_profile_rng() { + if (ProfileCaptureRatio != 1) { + movl(Address(r15_thread, JavaThread::profile_rng_offset()), r_profile_rng); + } +} + +void C1_MacroAssembler::restore_profile_rng() { + if (ProfileCaptureRatio != 1) { + movl(r_profile_rng, Address(r15_thread, JavaThread::profile_rng_offset())); + } +} + #ifndef PRODUCT void C1_MacroAssembler::verify_stack_oop(int stack_offset) { diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp index f33e47aadb3af..a9ddfe1db64b0 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp @@ -128,4 +128,9 @@ void restore_live_registers_except_rax(bool restore_fpu_registers); void restore_live_registers(bool restore_fpu_registers); + // Randomized profile capture + void step_random(Register state, Register temp); + void save_profile_rng(); + void restore_profile_rng(); + #endif // CPU_X86_C1_MACROASSEMBLER_X86_HPP diff --git a/src/hotspot/share/c1/c1_CodeStubs.hpp b/src/hotspot/share/c1/c1_CodeStubs.hpp index c8b29f91bb2b5..ec1d52440b3a4 100644 --- a/src/hotspot/share/c1/c1_CodeStubs.hpp +++ b/src/hotspot/share/c1/c1_CodeStubs.hpp @@ -61,6 +61,8 @@ class CodeStub: public CompilationResourceObj { #ifndef PRODUCT virtual void print_name(outputStream* out) const = 0; #endif + Address as_Address(LIR_Assembler* ce, LIR_Address* addr, Register tmp); + Address as_Address(LIR_Assembler* ce, LIR_Address* addr); // label access Label* entry() { return &_entry; } @@ -127,6 +129,74 @@ class CounterOverflowStub: public CodeStub { }; + +class AbstractLambdaWrapper : public CompilationResourceObj { +public: + virtual void operator() (LIR_Assembler* ce) = 0; +}; + +template +struct LambdaWrapper : public AbstractLambdaWrapper { + T _lambda; + LIR_Op* _op; + + LambdaWrapper(T lambda, LIR_Op* op) : _lambda(lambda), _op(op) { } + virtual void operator() (LIR_Assembler* ce) { + _lambda(ce, _op); + } +}; + +class ProfileStub: public CodeStub { +private: + AbstractLambdaWrapper *_action; + const char* _name; + +public: + ProfileStub() { + _name = "ProfileStub"; + } + template + void set_action(U action, LIR_Op *op) { _action = new LambdaWrapper(action, op); } + void set_name(const char* name) { _name = name; } + virtual void emit_code(LIR_Assembler* ce) { + (*_action)(ce); + } +#ifndef PRODUCT + virtual void print_name(outputStream* out) const { out->print("%s", _name); } +#endif // PRODUCT + virtual void visit(LIR_OpVisitState* visitor) { } +}; + + +class ConversionStub: public CodeStub { + private: + Bytecodes::Code _bytecode; + LIR_Opr _input; + LIR_Opr _result; + + static float float_zero; + static double double_zero; + public: + ConversionStub(Bytecodes::Code bytecode, LIR_Opr input, LIR_Opr result) + : _bytecode(bytecode), _input(input), _result(result) { + } + + Bytecodes::Code bytecode() { return _bytecode; } + LIR_Opr input() { return _input; } + LIR_Opr result() { return _result; } + + virtual void emit_code(LIR_Assembler* e); + virtual void visit(LIR_OpVisitState* visitor) { + visitor->do_slow_case(); + visitor->do_input(_input); + visitor->do_output(_result); + } +#ifndef PRODUCT + virtual void print_name(outputStream* out) const { out->print("ConversionStub"); } +#endif // PRODUCT +}; + + // Throws ArrayIndexOutOfBoundsException by default but can be // configured to throw IndexOutOfBoundsException in constructor class RangeCheckStub: public 
CodeStub { diff --git a/src/hotspot/share/c1/c1_Compilation.cpp b/src/hotspot/share/c1/c1_Compilation.cpp index 368cf604eebed..e3554772970c6 100644 --- a/src/hotspot/share/c1/c1_Compilation.cpp +++ b/src/hotspot/share/c1/c1_Compilation.cpp @@ -294,7 +294,7 @@ void Compilation::emit_code_epilog(LIR_Assembler* assembler) { return; } - // generate code or slow cases + // generate code for slow cases assembler->emit_slow_case_stubs(); CHECK_BAILOUT(); diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index a67fa98f8f8dd..73fbeda6c9aa2 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -889,11 +889,27 @@ void LIR_OpVisitState::visit(LIR_Op* op) { LIR_OpProfileType* opProfileType = (LIR_OpProfileType*)op; do_input(opProfileType->_mdp); do_temp(opProfileType->_mdp); - do_input(opProfileType->_obj); + do_input(opProfileType->_obj); do_temp(opProfileType->_obj); do_temp(opProfileType->_tmp); break; } - default: + + case lir_increment_counter: + { + LIR_OpIncrementCounter* opr = op->as_OpIncrementCounter(); + assert(opr != nullptr, "must be"); + + if (opr->_info) do_info(opr->_info); + do_input(opr->_counter_addr); do_temp(opr->_counter_addr); + do_input(opr->_step); do_temp(opr->_step); + if (opr->_dest->is_valid()) { do_output(opr->_dest); } + if (opr->_temp_op->is_valid()) do_temp(opr->_temp_op); + if (opr->overflow_stub() != nullptr) do_stub(opr->overflow_stub()); + + break; + } + + default: op->visit(this); } } @@ -1055,6 +1071,14 @@ void LIR_OpAssert::emit_code(LIR_Assembler* masm) { } #endif +void LIR_OpIncrementCounter::emit_code(LIR_Assembler* masm) { + masm->increment_profile_ctr + (_step, _counter_addr, _dest, _temp_op, _freq_op, _overflow_stub); + if (overflow_stub()) { + masm->append_code_stub(overflow_stub()); + } +} + void LIR_OpProfileCall::emit_code(LIR_Assembler* masm) { masm->emit_profile_call(this); } @@ -1260,6 +1284,19 @@ void LIR_List::volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offs } +void LIR_List::increment_counter(LIR_Opr step, LIR_Address* addr, LIR_Opr dest, LIR_Opr tmp, + LIR_Opr freq, CodeStub* overflow, CodeEmitInfo* info) { + append(new LIR_OpIncrementCounter ( + step, + LIR_OprFact::address(addr), + dest, + tmp, + freq, + overflow, + info)); +} + + void LIR_List::idiv(LIR_Opr left, LIR_Opr right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info) { append(new LIR_Op3( lir_idiv, @@ -1751,6 +1788,7 @@ const char * LIR_Op::name() const { case lir_profile_call: s = "profile_call"; break; // LIR_OpProfileType case lir_profile_type: s = "profile_type"; break; + case lir_increment_counter: s = "increment_counter"; break; // LIR_OpAssert #ifdef ASSERT case lir_assert: s = "assert"; break; @@ -2046,6 +2084,13 @@ void LIR_OpProfileType::print_instr(outputStream* out) const { tmp()->print(out); out->print(" "); } +void LIR_OpIncrementCounter::print_instr(outputStream* out) const { + step()->print(out); out->print(" "); + counter_addr()->print(out); out->print(" "); + dest()->print(out); out->print(" "); + temp_op()->print(out); out->print(" "); + freq_op()->print(out); out->print(" "); +} #endif // PRODUCT // Implementation of LIR_InsertionBuffer diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index 80b0fd65bc146..a76d2a76dea62 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -893,6 +893,7 @@ class LIR_OpCompareAndSwap; class LIR_OpLoadKlass; class LIR_OpProfileCall; class LIR_OpProfileType; +class LIR_OpIncrementCounter; #ifdef 
diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp
index 80b0fd65bc146..a76d2a76dea62 100644
--- a/src/hotspot/share/c1/c1_LIR.hpp
+++ b/src/hotspot/share/c1/c1_LIR.hpp
@@ -893,6 +893,7 @@ class LIR_OpCompareAndSwap;
 class LIR_OpLoadKlass;
 class LIR_OpProfileCall;
 class LIR_OpProfileType;
+class LIR_OpIncrementCounter;
 #ifdef ASSERT
 class LIR_OpAssert;
 #endif
@@ -997,6 +998,7 @@ enum LIR_Code {
   , begin_opMDOProfile
     , lir_profile_call
     , lir_profile_type
+    , lir_increment_counter
   , end_opMDOProfile
   , begin_opAssert
     , lir_assert
@@ -1139,6 +1141,7 @@ class LIR_Op: public CompilationResourceObj {
   virtual LIR_OpLoadKlass* as_OpLoadKlass() { return nullptr; }
   virtual LIR_OpProfileCall* as_OpProfileCall() { return nullptr; }
   virtual LIR_OpProfileType* as_OpProfileType() { return nullptr; }
+  virtual LIR_OpIncrementCounter* as_OpIncrementCounter() { return nullptr; }
 #ifdef ASSERT
   virtual LIR_OpAssert* as_OpAssert() { return nullptr; }
 #endif
@@ -1930,6 +1933,42 @@ class LIR_OpCompareAndSwap : public LIR_Op {
   virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
 };
 
+// LIR_OpIncrementCounter
+class LIR_OpIncrementCounter : public LIR_Op {
+ friend class LIR_OpVisitState;
+
+ private:
+  LIR_Opr   _step;
+  LIR_Opr   _counter_addr;
+  LIR_Opr   _dest;
+  LIR_Opr   _temp_op;
+  LIR_Opr   _freq_op;
+  CodeStub* _overflow_stub;
+
+ public:
+  // Increments a profile counter; may branch to overflow_stub to notify the runtime.
+  LIR_OpIncrementCounter(LIR_Opr step, LIR_Opr counter_addr, LIR_Opr dest, LIR_Opr temp_op,
+                         LIR_Opr freq_op, CodeStub* overflow_stub, CodeEmitInfo *info)
+    : LIR_Op(lir_increment_counter, LIR_OprFact::illegalOpr, info)
+    , _step(step)
+    , _counter_addr(counter_addr)
+    , _dest(dest)
+    , _temp_op(temp_op)
+    , _freq_op(freq_op)
+    , _overflow_stub(overflow_stub) { }
+
+  LIR_Opr   step() const          { return _step; }
+  LIR_Opr   counter_addr() const  { return _counter_addr; }
+  LIR_Opr   dest() const          { return _dest; }
+  LIR_Opr   temp_op() const       { return _temp_op; }
+  LIR_Opr   freq_op() const       { return _freq_op; }
+  CodeStub* overflow_stub() const { return _overflow_stub; }
+
+  virtual void emit_code(LIR_Assembler* masm);
+  virtual LIR_OpIncrementCounter* as_OpIncrementCounter() { return this; }
+  virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
+};
+
 // LIR_OpProfileCall
 class LIR_OpProfileCall : public LIR_Op {
  friend class LIR_OpVisitState;
@@ -2232,6 +2271,12 @@ class LIR_List: public CompilationResourceObj {
   void volatile_store_mem_reg(LIR_Opr src, LIR_Address* address, CodeEmitInfo* info, LIR_PatchCode patch_code = lir_patch_none);
   void volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offset, BasicType type, CodeEmitInfo* info, LIR_PatchCode patch_code);
 
+  void increment_counter(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, LIR_Opr freq, CodeStub* overflow, CodeEmitInfo* info);
+  void increment_counter(LIR_Opr src, LIR_Address* addr, LIR_Opr res, LIR_Opr tmp, CodeStub* overflow = nullptr) {
+    increment_counter(src, addr, res, tmp, LIR_OprFact::illegalOpr, overflow, nullptr);
+  }
+
+
   void idiv(LIR_Opr left, LIR_Opr right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info);
   void idiv(LIR_Opr left, int   right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info);
   void irem(LIR_Opr left, LIR_Opr right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info);
diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp
index 4cb313af90152..452ff903d10bf 100644
--- a/src/hotspot/share/c1/c1_LIRAssembler.hpp
+++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp
@@ -235,6 +235,10 @@ class LIR_Assembler: public CompilationResourceObj {
   void align_backward_branch_target();
   void align_call(LIR_Code code);
 
+  void increment_profile_ctr(LIR_Opr incr, LIR_Opr addr, LIR_Opr dest, LIR_Opr temp,
+                             LIR_Opr freq_op,
+                             CodeStub *overflow);
+
   void negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp = LIR_OprFact::illegalOpr);
   void leal(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code = lir_patch_none, CodeEmitInfo* info = nullptr);
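
// Illustration (not part of the patch): a rough host-side model of what the
// increment_profile_ctr operation declared above is expected to compute per
// profiled event.  The capture test, the RNG-stepping cadence and the exact
// overflow condition are assumptions; the real lowering is per-backend machine
// code and may differ in detail.
#include <cstdint>

struct OverflowStubModel { bool taken = false; };   // stands in for CounterOverflowStub

// counter     - value of the MDO/MethodCounters cell addressed by `addr`
// step        - the normal per-event increment
// freq_mask   - already shifted, ratio-aligned notification mask (0 = no check)
// rng         - per-thread profile RNG state (JavaThread::_profile_rng)
// ratio_shift - exact_log2(ProfileCaptureRatio)
inline int64_t increment_profile_ctr_model(int64_t counter, int32_t step,
                                           uint32_t freq_mask, uint32_t& rng,
                                           int ratio_shift, OverflowStubModel* overflow) {
  const uint32_t ratio     = 1u << ratio_shift;
  const uint64_t threshold = (uint64_t(1) << 32) >> ratio_shift;
  if ((uint64_t)rng < threshold) {            // capture roughly 1 event in `ratio`
    counter += (int64_t)step * ratio;         // scaled so the expected total is unchanged
    if (overflow != nullptr && freq_mask != 0 &&
        ((uint64_t)counter & freq_mask) == 0) {
      overflow->taken = true;                 // would branch to the overflow stub
    }
  }
  rng = rng * 1103515245u + 12345u;           // the LCG emitted by step_random()
  return counter;
}
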
diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp
index f6807abcd7acd..24a1691803b81 100644
--- a/src/hotspot/share/c1/c1_LIRGenerator.cpp
+++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp
@@ -927,11 +927,9 @@ void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) {
       // MDO cells are intptr_t, so the data_reg width is arch-dependent.
       LIR_Opr data_reg = new_pointer_register();
       LIR_Address* data_addr = new LIR_Address(md_reg, data_offset_reg, data_reg->type());
-      __ move(data_addr, data_reg);
-      // Use leal instead of add to avoid destroying condition codes on x86
-      LIR_Address* fake_incr_value = new LIR_Address(data_reg, DataLayout::counter_increment, T_INT);
-      __ leal(LIR_OprFact::address(fake_incr_value), data_reg);
-      __ move(data_reg, data_addr);
+      LIR_Opr tmp = new_register(T_INT);
+      LIR_Opr step = LIR_OprFact::intConst(DataLayout::counter_increment);
+      __ increment_counter(step, data_addr, LIR_OprFact::intConst(0), tmp, nullptr);
     }
   }
 
@@ -2373,8 +2371,12 @@ void LIRGenerator::do_Goto(Goto* x) {
       LIR_Opr md_reg = new_register(T_METADATA);
       __ metadata2reg(md->constant_encoding(), md_reg);
-      increment_counter(new LIR_Address(md_reg, offset,
-                                        NOT_LP64(T_INT) LP64_ONLY(T_LONG)), DataLayout::counter_increment);
+      LIR_Address *counter_addr = new LIR_Address(md_reg, offset,
+                                                  NOT_LP64(T_INT) LP64_ONLY(T_LONG));
+      LIR_Opr tmp = new_register(T_INT);
+      LIR_Opr dummy = LIR_OprFact::intConst(0);
+      LIR_Opr inc = LIR_OprFact::intConst(DataLayout::counter_increment);
+      __ increment_counter(inc, counter_addr, dummy, tmp, nullptr);
     }
 
     // emit phi-instruction move after safepoint since this simplifies
@@ -3172,34 +3174,24 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info,
     ShouldNotReachHere();
   }
   LIR_Address* counter = new LIR_Address(counter_holder, offset, T_INT);
-  LIR_Opr result = new_register(T_INT);
-  __ load(counter, result);
-  __ add(result, step, result);
-  __ store(result, counter);
+  LIR_Opr result = notify ? new_register(T_INT) : LIR_OprFact::intConst(0);
+  LIR_Opr tmp = new_register(T_INT);
+
   if (notify && (!backedge || UseOnStackReplacement)) {
+    int ratio_shift = exact_log2(ProfileCaptureRatio);
     LIR_Opr meth = LIR_OprFact::metadataConst(method->constant_encoding());
     // The bci for info can point to cmp for if's we want the if bci
-    CodeStub* overflow = new CounterOverflowStub(info, bci, meth);
-    int freq = frequency << InvocationCounter::count_shift;
-    if (freq == 0) {
-      if (!step->is_constant()) {
-        __ cmp(lir_cond_notEqual, step, LIR_OprFact::intConst(0));
-        __ branch(lir_cond_notEqual, overflow);
-      } else {
-        __ branch(lir_cond_always, overflow);
-      }
-    } else {
-      LIR_Opr mask = load_immediate(freq, T_INT);
-      if (!step->is_constant()) {
-        // If step is 0, make sure the overflow check below always fails
-        __ cmp(lir_cond_notEqual, step, LIR_OprFact::intConst(0));
-        __ cmove(lir_cond_notEqual, result, LIR_OprFact::intConst(InvocationCounter::count_increment), result, T_INT);
-      }
-      __ logical_and(result, mask, result);
-      __ cmp(lir_cond_equal, result, LIR_OprFact::intConst(0));
-      __ branch(lir_cond_equal, overflow);
-    }
-    __ branch_destination(overflow->continuation());
+    CodeStub* overflow = new CounterOverflowStub (info, bci, meth);
+    // Zero the low-order bits of the frequency, otherwise we'll miss
+    // overflows when using randomized profile counters.
+    unsigned int freq = (unsigned int)frequency
+        >> ratio_shift << ratio_shift
+        << InvocationCounter::count_shift;
+    __ increment_counter(step, counter, result, tmp,
+                         LIR_OprFact::intConst(freq), overflow, info);
+  } else {
+    __ increment_counter(step, counter, result, tmp,
+                         LIR_OprFact::illegalOpr, nullptr, info);
   }
 }
diff --git a/src/hotspot/share/compiler/compiler_globals.hpp b/src/hotspot/share/compiler/compiler_globals.hpp
index 3919c596d7421..349ee43be48a8 100644
--- a/src/hotspot/share/compiler/compiler_globals.hpp
+++ b/src/hotspot/share/compiler/compiler_globals.hpp
@@ -388,6 +388,12 @@
           "If compilation is stopped with an error, capture diagnostic "   \
           "information at the bailout point")                              \
                                                                            \
+  product(int, ProfileCaptureRatio, 64, EXPERIMENTAL,                      \
+          "Reduce and randomize tiered-compilation profile captures "      \
+          "in order to reduce cache contention on shared method data. "    \
+          "Must be a power of 2.")                                         \
+          range(1, 65536)                                                  \
+                                                                           \
 // end of COMPILER_FLAGS
 
 DECLARE_FLAGS(COMPILER_FLAGS)
diff --git a/src/hotspot/share/runtime/javaThread.cpp b/src/hotspot/share/runtime/javaThread.cpp
index 28bc47c4c746d..74ef6d68dd302 100644
--- a/src/hotspot/share/runtime/javaThread.cpp
+++ b/src/hotspot/share/runtime/javaThread.cpp
@@ -522,7 +522,9 @@ JavaThread::JavaThread(MemTag mem_tag) :
 #endif
   _lock_stack(this),
-  _om_cache(this) {
+  _om_cache(this),
+
+  _profile_rng(-1) {
   set_jni_functions(jni_functions());
 
 #if INCLUDE_JVMCI
@@ -539,6 +541,16 @@ JavaThread::JavaThread(MemTag mem_tag) :
 
   set_requires_cross_modify_fence(false);
 
+  // Initial state of random-number generator used when profiling
+  // C1-generated code.
+  if (ProfileCaptureRatio > 1) {
+    int state;
+    do {
+      state = os::random();
+    } while (state == 0);
+    _profile_rng = state;
+  }
+
   pd_initialize();
 }
 
diff --git a/src/hotspot/share/runtime/javaThread.hpp b/src/hotspot/share/runtime/javaThread.hpp
index b0cd6fb3e4f41..951390abd3c30 100644
--- a/src/hotspot/share/runtime/javaThread.hpp
+++ b/src/hotspot/share/runtime/javaThread.hpp
@@ -925,6 +925,7 @@ class JavaThread: public Thread {
   static ByteSize preempt_alternate_return_offset() { return byte_offset_of(JavaThread, _preempt_alternate_return); }
   DEBUG_ONLY(static ByteSize interp_at_preemptable_vmcall_cnt_offset() { return byte_offset_of(JavaThread, _interp_at_preemptable_vmcall_cnt); })
   static ByteSize unlocked_inflated_monitor_offset() { return byte_offset_of(JavaThread, _unlocked_inflated_monitor); }
+  static ByteSize profile_rng_offset() { return byte_offset_of(JavaThread, _profile_rng); }
 
 #if INCLUDE_JVMTI
   static ByteSize is_in_VTMS_transition_offset() { return byte_offset_of(JavaThread, _is_in_VTMS_transition); }
@@ -1286,6 +1287,9 @@ class JavaThread: public Thread {
   LockStack _lock_stack;
   OMCache _om_cache;
 
+  // Random value for randomized profile counters.
+  uint32_t _profile_rng;
+
  public:
   LockStack& lock_stack() { return _lock_stack; }
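
// Illustration (not part of the patch): why increment_event_counter_impl rounds the
// notification frequency down to a multiple of ProfileCaptureRatio before shifting.
// The same counters are also bumped one step at a time elsewhere (for example by the
// interpreter), so a cell can hold a value that is not a multiple of the ratio; from
// then on C1 advances it only in strides of ratio * increment and the low-order bits
// never change again.  If the overflow mask kept those bits, a test of the form
// `(count & mask) == 0` could never fire and the notification would be missed.  The
// exact overflow test is an assumption here; the frequency value below is modeled as
// the usual low-bit mask ((1 << freq_log) - 1), and InvocationCounter::count_shift is
// ignored to keep the demo small.
#include <cstdint>
#include <cstdio>

int main() {
  const int      ratio_shift = 6;                 // ProfileCaptureRatio == 64
  const uint32_t ratio       = 1u << ratio_shift;
  const uint32_t frequency   = (1u << 10) - 1;    // notify roughly every 1024 counts

  const uint32_t raw_mask     = frequency;
  const uint32_t aligned_mask = frequency >> ratio_shift << ratio_shift;

  uint32_t count = 3;                             // residue left by single-step updates
  int raw_hits = 0, aligned_hits = 0;
  for (int captures = 0; captures < 4096; captures++) {
    count += ratio;                               // one randomized capture
    if ((count & raw_mask) == 0)     raw_hits++;
    if ((count & aligned_mask) == 0) aligned_hits++;
  }
  // raw_hits stays 0: the low six bits of count are stuck at 3, so the unaligned mask
  // can never be cleared.  aligned_hits fires once every 1024/64 captures instead.
  printf("raw=%d aligned=%d\n", raw_hits, aligned_hits);
  return 0;
}
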