8290700: Optimize AArch64 nmethod entry barriers
Reviewed-by: kvn, dlong
fisk committed Jul 25, 2022
1 parent 852e71d commit 228e8e9
Showing 14 changed files with 157 additions and 58 deletions.
19 changes: 18 additions & 1 deletion src/hotspot/cpu/aarch64/aarch64.ad
@@ -1920,7 +1920,24 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 
   if (C->stub_function() == NULL) {
     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
-    bs->nmethod_entry_barrier(&_masm);
+    if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
+      // Dummy labels for just measuring the code size
+      Label dummy_slow_path;
+      Label dummy_continuation;
+      Label dummy_guard;
+      Label* slow_path = &dummy_slow_path;
+      Label* continuation = &dummy_continuation;
+      Label* guard = &dummy_guard;
+      if (!Compile::current()->output()->in_scratch_emit_size()) {
+        // Use real labels from actual stub when not emitting code for the purpose of measuring its size
+        C2EntryBarrierStub* stub = Compile::current()->output()->entry_barrier_table()->add_entry_barrier();
+        slow_path = &stub->slow_path();
+        continuation = &stub->continuation();
+        guard = &stub->guard();
+      }
+      // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
+      bs->nmethod_entry_barrier(&_masm, slow_path, continuation, guard);
+    }
   }
 
   if (VerifyStackAtCalls) {
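The hunk above is the heart of the change on the C2 side: the barrier must occupy the same number of bytes whether C2 is merely sizing the method in a scratch buffer or emitting it for real, so the sizing pass branches to throwaway labels instead of allocating a stub. Below is a minimal, self-contained sketch of that two-pass pattern; the names (EntryBarrierStub, barrier_size_or_emit, the 4-instruction fast-path size) are stand-ins for illustration, not HotSpot's API.

#include <cassert>

struct Label { int pos = -1; };                 // stand-in for HotSpot's Label

struct EntryBarrierStub {                       // stand-in for C2EntryBarrierStub
  Label slow_path, continuation, guard;
};

// Stands in for bs->nmethod_entry_barrier(...): it only needs *some* labels
// to wire up branches, so the sizing pass and the real pass can share it.
int emit_barrier(Label* slow_path, Label* continuation, Label* guard) {
  (void)slow_path; (void)continuation; (void)guard;
  return 4 * 4;                                 // e.g. ldrw; ldrw; cmp; b.cond
}

int barrier_size_or_emit(bool in_scratch_emit_size, EntryBarrierStub* stub) {
  Label dummy_slow_path, dummy_continuation, dummy_guard;
  Label* slow_path = &dummy_slow_path;          // dummy labels for sizing only
  Label* continuation = &dummy_continuation;
  Label* guard = &dummy_guard;
  if (!in_scratch_emit_size) {
    slow_path = &stub->slow_path;               // real labels, bound when the
    continuation = &stub->continuation;         // stub is emitted later
    guard = &stub->guard;
  }
  return emit_barrier(slow_path, continuation, guard);
}

int main() {
  EntryBarrierStub stub;
  // Both passes must see the same size, or C2's frame offsets would be wrong.
  assert(barrier_size_or_emit(true, nullptr) == barrier_size_or_emit(false, &stub));
  return 0;
}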
2 changes: 1 addition & 1 deletion src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
@@ -298,7 +298,7 @@ void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) {
 
   // Insert nmethod entry barrier into frame.
   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
-  bs->nmethod_entry_barrier(this);
+  bs->nmethod_entry_barrier(this, NULL /* slow_path */, NULL /* continuation */, NULL /* guard */);
 }
 
 void C1_MacroAssembler::remove_frame(int framesize) {
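C1 keeps the old all-inline layout by passing NULL for all three labels. A small sketch of the convention the new signature encodes (hypothetical code, not HotSpot's):

#include <cstdio>

struct Label {};  // placeholder for HotSpot's Label

// slow_path == NULL selects the self-contained layout (C1, native wrappers);
// non-NULL labels move the call and the guard word out of line (C2).
void nmethod_entry_barrier(Label* slow_path, Label* continuation, Label* guard) {
  if (slow_path == nullptr) {
    std::puts("inline: check; call stub; skip over embedded guard word");
  } else {
    std::puts("out-of-line: check; b.ne stub slow_path; continuation here");
  }
  (void)continuation; (void)guard;
}

int main() {
  nmethod_entry_barrier(nullptr, nullptr, nullptr);  // C1 / native wrapper
  Label slow, cont, guard;
  nmethod_entry_barrier(&slow, &cont, &guard);       // C2 entry barrier stub
  return 0;
}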
16 changes: 16 additions & 0 deletions src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@@ -28,6 +28,7 @@
 #include "opto/c2_MacroAssembler.hpp"
 #include "opto/intrinsicnode.hpp"
 #include "opto/matcher.hpp"
+#include "opto/output.hpp"
 #include "opto/subnode.hpp"
 #include "runtime/stubRoutines.hpp"
 
@@ -43,6 +44,21 @@
 
 typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
 
+void C2_MacroAssembler::emit_entry_barrier_stub(C2EntryBarrierStub* stub) {
+  bind(stub->slow_path());
+  movptr(rscratch1, (uintptr_t) StubRoutines::aarch64::method_entry_barrier());
+  blr(rscratch1);
+  b(stub->continuation());
+
+  bind(stub->guard());
+  relocate(entry_guard_Relocation::spec());
+  emit_int32(0);   // nmethod guard value
+}
+
+int C2_MacroAssembler::entry_barrier_stub_size() {
+  return 4 * 6;
+}
+
 // Search for str1 in str2 and return index or -1
 void C2_MacroAssembler::string_indexof(Register str2, Register str1,
                                        Register cnt2, Register cnt1,
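For the size above, 4 * 6 appears to account for six 4-byte words in the worst case. The breakdown below is my arithmetic, under the assumption that movptr expands to a 3-instruction movz/movk/movk sequence for a 48-bit stub address; it is a sanity check, not something stated in the patch.

#include <cassert>

int main() {
  const int insn   = 4;           // every AArch64 instruction is 4 bytes
  const int movptr = 3 * insn;    // movz + movk + movk for a 48-bit address
  const int blr    = 1 * insn;    // call the method entry barrier stub
  const int b      = 1 * insn;    // jump back to the continuation
  const int guard  = 4;           // emit_int32(0): the out-of-line guard word
  assert(movptr + blr + b + guard == 4 * 6);
  return 0;
}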
4 changes: 2 additions & 2 deletions src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
@@ -28,8 +28,8 @@
 // C2_MacroAssembler contains high-level macros for C2
 
  public:
-  void emit_entry_barrier_stub(C2EntryBarrierStub* stub) {}
-  static int entry_barrier_stub_size() { return 0; }
+  void emit_entry_barrier_stub(C2EntryBarrierStub* stub);
+  static int entry_barrier_stub_size();
 
   void string_compare(Register str1, Register str2,
                       Register cnt1, Register cnt2, Register result,
46 changes: 35 additions & 11 deletions src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp
@@ -246,18 +246,38 @@ void BarrierSetAssembler::clear_patching_epoch() {
   _patching_epoch = 0;
 }
 
-void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
+void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation, Label* guard) {
   BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
 
   if (bs_nm == NULL) {
     return;
   }
 
-  Label skip_barrier, guard;
+  Label local_guard;
+  Label skip_barrier;
+  NMethodPatchingType patching_type = nmethod_patching_type();
 
-  __ ldrw(rscratch1, guard);
+  if (slow_path == NULL) {
+    guard = &local_guard;
+  }
+
+  // If the slow path is out of line in a stub, we flip the condition
+  Assembler::Condition condition = slow_path == NULL ? Assembler::EQ : Assembler::NE;
+  Label& barrier_target = slow_path == NULL ? skip_barrier : *slow_path;
+
+  __ ldrw(rscratch1, *guard);
 
-  if (nmethod_code_patching()) {
+  if (patching_type == NMethodPatchingType::stw_instruction_and_data_patch) {
+    // With STW patching, no data or instructions are updated concurrently,
+    // which means there is no real need for any fencing of either
+    // data or instruction modifications happening concurrently. The
+    // instruction patching is handled with isb fences on the way back
+    // from the safepoint to Java. So here we can do a plain conditional
+    // branch with no fencing.
+    Address thread_disarmed_addr(rthread, in_bytes(bs_nm->thread_disarmed_offset()));
+    __ ldrw(rscratch2, thread_disarmed_addr);
+    __ cmp(rscratch1, rscratch2);
+  } else if (patching_type == NMethodPatchingType::conc_instruction_and_data_patch) {
     // If we patch code we need both a code patching and a loadload
     // fence. It's not super cheap, so we use a global epoch mechanism
     // to hide them in a slow path.
@@ -278,24 +298,28 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation, Label* guard) {
     Address thread_disarmed_and_epoch_addr(rthread, in_bytes(bs_nm->thread_disarmed_offset()));
     __ ldr(rscratch2, thread_disarmed_and_epoch_addr);
     __ cmp(rscratch1, rscratch2);
-    __ br(Assembler::EQ, skip_barrier);
   } else {
+    assert(patching_type == NMethodPatchingType::conc_data_patch, "must be");
     // Subsequent loads of oops must occur after load of guard value.
     // BarrierSetNMethod::disarm sets guard with release semantics.
     __ membar(__ LoadLoad);
     Address thread_disarmed_addr(rthread, in_bytes(bs_nm->thread_disarmed_offset()));
     __ ldrw(rscratch2, thread_disarmed_addr);
     __ cmpw(rscratch1, rscratch2);
-    __ br(Assembler::EQ, skip_barrier);
   }
+  __ br(condition, barrier_target);
 
-  __ movptr(rscratch1, (uintptr_t) StubRoutines::aarch64::method_entry_barrier());
-  __ blr(rscratch1);
-  __ b(skip_barrier);
+  if (slow_path == NULL) {
+    __ movptr(rscratch1, (uintptr_t) StubRoutines::aarch64::method_entry_barrier());
+    __ blr(rscratch1);
+    __ b(skip_barrier);
 
-  __ bind(guard);
+    __ bind(local_guard);
 
-  __ emit_int32(0);   // nmethod guard value. Skipped over in common case.
+    __ emit_int32(0);   // nmethod guard value. Skipped over in common case.
+  } else {
+    __ bind(*continuation);
+  }
 
   __ bind(skip_barrier);
 }
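The conc_data_patch arm pairs the guard load plus LoadLoad fence with the release store performed by BarrierSetNMethod::disarm. The snippet below restates that pairing in portable C++ atomics; it is an analogy of the intent described in the comments, not HotSpot code.

#include <atomic>
#include <cassert>
#include <thread>

std::atomic<int> guard{1};    // 1 = armed; stands in for the nmethod guard word
int patched_oop = 0;          // stands in for nmethod data patched by the GC

void gc_disarm() {
  patched_oop = 42;                           // patch embedded data first
  guard.store(0, std::memory_order_release);  // then disarm with release
}

void entry_barrier() {
  // Mirrors "ldrw guard; dmb ishld": an acquire load of the guard orders all
  // later oop loads after it, so a disarmed guard implies patched data.
  while (guard.load(std::memory_order_acquire) != 0) {
    // armed: the real barrier would take the slow path here
  }
  assert(patched_oop == 42);  // safe to load oops now
}

int main() {
  std::thread gc(gc_disarm), java(entry_barrier);
  gc.join(); java.join();
  return 0;
}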
10 changes: 8 additions & 2 deletions src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp
@@ -31,6 +31,12 @@
 #include "memory/allocation.hpp"
 #include "oops/access.hpp"
 
+enum class NMethodPatchingType {
+  stw_instruction_and_data_patch,
+  conc_instruction_and_data_patch,
+  conc_data_patch
+};
+
 class BarrierSetAssembler: public CHeapObj<mtGC> {
 private:
   void incr_allocated_bytes(MacroAssembler* masm,
@@ -68,9 +74,9 @@ class BarrierSetAssembler: public CHeapObj<mtGC> {
   );
   virtual void barrier_stubs_init() {}
 
-  virtual bool nmethod_code_patching() { return true; }
+  virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::stw_instruction_and_data_patch; }
 
-  virtual void nmethod_entry_barrier(MacroAssembler* masm);
+  virtual void nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation, Label* guard);
   virtual void c2i_entry_barrier(MacroAssembler* masm);
 
   static address patching_epoch_addr();
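The enum replaces the old boolean nmethod_code_patching() with three named schemes, and each GC's BarrierSetAssembler picks one by overriding the virtual. A standalone sketch of that dispatch follows (simplified types, not the HotSpot class hierarchy):

enum class NMethodPatchingType {
  stw_instruction_and_data_patch,
  conc_instruction_and_data_patch,
  conc_data_patch
};

struct BarrierSetAssemblerModel {
  // Default scheme: instructions and data only change at safepoints.
  virtual NMethodPatchingType nmethod_patching_type() {
    return NMethodPatchingType::stw_instruction_and_data_patch;
  }
  virtual ~BarrierSetAssemblerModel() {}
};

// ZGC and Shenandoah (see the hunks below) patch data concurrently.
struct ConcDataPatchModel : BarrierSetAssemblerModel {
  NMethodPatchingType nmethod_patching_type() override {
    return NMethodPatchingType::conc_data_patch;
  }
};

int main() {
  ConcDataPatchModel bs;
  return bs.nmethod_patching_type() == NMethodPatchingType::conc_data_patch ? 0 : 1;
}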
83 changes: 50 additions & 33 deletions src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp
@@ -37,29 +37,62 @@
 #include "utilities/align.hpp"
 #include "utilities/debug.hpp"
 
+static int slow_path_size(nmethod* nm) {
+  // The slow path code is out of line with C2
+  return nm->is_compiled_by_c2() ? 0 : 6;
+}
+
+// This is the offset of the entry barrier from where the frame is completed.
+// If any code changes between the end of the verified entry where the entry
+// barrier resides, and the completion of the frame, then
+// NativeNMethodCmpBarrier::verify() will immediately complain when it does
+// not find the expected native instruction at this offset, which needs updating.
+// Note that this offset is invariant of PreserveFramePointer.
+static int entry_barrier_offset(nmethod* nm) {
+  BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
+  switch (bs_asm->nmethod_patching_type()) {
+  case NMethodPatchingType::stw_instruction_and_data_patch:
+    return -4 * (4 + slow_path_size(nm));
+  case NMethodPatchingType::conc_instruction_and_data_patch:
+    return -4 * (10 + slow_path_size(nm));
+  case NMethodPatchingType::conc_data_patch:
+    return -4 * (5 + slow_path_size(nm));
+  }
+  ShouldNotReachHere();
+  return 0;
+}
+
 class NativeNMethodBarrier: public NativeInstruction {
   address instruction_address() const { return addr_at(0); }
 
-  int guard_offset() {
-    BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
-    if (bs_asm->nmethod_code_patching()) {
-      return 4 * 15;
-    } else {
-      return 4 * 10;
-    }
+  int local_guard_offset(nmethod* nm) {
+    // It's the last instruction
+    return (-entry_barrier_offset(nm)) - 4;
   }
 
-  int *guard_addr() {
-    return reinterpret_cast<int*>(instruction_address() + guard_offset());
+  int *guard_addr(nmethod* nm) {
+    if (nm->is_compiled_by_c2()) {
+      // With c2 compiled code, the guard is out-of-line in a stub.
+      // We find it using the RelocIterator.
+      RelocIterator iter(nm);
+      while (iter.next()) {
+        if (iter.type() == relocInfo::entry_guard_type) {
+          entry_guard_Relocation* const reloc = iter.entry_guard_reloc();
+          return reinterpret_cast<int*>(reloc->addr());
+        }
+      }
+      ShouldNotReachHere();
+    }
+    return reinterpret_cast<int*>(instruction_address() + local_guard_offset(nm));
   }
 
 public:
-  int get_value() {
-    return Atomic::load_acquire(guard_addr());
+  int get_value(nmethod* nm) {
+    return Atomic::load_acquire(guard_addr(nm));
   }
 
-  void set_value(int value) {
-    Atomic::release_store(guard_addr(), value);
+  void set_value(nmethod* nm, int value) {
+    Atomic::release_store(guard_addr(nm), value);
  }
 
   void verify() const;
@@ -120,24 +153,8 @@ void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) {
   new_frame->pc = SharedRuntime::get_handle_wrong_method_stub();
 }
 
-// This is the offset of the entry barrier from where the frame is completed.
-// If any code changes between the end of the verified entry where the entry
-// barrier resides, and the completion of the frame, then
-// NativeNMethodCmpBarrier::verify() will immediately complain when it does
-// not find the expected native instruction at this offset, which needs updating.
-// Note that this offset is invariant of PreserveFramePointer.
-
-static int entry_barrier_offset() {
-  BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
-  if (bs_asm->nmethod_code_patching()) {
-    return -4 * 16;
-  } else {
-    return -4 * 11;
-  }
-}
-
 static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) {
-  address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset();
+  address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset(nm);
   NativeNMethodBarrier* barrier = reinterpret_cast<NativeNMethodBarrier*>(barrier_address);
   debug_only(barrier->verify());
   return barrier;
@@ -160,7 +177,7 @@ void BarrierSetNMethod::disarm(nmethod* nm) {
   // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier.
   // Symmetric "LDR; DMB ISHLD" is in the nmethod barrier.
   NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
-  barrier->set_value(disarmed_value());
+  barrier->set_value(nm, disarmed_value());
 }
 
 void BarrierSetNMethod::arm(nmethod* nm, int arm_value) {
@@ -180,7 +197,7 @@ void BarrierSetNMethod::arm(nmethod* nm, int arm_value) {
   }
 
   NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
-  barrier->set_value(arm_value);
+  barrier->set_value(nm, arm_value);
 }
 
 bool BarrierSetNMethod::is_armed(nmethod* nm) {
@@ -189,5 +206,5 @@ bool BarrierSetNMethod::is_armed(nmethod* nm) {
   }
 
   NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
-  return barrier->get_value() != disarmed_value();
+  return barrier->get_value(nm) != disarmed_value();
 }
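As a cross-check of entry_barrier_offset() (my arithmetic, not part of the patch): for non-C2 code the 6-instruction inline slow path is added back, so conc_instruction_and_data_patch yields -4 * (10 + 6) = -4 * 16 and conc_data_patch yields -4 * (5 + 6) = -4 * 11, exactly the two constants the deleted fixed-offset entry_barrier_offset() returned. C2 code drops the slow-path term because it now lives in the stub.

#include <cassert>

// fast_path_insns comes from the switch above; slow path is inline unless C2.
int entry_barrier_offset(int fast_path_insns, bool compiled_by_c2) {
  const int slow_path_insns = compiled_by_c2 ? 0 : 6;
  return -4 * (fast_path_insns + slow_path_insns);
}

int main() {
  assert(entry_barrier_offset(10, false) == -4 * 16);  // old "code patching" offset
  assert(entry_barrier_offset(5, false)  == -4 * 11);  // old "data patching" offset
  assert(entry_barrier_offset(4, true)   == -4 * 4);   // new C2 stw fast path only
  return 0;
}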
2 changes: 1 addition & 1 deletion src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
@@ -62,7 +62,7 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
 
   void iu_barrier(MacroAssembler* masm, Register dst, Register tmp);
 
-  virtual bool nmethod_code_patching() { return false; }
+  virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::conc_data_patch; }
 
 #ifdef COMPILER1
   void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub);
2 changes: 1 addition & 1 deletion src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp
@@ -76,7 +76,7 @@ class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase {
                      Register tmp,
                      Label& slowpath);
 
-  virtual bool nmethod_code_patching() { return false; }
+  virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::conc_data_patch; }
 
 #ifdef COMPILER1
   void generate_c1_load_barrier_test(LIR_Assembler* ce,
2 changes: 1 addition & 1 deletion src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -4475,7 +4475,7 @@ void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) {
   // ordered with respect to oop accesses.
   // Using immediate literals would necessitate ISBs.
   BarrierSet* bs = BarrierSet::barrier_set();
-  if ((bs->barrier_set_nmethod() != NULL && !bs->barrier_set_assembler()->nmethod_code_patching()) || !immediate) {
+  if ((bs->barrier_set_nmethod() != NULL && bs->barrier_set_assembler()->nmethod_patching_type() == NMethodPatchingType::conc_data_patch) || !immediate) {
     address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address
     ldr_constant(dst, Address(dummy, rspec));
   } else
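The movoop condition above says: once entry barriers exist and data is patched concurrently, embedded oops must live as data (a pc-relative ldr from a constant slot) rather than as mov/movk immediates, because patched data is ordered by the barrier's LoadLoad fence while patched instructions would require ISBs in every reader. A distilled model of that decision (hypothetical code, not HotSpot's):

#include <cstdio>

enum class PatchingType { stw, conc_instruction_and_data, conc_data };

void movoop(PatchingType type, bool immediate_requested) {
  const bool use_literal = (type == PatchingType::conc_data) || !immediate_requested;
  if (use_literal) {
    std::puts("ldr dst, [nearby constant slot]  // oop is concurrently patchable data");
  } else {
    std::puts("mov/movk dst, #oop               // oop baked into the instructions");
  }
}

int main() {
  movoop(PatchingType::conc_data, true);  // immediate requested, literal forced
  movoop(PatchingType::stw, true);        // immediate is safe under STW patching
  return 0;
}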
2 changes: 1 addition & 1 deletion src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
@@ -1424,7 +1424,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
   __ sub(sp, sp, stack_size - 2*wordSize);
 
   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
-  bs->nmethod_entry_barrier(masm);
+  bs->nmethod_entry_barrier(masm, NULL /* slow_path */, NULL /* continuation */, NULL /* guard */);
 
   // Frame is now completed as far as size and linkage.
   int frame_complete = ((intptr_t)__ pc()) - start;
6 changes: 3 additions & 3 deletions src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -5145,7 +5145,7 @@ class StubGenerator: public StubCodeGenerator {
     return entry;
   }
 
-   address generate_method_entry_barrier() {
+  address generate_method_entry_barrier() {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
 
@@ -5155,10 +5155,10 @@ class StubGenerator: public StubCodeGenerator {
 
     BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
 
-    if (bs_asm->nmethod_code_patching()) {
+    if (bs_asm->nmethod_patching_type() == NMethodPatchingType::conc_instruction_and_data_patch) {
       BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
       // We can get here despite the nmethod being good, if we have not
-      // yet applied our cross modification fence.
+      // yet applied our cross modification fence (or data fence).
       Address thread_epoch_addr(rthread, in_bytes(bs_nm->thread_disarmed_offset()) + 4);
       __ lea(rscratch2, ExternalAddress(bs_asm->patching_epoch_addr()));
       __ ldrw(rscratch2, rscratch2);
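The epoch check above handles a subtle case: a thread can reach the slow-path stub with the nmethod already disarmed, simply because it has not yet executed the cross-modification (isb) fence for the latest round of patching. Below is a rough standalone model of that flow, assuming a simplified stub (the real synchronization lives in barrierSetAssembler_aarch64.cpp and this stub):

#include <cstdint>
#include <cstdio>

uint32_t global_patching_epoch = 7;   // bumped after each round of patching

struct JavaThread {
  uint32_t disarmed_value = 0;        // guard value meaning "nmethod is good"
  uint32_t patching_epoch = 6;        // stale: isb not yet applied
};

// Returns true if the caller may simply re-enter the nmethod.
bool method_entry_barrier_stub(JavaThread* t, uint32_t guard_value) {
  if (t->patching_epoch != global_patching_epoch) {
    // Model of "apply the cross-modification fence": isb, then adopt the epoch.
    t->patching_epoch = global_patching_epoch;
    return guard_value == t->disarmed_value;  // nmethod may be good after all
  }
  return false;  // genuinely armed: fall through to the runtime barrier
}

int main() {
  JavaThread t;
  std::printf("re-enter without running the barrier: %s\n",
              method_entry_barrier_stub(&t, 0) ? "yes" : "no");
  return 0;
}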
