Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8291302: ARM32: nmethod entry barriers support #11442

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 19 additions & 0 deletions src/hotspot/cpu/arm/arm.ad
Expand Up @@ -59,6 +59,9 @@ source_hpp %{
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

#include "asm/macroAssembler.hpp"
#include "gc/shared/barrierSetAssembler.hpp"

// Does destination need to be loaded in a register then passed to a
// branch instruction?
extern bool maybe_far_call(const CallNode *n);
Expand Down Expand Up @@ -286,6 +289,17 @@ void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
if (framesize != 0) {
st->print ("SUB R_SP, R_SP, " SIZE_FORMAT,framesize);
}

if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
st->print("ldr t0, [guard]\n\t");
voitylov marked this conversation as resolved.
Show resolved Hide resolved
st->print("ldr t1, [Rthread, #thread_disarmed_offset]\n\t");
st->print("cmp t0, t1\n\t");
st->print("beq skip\n\t");
st->print("blr #nmethod_entry_barrier_stub\n\t");
st->print("b skip\n\t");
st->print("guard: int\n\t");
st->print("skip:\n\t");
}
}
voitylov marked this conversation as resolved.
Show resolved Hide resolved
#endif

Expand Down Expand Up @@ -318,6 +332,11 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
__ sub_slow(SP, SP, framesize);
}

if (C->stub_function() == NULL) {
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
bs->nmethod_entry_barrier(&_masm);
}

// offset from scratch buffer is not valid
if (strcmp(cbuf.name(), "Compile::Fill_buffer") == 0) {
C->output()->set_frame_complete( __ offset() );
Expand Down
6 changes: 6 additions & 0 deletions src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp
Expand Up @@ -25,6 +25,8 @@
#include "precompiled.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "c1/c1_Runtime1.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/tlab_globals.hpp"
#include "interpreter/interpreter.hpp"
Expand Down Expand Up @@ -62,6 +64,10 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by
// if this method contains a methodHandle call site
raw_push(FP, LR);
sub_slow(SP, SP, frame_size_in_bytes);

// Insert nmethod entry barrier into frame.
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
bs->nmethod_entry_barrier(this);
}

void C1_MacroAssembler::remove_frame(int frame_size_in_bytes) {
Expand Down
47 changes: 47 additions & 0 deletions src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp
Expand Up @@ -23,10 +23,13 @@
*/

#include "precompiled.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/barrierSetNMethod.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "memory/universe.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/stubRoutines.hpp"

#define __ masm->

Expand Down Expand Up @@ -195,3 +198,47 @@ void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, RegisterOrC
// Unborrow the Rthread
__ sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
}

// Emits the nmethod entry barrier into the code stream of 'masm'.
//
// Nothing is emitted when the active barrier set has no BarrierSetNMethod.
// Otherwise the emitted sequence is:
//
//          ldr   tmp0, guard
//          ldr   tmp1, [Rthread, #thread_disarmed_offset]
//          cmp   tmp0, tmp1
//          beq   skip
//          <load address of the method entry barrier stub into tmp0>
//          call  tmp0
//          b     skip
//   guard: <32-bit guard value>
//   skip:
//
// barrierSetNMethod_arm.cpp finds the guard word through the constant
// entry_barrier_bytes, so any change to the size of this sequence has to be
// reflected there.
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs_nm == NULL) {
    return;
  }

  // There are no GCs that require a memory barrier on arm32 now.
  // assert() is compiled out together with its argument when ASSERT is not
  // defined, so no separate build-flavour guard or temporary is needed.
  assert(nmethod_patching_type() == NMethodPatchingType::stw_instruction_and_data_patch,
         "Unsupported patching type");

  Register tmp0 = Rtemp;
  Register tmp1 = R5; // wrong register here will lead to crash

  Label skip, guard;
  Address thread_disarmed_addr(Rthread, in_bytes(bs_nm->thread_disarmed_offset()));

  __ block_comment("nmethod_barrier begin");
  __ ldr_label(tmp0, guard);

  // No memory barrier here
  __ ldr(tmp1, thread_disarmed_addr);
  __ cmp(tmp0, tmp1);
  __ b(skip, eq);

  // Guard differs from the thread's disarmed value: call the barrier stub.
  __ mov_address(tmp0, StubRoutines::Arm::method_entry_barrier());
  __ call(tmp0);
  __ b(skip);

  __ bind(guard);

  // nmethod guard value. Skipped over in common case.
  //
  // Put a debug value to make any offsets skew
  // clearly visible in coredump
  __ emit_int32(0xDEADBEAF);

  __ bind(skip);
  __ block_comment("nmethod_barrier end");
}
6 changes: 6 additions & 0 deletions src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp
Expand Up @@ -29,6 +29,10 @@
#include "memory/allocation.hpp"
#include "oops/access.hpp"

// Kind of patching used for the nmethod entry barrier on this port.
// Only one kind is defined here; it is what
// BarrierSetAssembler::nmethod_patching_type() returns by default and what
// BarrierSetAssembler::nmethod_entry_barrier() asserts on.
enum class NMethodPatchingType {
stw_instruction_and_data_patch,
};

class BarrierSetAssembler: public CHeapObj<mtGC> {
private:
void incr_allocated_bytes(MacroAssembler* masm,
Expand Down Expand Up @@ -56,6 +60,8 @@ class BarrierSetAssembler: public CHeapObj<mtGC> {
);

virtual void barrier_stubs_init() {}
virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::stw_instruction_and_data_patch; }
virtual void nmethod_entry_barrier(MacroAssembler* masm);
};

#endif // CPU_ARM_GC_SHARED_BARRIERSETASSEMBLER_ARM_HPP
114 changes: 110 additions & 4 deletions src/hotspot/cpu/arm/gc/shared/barrierSetNMethod_arm.cpp
Expand Up @@ -23,18 +23,124 @@
*/

#include "precompiled.hpp"
#include "code/nativeInst.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/barrierSetNMethod.hpp"
#include "logging/log.hpp"
#include "memory/resourceArea.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/registerMap.hpp"
#include "utilities/align.hpp"
#include "utilities/debug.hpp"

// The constant below reflects the size of the barrier
// in barrierSetAssembler_arm.cpp
//
// It is the size in bytes of the whole entry barrier sequence, including the
// trailing guard word: native_nmethod_barrier() subtracts it from the
// frame-complete position to find the start of the barrier, and
// NativeNMethodBarrier::guard_addr() uses it to find the guard word.
// Keep it in sync with the emitted sequence.
static const int entry_barrier_bytes = 36;
voitylov marked this conversation as resolved.
Show resolved Hide resolved

class NativeNMethodBarrier: public NativeInstruction {
address instruction_address() const { return addr_at(0); }

int *guard_addr() const {
// Last instruction in a barrier
return reinterpret_cast<int*>(instruction_address() + entry_barrier_bytes - wordSize);
}

public:
int get_value() {
return Atomic::load_acquire(guard_addr());
}

void set_value(int value) {
Atomic::release_store(guard_addr(), value);
}

void verify() const;
};

// Check the first instruction of the nmethod entry barrier
// to make sure that the offsets are not skewed.
void NativeNMethodBarrier::verify() const {
NativeInstruction *ni = (NativeInstruction *) instruction_address();
if (!ni->is_ldr()) {
uint32_t *addr = (uint32_t *) ni;
tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", (intptr_t) addr, (uint32_t) *addr);
fatal("not an ldr instruction.");
}
}

// Returns the entry barrier of 'nm'. The barrier is taken to end at the
// nmethod's frame-complete offset, so it starts entry_barrier_bytes before
// that position. In debug builds the located barrier is verified.
static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) {
  address barrier_end = nm->code_begin() + nm->frame_complete_offset();
  NativeNMethodBarrier* result =
      reinterpret_cast<NativeNMethodBarrier*>(barrier_end - entry_barrier_bytes);
  debug_only(result->verify());
  return result;
}

/* We're called from an nmethod when we need to deoptimize it. We do
   this by throwing away the nmethod's frame and jumping to the
   ic_miss stub. This looks like there has been an IC miss at the
   entry of the nmethod, so we resolve the call, which will fall back
   to the interpreter if the nmethod has been unloaded.

   NOTE(review): the code below installs the stub returned by
   SharedRuntime::get_handle_wrong_method_stub() as the new pc; confirm
   that this is the stub the paragraph above refers to.

   nm                 - the nmethod being deoptimized; must be the code blob
                        of the thread's last frame.
   return_address_ptr - pointer handed over by the entry barrier stub. The
                        four words {sp, fp, lr, pc} that the stub reloads on
                        its deoptimize path are located 5 words below it. */
void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) {

  // Layout of the four words consumed by the stub's deoptimize path.
  typedef struct {
    intptr_t *sp; intptr_t *fp; address lr; address pc;
  } frame_pointers_t;

  frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5);

  JavaThread *thread = JavaThread::current();
  RegisterMap reg_map(thread,
                      RegisterMap::UpdateMap::skip,
                      RegisterMap::ProcessFrames::include,
                      RegisterMap::WalkContinuation::skip);
  frame frame = thread->last_frame();

  assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be");
  assert(frame.cb() == nm, "must be");
  // Step out to the caller of the nmethod: its sp/fp/pc become the state
  // the stub resumes with.
  frame = frame.sender(&reg_map);

  LogTarget(Trace, nmethod, barrier) out;
  if (out.is_enabled()) {
    ResourceMark mark;
    log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p",
                                nm->method()->name_and_sig_as_C_string(),
                                nm, *(address *) return_address_ptr, nm->is_osr_method(), thread,
                                thread->name(), frame.sp(), nm->verified_entry_point());
  }

  new_frame->sp = frame.sp();
  new_frame->fp = frame.fp();
  new_frame->lr = frame.pc();
  new_frame->pc = SharedRuntime::get_handle_wrong_method_stub();
}

// Disarms the entry barrier of 'nm' by storing the disarmed value into its
// guard word. Does nothing for nmethods that do not support entry barriers.
void BarrierSetNMethod::disarm(nmethod* nm) {
  if (!supports_entry_barrier(nm)) {
    return;
  }

  // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier.
  // The store is a release store (see NativeNMethodBarrier::set_value); the
  // barrier emitted on arm32 reads the guard with a plain LDR and no memory
  // barrier.
  NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
  barrier->set_value(disarmed_value());
}

// Arms the entry barrier of 'nm' by storing 'arm_value' into its guard word.
// Nmethods that do not support entry barriers are left untouched.
void BarrierSetNMethod::arm(nmethod* nm, int arm_value) {
  if (supports_entry_barrier(nm)) {
    native_nmethod_barrier(nm)->set_value(arm_value);
  }
}

// Returns true when the guard word of 'nm' differs from the disarmed value.
// Nmethods that do not support entry barriers are never reported as armed.
bool BarrierSetNMethod::is_armed(nmethod* nm) {
  if (!supports_entry_barrier(nm)) {
    return false;
  }

  NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
  return barrier->get_value() != disarmed_value();
}
14 changes: 14 additions & 0 deletions src/hotspot/cpu/arm/macroAssembler_arm.hpp
Expand Up @@ -587,9 +587,23 @@ class MacroAssembler: public Assembler {
AbstractAssembler::emit_address((address)L.data());
}

// Loads into rd the word located at label L, using a PC-relative ldr.
void ldr_label(Register rd, Label& L) {
  // The displacement is taken relative to the current emit position and
  // corrected by 8, the amount by which PC reads ahead of the instruction
  // being executed in ARM state.
  const int pc_read_ahead = 8;
  ldr(rd, Address(PC, target(L) - pc() - pc_read_ahead));
}

void resolve_oop_handle(Register result);
void load_mirror(Register mirror, Register method, Register tmp);

// Frame setup: saves FP and LR with raw_push, then copies SP into FP.
// leave() is the inverse.
void enter() {
raw_push(FP, LR);
mov(FP, SP);
}

// Frame teardown, inverse of enter(): copies FP back into SP, then restores
// FP and LR with raw_pop.
void leave() {
mov(SP, FP);
raw_pop(FP, LR);
}

#define ARM_INSTR_1(common_mnemonic, arm32_mnemonic, arg_type) \
void common_mnemonic(arg_type arg) { \
arm32_mnemonic(arg); \
Expand Down
5 changes: 5 additions & 0 deletions src/hotspot/cpu/arm/sharedRuntime_arm.cpp
Expand Up @@ -28,6 +28,7 @@
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "compiler/oopMap.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "interpreter/interpreter.hpp"
#include "logging/log.hpp"
#include "memory/resourceArea.hpp"
Expand Down Expand Up @@ -873,6 +874,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
__ mov(FP, SP);
__ sub_slow(SP, SP, stack_size - 2*wordSize);

BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
assert(bs != NULL, "Sanity");
bs->nmethod_entry_barrier(masm);

int frame_complete = __ pc() - start;

OopMapSet* oop_maps = new OopMapSet();
Expand Down
53 changes: 53 additions & 0 deletions src/hotspot/cpu/arm/stubGenerator_arm.cpp
Expand Up @@ -27,6 +27,7 @@
#include "compiler/oopMap.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/barrierSetNMethod.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/universe.hpp"
#include "nativeInst_arm.hpp"
Expand Down Expand Up @@ -2905,6 +2906,53 @@ class StubGenerator: public StubCodeGenerator {

}

// Generates the "nmethod_entry_barrier" stub (presumably the target called by
// the sequence that BarrierSetAssembler::nmethod_entry_barrier emits -- confirm
// against StubRoutines::Arm::method_entry_barrier()).
//
// The stub calls BarrierSetNMethod::nmethod_stub_entry_barrier with a pointer
// to the saved LR slot. A zero result returns to the caller. A non-zero result
// takes the deoptimize path, which reloads {sp, fp, lr, pc} from the four
// words reserved right after the FP/LR push and jumps to the loaded pc.
//
// Returns the address of the first instruction of the stub.
address generate_method_entry_barrier() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");

Label deoptimize_label;

address start = __ pc();

// No need to save PC on Arm
__ set_last_Java_frame(SP, FP, false, Rtemp);

__ enter();

__ add(Rtemp, SP, wordSize); // Rtemp points to the saved lr
__ sub(SP, SP, 4 * wordSize); // four words for the returned {sp, fp, lr, pc}

// Preserve R0..R10 and the FP registers around the call into the VM.
const RegisterSet saved_regs = RegisterSet(R0, R10);
__ push(saved_regs);
__ fpush(FloatRegisterSet(D0, 16));

// Single argument: the pointer to the saved lr computed above.
__ mov(c_rarg0, Rtemp);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetNMethod::nmethod_stub_entry_barrier), c_rarg0);

__ reset_last_Java_frame(Rtemp);

// Keep the call result in Rtemp: R0 is part of saved_regs and is
// overwritten by the pop below.
__ mov(Rtemp, R0);

__ fpop(FloatRegisterSet(D0, 16));
__ pop(saved_regs);

// Non-zero result: take the deoptimize path.
__ cbnz(Rtemp, deoptimize_label);

// Zero result: tear down the frame and return to the caller.
__ leave();
__ bx(LR);

__ BIND(deoptimize_label);

// SP now points at the four reserved words; they are read in the order
// sp, fp, lr, pc (the order BarrierSetNMethod::deoptimize fills them in).
__ ldr(Rtemp, Address(SP, 0));
voitylov marked this conversation as resolved.
Show resolved Hide resolved
__ ldr(FP, Address(SP, wordSize));
__ ldr(LR, Address(SP, wordSize * 2));
__ ldr(R5, Address(SP, wordSize * 3));
// Switch to the new stack pointer last, after all four words have been read.
__ mov(SP, Rtemp);
__ bx(R5);

return start;
}

#define COMPILE_CRYPTO
#include "stubRoutinesCrypto_arm.cpp"

Expand Down Expand Up @@ -3097,6 +3145,11 @@ class StubGenerator: public StubCodeGenerator {
// arraycopy stubs used by compilers
generate_arraycopy_stubs();

BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
if (bs_nm != NULL) {
StubRoutines::Arm::_method_entry_barrier = generate_method_entry_barrier();
}

#ifdef COMPILE_CRYPTO
// generate AES intrinsics code
if (UseAESIntrinsics) {
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/cpu/arm/stubRoutines_arm.cpp
Expand Up @@ -33,3 +33,5 @@ address StubRoutines::Arm::_partial_subtype_check = NULL;

address StubRoutines::_atomic_load_long_entry = NULL;
address StubRoutines::_atomic_store_long_entry = NULL;

// Entry point of the nmethod entry barrier stub. Stays NULL unless the stub
// generator creates the stub, which it does only when the barrier set
// provides a BarrierSetNMethod.
address StubRoutines::Arm::_method_entry_barrier = NULL;