Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
8291302: ARM32: nmethod entry barriers support
Reviewed-by: eosterlund, rrich, mdoerr, aph
  • Loading branch information
Aleksei Voitylov authored and TheRealMDoerr committed Jan 3, 2023
1 parent a9ce772 commit 245f0cf
Show file tree
Hide file tree
Showing 12 changed files with 268 additions and 5 deletions.
19 changes: 19 additions & 0 deletions src/hotspot/cpu/arm/arm.ad
Expand Up @@ -59,6 +59,9 @@ source_hpp %{
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

#include "asm/macroAssembler.hpp"
#include "gc/shared/barrierSetAssembler.hpp"

// Does destination need to be loaded in a register then passed to a
// branch instruction?
extern bool maybe_far_call(const CallNode *n);
Expand Down Expand Up @@ -286,6 +289,17 @@ void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
if (framesize != 0) {
st->print ("SUB R_SP, R_SP, " SIZE_FORMAT,framesize);
}

if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
st->print("ldr t0, [guard]\n\t");
st->print("ldr t1, [Rthread, #thread_disarmed_offset]\n\t");
st->print("cmp t0, t1\n\t");
st->print("beq skip\n\t");
st->print("blr #nmethod_entry_barrier_stub\n\t");
st->print("b skip\n\t");
st->print("guard: int\n\t");
st->print("skip:\n\t");
}
}
#endif

Expand Down Expand Up @@ -318,6 +332,11 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
__ sub_slow(SP, SP, framesize);
}

if (C->stub_function() == NULL) {
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
bs->nmethod_entry_barrier(&_masm);
}

// offset from scratch buffer is not valid
if (strcmp(cbuf.name(), "Compile::Fill_buffer") == 0) {
C->output()->set_frame_complete( __ offset() );
Expand Down
6 changes: 6 additions & 0 deletions src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp
Expand Up @@ -25,6 +25,8 @@
#include "precompiled.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "c1/c1_Runtime1.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/tlab_globals.hpp"
#include "interpreter/interpreter.hpp"
Expand Down Expand Up @@ -62,6 +64,10 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by
// if this method contains a methodHandle call site
raw_push(FP, LR);
sub_slow(SP, SP, frame_size_in_bytes);

// Insert nmethod entry barrier into frame.
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
bs->nmethod_entry_barrier(this);
}

void C1_MacroAssembler::remove_frame(int frame_size_in_bytes) {
Expand Down
47 changes: 47 additions & 0 deletions src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp
Expand Up @@ -23,10 +23,13 @@
*/

#include "precompiled.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/barrierSetNMethod.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "memory/universe.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/stubRoutines.hpp"

#define __ masm->

Expand Down Expand Up @@ -195,3 +198,47 @@ void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, RegisterOrC
// Unborrow the Rthread
__ sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
}

// Emit the nmethod entry barrier: a PC-relative load of the inline guard
// word, a compare against the per-thread disarmed value, and a call to the
// slow-path stub when they differ. The total emitted size must stay in sync
// with entry_barrier_bytes in barrierSetNMethod_arm.cpp, which locates the
// guard word from the end of this sequence.
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {

BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();

Register tmp0 = Rtemp;
Register tmp1 = R5; // must be callee-save register

// No per-nmethod guards are needed when nmethod entry barriers are not in use.
if (bs_nm == NULL) {
return;
}

// There are no GCs that require a memory barrier on arm32 now
#ifdef ASSERT
NMethodPatchingType patching_type = nmethod_patching_type();
assert(patching_type == NMethodPatchingType::stw_instruction_and_data_patch, "Unsupported patching type");
#endif

Label skip, guard;
Address thread_disarmed_addr(Rthread, in_bytes(bs_nm->thread_disarmed_offset()));

__ block_comment("nmethod_barrier begin");
// Load the guard word embedded after the barrier code (bound at `guard` below).
__ ldr_label(tmp0, guard);

// No memory barrier here: stop-the-world patching makes guard/data
// publication visible without an explicit DMB (see assert above).
__ ldr(tmp1, thread_disarmed_addr);
__ cmp(tmp0, tmp1);
__ b(skip, eq);

// Guard is armed: call the entry barrier stub, then fall through past
// the guard word.
__ mov_address(tmp0, StubRoutines::Arm::method_entry_barrier());
__ call(tmp0);
__ b(skip);

__ bind(guard);

// nmethod guard value. Skipped over in common case.
//
// Put a debug value to make any offsets skew
// clearly visible in coredump
__ emit_int32(0xDEADBEAF);

__ bind(skip);
__ block_comment("nmethod_barrier end");
}
6 changes: 6 additions & 0 deletions src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp
Expand Up @@ -29,6 +29,10 @@
#include "memory/allocation.hpp"
#include "oops/access.hpp"

// How nmethod code/guard data may be patched on this platform.
// ARM32 currently supports only stop-the-world patching, where both
// instructions and data are updated while no Java thread executes them,
// so the entry barrier needs no explicit memory-barrier instruction.
enum class NMethodPatchingType {
stw_instruction_and_data_patch,
};

class BarrierSetAssembler: public CHeapObj<mtGC> {
private:
void incr_allocated_bytes(MacroAssembler* masm,
Expand Down Expand Up @@ -56,6 +60,8 @@ class BarrierSetAssembler: public CHeapObj<mtGC> {
);

virtual void barrier_stubs_init() {}
virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::stw_instruction_and_data_patch; }
virtual void nmethod_entry_barrier(MacroAssembler* masm);
};

#endif // CPU_ARM_GC_SHARED_BARRIERSETASSEMBLER_ARM_HPP
114 changes: 110 additions & 4 deletions src/hotspot/cpu/arm/gc/shared/barrierSetNMethod_arm.cpp
Expand Up @@ -23,18 +23,124 @@
*/

#include "precompiled.hpp"
#include "code/nativeInst.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/barrierSetNMethod.hpp"
#include "logging/log.hpp"
#include "memory/resourceArea.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/registerMap.hpp"
#include "utilities/align.hpp"
#include "utilities/debug.hpp"

// The constant below reflects the size of the barrier emitted by
// BarrierSetAssembler::nmethod_entry_barrier in barrierSetAssembler_arm.cpp:
// 9 fixed-width words, the last of which is the 32-bit guard value.
// NOTE(review): assumes mov_address expands to a fixed instruction count on
// this target - if the emitted sequence changes, this constant must change
// with it (NativeNMethodBarrier::verify() catches gross skew).
static const int entry_barrier_bytes = 9 * NativeInstruction::size();

// Overlay for the entry barrier sequence emitted by
// BarrierSetAssembler::nmethod_entry_barrier. Provides atomic access to the
// guard word, which is the last 32-bit word of the barrier.
class NativeNMethodBarrier: public NativeInstruction {
address instruction_address() const { return addr_at(0); }

// Address of the guard word, located wordSize bytes before the end of
// the fixed-size barrier sequence.
int *guard_addr() const {
// Last instruction in a barrier
return reinterpret_cast<int*>(instruction_address() + entry_barrier_bytes - wordSize);
}

public:
// Acquire-load of the guard; pairs with the release store in set_value().
int get_value() {
return Atomic::load_acquire(guard_addr());
}

// Release-store of the guard so that threads observing the new value also
// observe any patching done before the store.
void set_value(int value) {
Atomic::release_store(guard_addr(), value);
}

// Sanity-check that the barrier overlay is not offset-skewed.
void verify() const;
};

// Check the first instruction of the nmethod entry barrier
// to make sure that the offsets are not skewed.
void NativeNMethodBarrier::verify() const {
NativeInstruction *ni = (NativeInstruction *) instruction_address();
if (!ni->is_ldr()) {
uint32_t *addr = (uint32_t *) ni;
tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", (intptr_t) addr, (uint32_t) *addr);
fatal("not an ldr instruction.");
}
}

// Locate the entry barrier of an nmethod. The barrier is emitted in the
// prologue so that it ends exactly at the frame-complete offset; stepping
// back by its fixed size yields the start of the barrier sequence.
static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) {
address barrier_address = nm->code_begin() + nm->frame_complete_offset() - entry_barrier_bytes;
NativeNMethodBarrier* barrier = reinterpret_cast<NativeNMethodBarrier*>(barrier_address);
// Verify (debug builds only) that the computed address really is a barrier.
debug_only(barrier->verify());
return barrier;
}

/* We're called from an nmethod when we need to deoptimize it. We do
this by throwing away the nmethod's frame and jumping to the
ic_miss stub. This looks like there has been an IC miss at the
entry of the nmethod, so we resolve the call, which will fall back
to the interpreter if the nmethod has been unloaded. */
void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) {
// Fix: removed a stale ShouldNotReachHere() left over from the pre-barrier
// stub implementation - it made the entire function body unreachable.

// Register record saved on the stack by the entry barrier stub.
// NOTE(review): the -5 slot offset below must match the stub's exact push
// layout - confirm against the method_entry_barrier stub if it changes.
typedef struct {
intptr_t *sp; intptr_t *fp; address lr; address pc;
} frame_pointers_t;

frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5);

JavaThread *thread = JavaThread::current();
RegisterMap reg_map(thread,
RegisterMap::UpdateMap::skip,
RegisterMap::ProcessFrames::include,
RegisterMap::WalkContinuation::skip);
frame frame = thread->last_frame();

assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be");
assert(frame.cb() == nm, "must be");
// Step to the caller: the nmethod's own frame is being thrown away.
frame = frame.sender(&reg_map);

LogTarget(Trace, nmethod, barrier) out;
if (out.is_enabled()) {
ResourceMark mark;
log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p",
nm->method()->name_and_sig_as_C_string(),
nm, *(address *) return_address_ptr, nm->is_osr_method(), thread,
thread->name(), frame.sp(), nm->verified_entry_point());
}

// Rewrite the saved record so that returning from the barrier resumes in
// the caller's frame at the wrong-method stub, which re-resolves the call
// (falling back to the interpreter if the nmethod has been unloaded).
new_frame->sp = frame.sp();
new_frame->fp = frame.fp();
new_frame->lr = frame.pc();
new_frame->pc = SharedRuntime::get_handle_wrong_method_stub();
}

// Disarm the nmethod so subsequent entries take the fast path.
// Fix: removed a stale ShouldNotReachHere() left over from the pre-barrier
// stub implementation - it made the real body below unreachable.
void BarrierSetNMethod::disarm(nmethod* nm) {
if (!supports_entry_barrier(nm)) {
return;
}

// Disarms the guard emitted by BarrierSetAssembler::nmethod_entry_barrier.
// The release store in set_value() pairs with the acquire load in
// get_value(); the emitted barrier itself relies on stop-the-world
// patching and performs no explicit memory barrier on arm32.
NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
barrier->set_value(disarmed_value());
}

// Arm the nmethod: store arm_value into the guard word so entering threads
// take the slow path through the entry barrier stub.
void BarrierSetNMethod::arm(nmethod* nm, int arm_value) {
if (!supports_entry_barrier(nm)) {
return;
}
native_nmethod_barrier(nm)->set_value(arm_value);
}

// Returns true iff the nmethod's guard word differs from the current
// disarmed value, i.e. entering threads would take the slow path.
// Fix: removed stale "ShouldNotReachHere(); return false;" left over from
// the pre-barrier stub implementation - it made the real check unreachable
// (every nmethod would have appeared disarmed).
bool BarrierSetNMethod::is_armed(nmethod* nm) {
if (!supports_entry_barrier(nm)) {
return false;
}

NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
return barrier->get_value() != disarmed_value();
}
14 changes: 14 additions & 0 deletions src/hotspot/cpu/arm/macroAssembler_arm.hpp
Expand Up @@ -587,9 +587,23 @@ class MacroAssembler: public Assembler {
AbstractAssembler::emit_address((address)L.data());
}

// PC-relative load of the word at label L into rd.
// In ARM state, reading PC yields the address of the current instruction
// plus 8 (two instructions ahead, a pipelining artifact), hence the -8
// adjustment to the label displacement.
void ldr_label(Register rd, Label& L) {
ldr(rd, Address(PC, target(L) - pc() - 8));
}

void resolve_oop_handle(Register result);
void load_mirror(Register mirror, Register method, Register tmp);

// Build a standard frame: save FP and LR, then make FP point at the new frame.
void enter() {
raw_push(FP, LR);
mov(FP, SP);
}

// Tear down the frame built by enter(): restore SP from FP, then pop FP and LR.
void leave() {
mov(SP, FP);
raw_pop(FP, LR);
}

#define ARM_INSTR_1(common_mnemonic, arm32_mnemonic, arg_type) \
void common_mnemonic(arg_type arg) { \
arm32_mnemonic(arg); \
Expand Down
3 changes: 3 additions & 0 deletions src/hotspot/cpu/arm/nativeInst_arm_32.hpp
Expand Up @@ -77,9 +77,12 @@ class RawNativeInstruction {
address instruction_address() const { return addr_at(0); }
address next_raw_instruction_address() const { return addr_at(instruction_size); }

// Size in bytes of one fixed-width ARM instruction.
static int size() { return instruction_size; }

static RawNativeInstruction* at(address address) {
return (RawNativeInstruction*)address;
}

RawNativeInstruction* next_raw() const {
return at(next_raw_instruction_address());
}
Expand Down
5 changes: 5 additions & 0 deletions src/hotspot/cpu/arm/sharedRuntime_arm.cpp
Expand Up @@ -28,6 +28,7 @@
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "compiler/oopMap.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "interpreter/interpreter.hpp"
#include "logging/log.hpp"
#include "memory/resourceArea.hpp"
Expand Down Expand Up @@ -873,6 +874,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
__ mov(FP, SP);
__ sub_slow(SP, SP, stack_size - 2*wordSize);

BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
assert(bs != NULL, "Sanity");
bs->nmethod_entry_barrier(masm);

int frame_complete = __ pc() - start;

OopMapSet* oop_maps = new OopMapSet();
Expand Down

1 comment on commit 245f0cf

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.