Skip to content

Commit

Permalink
8288971: AArch64: Clean up stack and register handling in interpreter
Browse files Browse the repository at this point in the history
Reviewed-by: adinn, ngasson
  • Loading branch information
Andrew Haley committed Jul 4, 2022
1 parent d53b02e commit b5d9656
Show file tree
Hide file tree
Showing 15 changed files with 132 additions and 104 deletions.
17 changes: 15 additions & 2 deletions src/hotspot/cpu/aarch64/abstractInterpreter_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ int AbstractInterpreter::size_activation(int max_stack,
// for the callee's params we only need to account for the extra
// locals.
int size = overhead +
(callee_locals - callee_params) +
(callee_locals - callee_params) * Interpreter::stackElementWords +
monitors * frame::interpreter_frame_monitor_size() +
// On the top frame, at all times SP <= ESP, and SP is
// 16-aligned. We ensure this by adjusting SP on method
Expand Down Expand Up @@ -135,7 +135,7 @@ void AbstractInterpreter::layout_activation(Method* method,
// NOTE the difference in using sender_sp and
// interpreter_frame_sender_sp interpreter_frame_sender_sp is
// the original sp of the caller (the unextended_sp) and
// sender_sp is fp+8/16 (32bit/64bit) XXX
// sender_sp is fp+16
//
// The interpreted method entry on AArch64 aligns SP to 16 bytes
// before generating the fixed part of the activation frame. So there
Expand Down Expand Up @@ -165,6 +165,19 @@ void AbstractInterpreter::layout_activation(Method* method,
popframe_extra_args;
interpreter_frame->interpreter_frame_set_last_sp(esp);

// We have to add extra reserved slots to max_stack. There are 3 users of the extra slots,
// none of which are active at the same time, so we only need to make sure there is enough
// room for the biggest user:
// -reserved slot for exception handler
// -reserved slots for JSR292. Method::extra_stack_entries() is the size.
// -reserved slots for TraceBytecodes
int max_stack = method->constMethod()->max_stack() + MAX2(3, Method::extra_stack_entries());
intptr_t* extended_sp = (intptr_t*) monbot -
(max_stack * Interpreter::stackElementWords) -
popframe_extra_args;
extended_sp = align_down(extended_sp, StackAlignmentInBytes);
interpreter_frame->interpreter_frame_set_extended_sp(extended_sp);

// All frames but the initial (oldest) interpreter frame we fill in have
// a value for sender_sp that allows walking the stack but isn't
// truly correct. Correct the value here.
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/cpu/aarch64/assembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
#include "memory/resourceArea.hpp"

#ifndef PRODUCT
const uintptr_t Assembler::asm_bp = 0x00007fffee09ac88;
const uintptr_t Assembler::asm_bp = 0x0000ffffac221240;
#endif

static float unpack(unsigned value);
Expand Down
8 changes: 5 additions & 3 deletions src/hotspot/cpu/aarch64/assembler_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,16 +142,18 @@ REGISTER_DECLARATION(Register, rthread, r28);
REGISTER_DECLARATION(Register, rheapbase, r27);
// constant pool cache
REGISTER_DECLARATION(Register, rcpool, r26);
// monitors allocated on stack
REGISTER_DECLARATION(Register, rmonitors, r25);
// r25 is a callee-saved temp
// REGISTER_DECLARATION(Register, unused, r25);
// locals on stack
REGISTER_DECLARATION(Register, rlocals, r24);
// bytecode pointer
REGISTER_DECLARATION(Register, rbcp, r22);
// Dispatch table base
REGISTER_DECLARATION(Register, rdispatch, r21);
// Java stack pointer
// Java expression stack pointer
REGISTER_DECLARATION(Register, esp, r20);
// Sender's SP while in interpreter
REGISTER_DECLARATION(Register, r19_sender_sp, r19);

// Preserved predicate register with all elements set TRUE.
REGISTER_DECLARATION(PRegister, ptrue, p7);
Expand Down
9 changes: 8 additions & 1 deletion src/hotspot/cpu/aarch64/frame_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,11 @@ void frame::interpreter_frame_set_last_sp(intptr_t* sp) {
*((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp;
}

// Records the extended SP of this interpreter frame by writing it into the
// frame slot located at interpreter_frame_extended_sp_offset.
// Used by template based interpreter deoptimization.
void frame::interpreter_frame_set_extended_sp(intptr_t* sp) {
  intptr_t** extended_sp_slot = (intptr_t**)addr_at(interpreter_frame_extended_sp_offset);
  *extended_sp_slot = sp;
}

frame frame::sender_for_entry_frame(RegisterMap* map) const {
assert(map != NULL, "map must be set");
// Java frame called from C; skip all C frames and return top C
Expand Down Expand Up @@ -599,6 +604,7 @@ void frame::describe_pd(FrameValues& values, int frame_no) {
DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
DESCRIBE_FP_OFFSET(interpreter_frame_method);
DESCRIBE_FP_OFFSET(interpreter_frame_mdp);
DESCRIBE_FP_OFFSET(interpreter_frame_extended_sp);
DESCRIBE_FP_OFFSET(interpreter_frame_mirror);
DESCRIBE_FP_OFFSET(interpreter_frame_cache);
DESCRIBE_FP_OFFSET(interpreter_frame_locals);
Expand Down Expand Up @@ -670,6 +676,8 @@ void internal_pf(uintptr_t sp, uintptr_t fp, uintptr_t pc, uintptr_t bcx) {
DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
DESCRIBE_FP_OFFSET(interpreter_frame_method);
DESCRIBE_FP_OFFSET(interpreter_frame_mdp);
DESCRIBE_FP_OFFSET(interpreter_frame_extended_sp);
DESCRIBE_FP_OFFSET(interpreter_frame_mirror);
DESCRIBE_FP_OFFSET(interpreter_frame_cache);
DESCRIBE_FP_OFFSET(interpreter_frame_locals);
DESCRIBE_FP_OFFSET(interpreter_frame_bcp);
Expand Down Expand Up @@ -772,4 +780,3 @@ void JavaFrameAnchor::make_walkable() {
_last_Java_pc = (address)_last_Java_sp[-1];
vmassert(walkable(), "something went wrong");
}

10 changes: 6 additions & 4 deletions src/hotspot/cpu/aarch64/frame_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,13 @@
// [constant pool cache ] = cache() cache_offset

// [klass of method ] = mirror() mirror_offset
// [padding ]
// [extended SP ] extended_sp offset

// [methodData ] = mdp() mdx_offset
// [Method ] = method() method_offset

// [last esp ] = last_sp() last_sp_offset
// [old stack pointer ] (sender_sp) sender_sp_offset
// [sender's SP ] (sender_sp) sender_sp_offset

// [old frame pointer ] <- fp = link()
// [return pc ]
Expand Down Expand Up @@ -82,8 +82,8 @@
interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1,
interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1,
interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1,
interpreter_frame_padding_offset = interpreter_frame_mdp_offset - 1,
interpreter_frame_mirror_offset = interpreter_frame_padding_offset - 1,
interpreter_frame_extended_sp_offset = interpreter_frame_mdp_offset - 1,
interpreter_frame_mirror_offset = interpreter_frame_extended_sp_offset - 1,
interpreter_frame_cache_offset = interpreter_frame_mirror_offset - 1,
interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1,
interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1,
Expand Down Expand Up @@ -182,6 +182,8 @@
// expression stack tos if we are nested in a java call
intptr_t* interpreter_frame_last_sp() const;

// stores sp into this frame's extended SP slot (interpreter_frame_extended_sp_offset)
void interpreter_frame_set_extended_sp(intptr_t* sp);

template <typename RegisterMapT>
static void update_map_with_saved_link(RegisterMapT* map, intptr_t** link_addr);

Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ void InterpreterMacroAssembler::load_double(Address src) {

// Emits the code that runs before the interpreter jumps to a callee: it
// copies the current SP into the sender-SP register and stores esp into
// the frame's last_sp slot.
void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() {
// set sender sp
// NOTE(review): two moves are listed here (r13 and r19_sender_sp); this looks
// like the old and new line of a diff shown together -- confirm that only the
// r19_sender_sp move is meant to remain.
mov(r13, sp);
mov(r19_sender_sp, sp);
// record last_sp (rfp-relative slot at interpreter_frame_last_sp_offset)
str(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
}
Expand Down
25 changes: 25 additions & 0 deletions src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,31 @@ class InterpreterMacroAssembler: public MacroAssembler {
ldr(rcpool, Address(rfp, frame::interpreter_frame_cache_offset * wordSize));
}

void restore_sp_after_call() {
Label L;
ldr(rscratch1, Address(rfp, frame::interpreter_frame_extended_sp_offset * wordSize));
#ifdef ASSERT
cbnz(rscratch1, L);
stop("SP is null");
#endif
bind(L);
mov(sp, rscratch1);
}

// Debug-only check that the machine SP equals the extended SP saved in the
// current interpreter frame; stops with msg when they differ. Uses rscratch1
// as a temporary. Emits nothing unless ASSERT is defined.
void check_extended_sp(const char* msg = "check extended SP") {
#ifdef ASSERT
  const Address saved_extended_sp(rfp, frame::interpreter_frame_extended_sp_offset * wordSize);
  Label sp_matches;
  ldr(rscratch1, saved_extended_sp);
  cmp(sp, rscratch1);
  br(EQ, sp_matches);
  stop(msg);
  bind(sp_matches);
#endif
}

// Rewrites argument-less uses of check_extended_sp() so that the failure
// message carries the source file and line of the call site.
// NOTE(review): XSTR is not defined in this view; presumably it stringizes
// its expanded argument -- confirm.
#define check_extended_sp() \
check_extended_sp("SP does not match extended SP in frame at " __FILE__ ":" XSTR(__LINE__))

void get_dispatch();

// Helpers for runtime call arguments/results
Expand Down
8 changes: 4 additions & 4 deletions src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler*
return NULL;
}

// r13: sender SP (must preserve; see prepare_to_jump_from_interpreted)
// r19_sender_sp: sender SP (must preserve; see prepare_to_jump_from_interpreted)
// rmethod: Method*
// r3: argument locator (parameter slot count, added to rsp)
// r1: used as temp to hold mh or receiver
Expand Down Expand Up @@ -283,7 +283,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
// temps used in this code are not used in *either* compiled or interpreted calling sequences
Register temp1 = r10;
Register temp2 = r11;
Register temp3 = r14; // r13 is live by this point: it contains the sender SP
Register temp3 = r14;
if (for_compiler_entry) {
assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic || iid == vmIntrinsics::_linkToNative ? noreg : j_rarg0), "only valid assignment");
assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
Expand Down Expand Up @@ -356,7 +356,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
// Live registers at this point:
// member_reg - MemberName that was the trailing argument
// temp1_recv_klass - klass of stacked receiver, if needed
// r13 - interpreter linkage (if interpreted) ??? FIXME
// r19 - interpreter linkage (if interpreted)
// r1 ... r0 - compiler arguments (if compiled)

Label L_incompatible_class_change_error;
Expand Down Expand Up @@ -443,7 +443,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
break;
}

// live at this point: rmethod, r13 (if interpreted)
// live at this point: rmethod, r19_sender_sp (if interpreted)

// After figuring out which concrete method to call, jump into it.
// Note that this works in the interpreter with no data motion.
Expand Down
8 changes: 3 additions & 5 deletions src/hotspot/cpu/aarch64/register_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,9 @@ const int ConcreteRegisterImpl::max_pr
// Returns the printable name of this register: the entry of the names table
// indexed by encoding() when is_valid(), otherwise "noreg".
const char* RegisterImpl::name() const {
static const char *const names[number_of_registers] = {
"c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7",
// NOTE(review): the next four rows (with "rmonitors") and the three rows
// after them (with "r25") list the same registers twice; this looks like the
// old and new rows of a diff shown together -- confirm that only the "r25"
// rows are meant to remain.
"rscratch1", "rscratch2",
"r10", "r11", "r12", "r13", "r14", "r15", "r16",
"r17", "r18_tls", "r19",
"resp", "rdispatch", "rbcp", "r23", "rlocals", "rmonitors", "rcpool", "rheapbase",
"rthread", "rfp", "lr", "sp"
"rscratch1", "rscratch2", "r10", "r11", "r12", "r13", "r14", "r15",
"r16", "r17", "r18_tls", "r19", "resp", "rdispatch", "rbcp", "r23",
"rlocals", "r25", "rcpool", "rheapbase", "rthread", "rfp", "lr", "sp"
};
return is_valid() ? names[encoding()] : "noreg";
}
Expand Down
12 changes: 5 additions & 7 deletions src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ static void gen_c2i_adapter(MacroAssembler *masm,

int extraspace = total_args_passed * Interpreter::stackElementSize;

__ mov(r13, sp);
__ mov(r19_sender_sp, sp);

// stack is aligned, keep it that way
extraspace = align_up(extraspace, 2*wordSize);
Expand Down Expand Up @@ -552,12 +552,10 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
const BasicType *sig_bt,
const VMRegPair *regs) {

// Note: r13 contains the senderSP on entry. We must preserve it since
// we may do a i2c -> c2i transition if we lose a race where compiled
// code goes non-entrant while we get args ready.

// In addition we use r13 to locate all the interpreter args because
// we must align the stack to 16 bytes.
// Note: r19_sender_sp contains the senderSP on entry. We must
// preserve it since we may do a i2c -> c2i transition if we lose a
// race where compiled code goes non-entrant while we get args
// ready.

// Adapters are frameless.

Expand Down
4 changes: 2 additions & 2 deletions src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,9 +298,9 @@ class StubGenerator: public StubCodeGenerator {

// call Java entry -- passing Method*, and current sp
// rmethod: Method*
// r13: sender sp
// r19_sender_sp: sender sp
BLOCK_COMMENT("call Java function");
__ mov(r13, sp);
__ mov(r19_sender_sp, sp);
__ blr(c_rarg4);

// we do this here because the notify will already have been done
Expand Down

1 comment on commit b5d9656

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.