8266890: [lworld] [AArch64] add support for InlineTypePassFieldsAsArgs #420

Closed · wants to merge 2 commits

Changes from 1 commit
@@ -1964,7 +1964,7 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
C2_MacroAssembler _masm(&cbuf);
int framesize = C->output()->frame_slots() << LogBytesPerInt;

- __ remove_frame(framesize);
+ __ remove_frame(framesize, C->needs_stack_repair(), C->output()->sp_inc_offset());

if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
__ reserved_stack_check();
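
Note: a standalone sketch of the stack-repair arithmetic the three-argument `remove_frame` has to perform — when the entry code grew the frame, the extension (`sp_inc`, in bytes) was stored at a known slot, and the epilogue pops the static frame size plus that increment. All sizes and the C1-style slot offset below are illustrative values, not taken from this patch:

```cpp
// Toy model, not HotSpot code.
#include <cassert>
#include <cstdint>

int main() {
  const int wordSize = 8;                       // AArch64
  alignas(8) unsigned char stack[256];
  unsigned char* sp = stack;                    // SP after the frame was built
  int framesize = 96;                           // static frame size in bytes (example)
  int sp_inc_offset = framesize - 3 * wordSize; // slot just below saved FP/LR (C1 layout)
  *(int64_t*)(sp + sp_inc_offset) = 32;         // entry code recorded a 32-byte extension

  // Epilogue with needs_stack_repair == true:
  int64_t sp_inc = *(int64_t*)(sp + sp_inc_offset);
  unsigned char* caller_sp = sp + framesize + sp_inc;
  assert(caller_sp == stack + 128);             // 96 + 32
  return 0;
}
```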
@@ -376,10 +376,7 @@ void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info)
int LIR_Assembler::initial_frame_size_in_bytes() const {
// if rounding, must let FrameMap know!

// The frame_map records size in slots (32bit word)

- // subtract two words to account for return address and link
- return (frame_map()->framesize() - (2*VMRegImpl::slots_per_word)) * VMRegImpl::stack_slot_size;
+ return in_bytes(frame_map()->framesize_in_bytes());
}


@@ -461,7 +458,8 @@ int LIR_Assembler::emit_unwind_handler() {
// remove the activation and dispatch to the unwind handler
__ block_comment("remove_frame and dispatch to the unwind handler");
int initial_framesize = initial_frame_size_in_bytes();
- __ remove_frame(initial_framesize, needs_stack_repair(), initial_framesize - wordSize);
+ int sp_inc_offset = initial_framesize - 3*wordSize; // Below saved FP and LR
+ __ remove_frame(initial_framesize, needs_stack_repair(), sp_inc_offset);
__ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));

// Emit the slow path assembly
@@ -528,7 +526,8 @@ void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) {

// Pop the stack before the safepoint code
int initial_framesize = initial_frame_size_in_bytes();
- __ remove_frame(initial_framesize, needs_stack_repair(), initial_framesize - wordSize);
+ int sp_inc_offset = initial_framesize - 3*wordSize; // Below saved FP and LR
+ __ remove_frame(initial_framesize, needs_stack_repair(), sp_inc_offset);

if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) {
__ reserved_stack_check();
@@ -349,35 +349,44 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) {
}

void C1_MacroAssembler::build_frame_helper(int frame_size_in_bytes, int sp_inc, bool needs_stack_repair) {
- MacroAssembler::build_frame(frame_size_in_bytes + 2 * wordSize);
+ MacroAssembler::build_frame(frame_size_in_bytes);

if (needs_stack_repair) {
- Unimplemented();
+ int sp_inc_offset = frame_size_in_bytes - 3 * wordSize; // Immediately below saved LR and FP
+ save_stack_increment(sp_inc, sp_inc_offset);
}
}

void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, bool needs_stack_repair, bool has_scalarized_args, Label* verified_inline_entry_label) {
- assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
- // Make sure there is enough stack space for this method's activation.
- // Note that we do this before doing an enter().
- generate_stack_overflow_check(bang_size_in_bytes);
-
- guarantee(needs_stack_repair == false, "Stack repair should not be true");
- if (verified_inline_entry_label != NULL) {
+ if (has_scalarized_args) {
+   // Initialize orig_pc to detect deoptimization during buffering in the entry points
+   str(zr, Address(sp, sp_offset_for_orig_pc - frame_size_in_bytes));
+ }
+ if (!needs_stack_repair && verified_inline_entry_label != NULL) {
bind(*verified_inline_entry_label);
}

+ // Make sure there is enough stack space for this method's activation.
+ // Note that we do this before creating a frame.
+ assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
+ generate_stack_overflow_check(bang_size_in_bytes);
+
+ build_frame_helper(frame_size_in_bytes, 0, needs_stack_repair);

// Insert nmethod entry barrier into frame.
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
bs->nmethod_entry_barrier(this);

+ if (needs_stack_repair && verified_inline_entry_label != NULL) {
+   // Jump here from the scalarized entry points that require additional stack space
+   // for packing scalarized arguments and therefore already created the frame.
+   bind(*verified_inline_entry_label);
+ }
}

void C1_MacroAssembler::remove_frame(int frame_size_in_bytes, bool needs_stack_repair,
int sp_inc_offset) {
- MacroAssembler::remove_frame(frame_size_in_bytes + 2 * wordSize,
-                              needs_stack_repair, sp_inc_offset);
+ MacroAssembler::remove_frame(frame_size_in_bytes, needs_stack_repair, sp_inc_offset);
}
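
A control-flow sketch of who creates the frame may help here; the names and sizes below are illustrative stand-ins, not HotSpot code. Without stack repair, the scalarized entries jump to a label placed before the bang/frame setup and share it; with stack repair, they have already built a frame extended by `sp_inc`, so the label sits after setup:

```cpp
#include <cstdio>

void build_frame_helper(int frame_size, int sp_inc) {
  printf("build frame: %d bytes (+%d repair)\n", frame_size, sp_inc);
}

void verified_inline_entry(bool needs_stack_repair, bool entered_via_scalarized) {
  if (!needs_stack_repair || !entered_via_scalarized) {
    // Label bound here when !needs_stack_repair: scalarized entries reuse
    // the stack bang and frame setup below.
    printf("stack bang\n");
    build_frame_helper(96, 0);
  }
  // Label bound here when needs_stack_repair: the scalarized entry already
  // called build_frame_helper(96, sp_inc) and branched past the setup.
  printf("method body\n");
}

int main() {
  verified_inline_entry(false, true);  // shared setup path
  verified_inline_entry(true, true);   // frame already built by the entry point
  return 0;
}
```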

void C1_MacroAssembler::verified_entry() {
@@ -410,7 +419,7 @@ int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature* ces, int f
// Check if we need to extend the stack for packing
int sp_inc = 0;
if (args_on_stack > args_on_stack_cc) {
- Unimplemented();
+ sp_inc = extend_stack_for_inline_args(args_on_stack);
}
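
The size computed by `extend_stack_for_inline_args` is not shown in this hunk; the sketch below assumes the obvious formula (one word per outgoing stack argument of the scalarized signature, rounded up to AArch64's 16-byte SP alignment) purely for illustration:

```cpp
#include <cstdio>

int main() {
  const int wordSize = 8, StackAlignmentInBytes = 16;
  int args_on_stack = 5;  // scalarized convention needs 5 stack slots (example)
  int sp_inc = (args_on_stack * wordSize + StackAlignmentInBytes - 1)
               & ~(StackAlignmentInBytes - 1);  // align up
  printf("sp_inc = %d bytes\n", sp_inc);        // prints 48
  return 0;
}
```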

// Create a temp frame so we can call into the runtime. It must be properly set up to accommodate GC.
@@ -423,25 +432,38 @@ int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature* ces, int f
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
bs->nmethod_entry_barrier(this);

+ // The runtime call returns the new array in r0 which is also j_rarg7
+ // so we must avoid clobbering that. Temporarily save r0 in a
+ // non-argument register and pass the buffered array in r20 instead.
+ // This is safe because the runtime stub saves all registers.
+ Register val_array = r20;
+ Register tmp1 = r21;
+ mov(tmp1, j_rarg7);

// FIXME -- call runtime only if we cannot in-line allocate all the incoming inline type args.
- mov(r1, (intptr_t) ces->method());
+ mov(r19, (intptr_t) ces->method());
if (is_inline_ro_entry) {
far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::buffer_inline_args_no_receiver_id)));
} else {
far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::buffer_inline_args_id)));
}
int rt_call_offset = offset();

+ mov(val_array, r0);
+ mov(j_rarg7, tmp1);

// Remove the temp frame
- add(sp, sp, frame_size_in_bytes);
+ MacroAssembler::remove_frame(frame_size_in_bytes);

shuffle_inline_args(true, is_inline_ro_entry, sig_cc,
args_passed_cc, args_on_stack_cc, regs_cc, // from
args_passed, args_on_stack, regs, // to
-                     sp_inc);
+                     sp_inc, val_array);

if (ces->c1_needs_stack_repair()) {
- Unimplemented();
+ // Create the real frame. Below jump will then skip over the stack banging and frame
+ // setup code in the verified_inline_entry (which has a different real_frame_size).
+ build_frame_helper(frame_size_in_bytes, sp_inc, true);
}

b(verified_inline_entry_label);
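
A toy model of the register dance around the `buffer_inline_args` call (illustrative only, not HotSpot code): the call returns its result in the same register that holds incoming Java argument 7 (r0 is j_rarg7 on AArch64), so that argument is parked in a callee-saved register across the call and restored afterwards:

```cpp
#include <cassert>
#include <cstdint>

int64_t runtime_call(int64_t& r0) {  // clobbers r0 with its own result
  r0 = 0xB0FFE7;                     // buffered argument array (stand-in)
  return r0;
}

int main() {
  int64_t r0 = 7;        // incoming j_rarg7 lives in r0
  int64_t r21 = r0;      // mov(tmp1, j_rarg7): park the argument
  runtime_call(r0);
  int64_t r20 = r0;      // mov(val_array, r0): grab the result
  r0 = r21;              // mov(j_rarg7, tmp1): restore the argument
  assert(r0 == 7 && r20 == 0xB0FFE7);
  return 0;
}
```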
@@ -885,7 +885,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
"buffer_inline_args" : "buffer_inline_args_no_receiver";
StubFrame f(sasm, name, dont_gc_arguments);
OopMap* map = save_live_registers(sasm);
- Register method = r1;
+ Register method = r19; // Incoming
address entry = (id == buffer_inline_args_id) ?
CAST_FROM_FN_PTR(address, buffer_inline_args) :
CAST_FROM_FN_PTR(address, buffer_inline_args_no_receiver);
@@ -150,12 +150,15 @@ bool frame::safe_for_sender(JavaThread *thread) {
if (!thread->is_in_full_stack_checked((address)sender_sp)) {
return false;
}
- sender_unextended_sp = sender_sp;
sender_pc = (address) *(sender_sp-1);
// Note: frame::sender_sp_offset is only valid for compiled frame
- saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset);
- }
+ intptr_t **saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset);
+ saved_fp = *saved_fp_addr;
+
+ // Repair the sender sp if this is a method with scalarized inline type args
+ sender_sp = repair_sender_sp(sender_sp, saved_fp_addr);
+ sender_unextended_sp = sender_sp;
+ }

// If the potential sender is the interpreter then we can do some more checking
if (Interpreter::contains(sender_pc)) {
@@ -449,21 +452,50 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const {

assert(_cb->frame_size() >= 0, "must have non-zero frame size");
intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size();
- intptr_t* unextended_sp = l_sender_sp;

- // the return_address is always the word on the stack
- address sender_pc = (address) *(l_sender_sp-1);
+ #ifdef ASSERT
+ address sender_pc_copy = (address) *(l_sender_sp-1);
+ #endif

intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp - frame::sender_sp_offset);

+ // assert (sender_sp() == l_sender_sp, "should be");
+ // assert (*saved_fp_addr == link(), "should be");
+
+ // Repair the sender sp if the frame has been extended
+ l_sender_sp = repair_sender_sp(l_sender_sp, saved_fp_addr);
+
+ // The return address is always the first word on the stack
+ address sender_pc = (address) *(l_sender_sp-1);
+
+ #ifdef ASSERT
+ if (sender_pc != sender_pc_copy) {
+   // When extending the stack in the callee method entry to make room for unpacking of value
+   // type args, we keep a copy of the sender pc at the expected location in the callee frame.
+   // If the sender pc is patched due to deoptimization, the copy is not consistent anymore.
+   nmethod* nm = CodeCache::find_blob(sender_pc)->as_nmethod();
+   assert(sender_pc == nm->deopt_mh_handler_begin() || sender_pc == nm->deopt_handler_begin(), "unexpected sender pc");
+ }
+ #endif

if (map->update_map()) {
// Tell GC to use argument oopmaps for some runtime stubs that need it.
// For C1, the runtime stub might not have oop maps, so set this flag
// outside of update_register_map.
- map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
+ bool caller_args = _cb->caller_must_gc_arguments(map->thread());
+ #ifdef COMPILER1
+ if (!caller_args) {
+   nmethod* nm = _cb->as_nmethod_or_null();
+   if (nm != NULL && nm->is_compiled_by_c1() && nm->method()->has_scalarized_args() &&
+       pc() < nm->verified_inline_entry_point()) {
+     // The VEP and VIEP(RO) of C1-compiled methods call buffer_inline_args_xxx
+     // before doing any argument shuffling, so we need to scan the oops
+     // as the caller passes them.
+     caller_args = true;
+   }
+ }
+ #endif
+ map->set_include_argument_oops(caller_args);
if (_cb->oop_maps() != NULL) {
OopMapSet::update_register_map(this, map);
}
@@ -475,7 +507,7 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const {
update_map_with_saved_link(map, saved_fp_addr);
}

- return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc);
+ return frame(l_sender_sp, l_sender_sp, *saved_fp_addr, sender_pc);
}

//------------------------------------------------------------------------------
@@ -797,6 +829,23 @@ frame::frame(void* sp, void* fp, void* pc) {
void frame::pd_ps() {}
#endif

+ // Check for a method with scalarized inline type arguments that needs
+ // a stack repair and return the repaired sender stack pointer.
+ intptr_t* frame::repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) const {
+   CompiledMethod* cm = _cb->as_compiled_method_or_null();
+   if (cm != NULL && cm->needs_stack_repair()) {
+     // The stack increment resides just below the saved FP on the stack
+     intptr_t* sp_inc_addr = (intptr_t*) (saved_fp_addr - 1);
+     assert(*sp_inc_addr % StackAlignmentInBytes == 0, "sp_inc not aligned");
+     int sp_inc = *sp_inc_addr / wordSize;
+     int real_frame_size = _cb->frame_size() + sp_inc;
+     assert(real_frame_size >= _cb->frame_size() && real_frame_size <= 1000000, "invalid frame size");
+     assert(unextended_sp() + real_frame_size == sender_sp + sp_inc, "sanity");
+     sender_sp = unextended_sp() + real_frame_size;
+   }
+   return sender_sp;
+ }
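
A toy model of the repair above (assumed layout, not HotSpot code): the callee stored `sp_inc` (in bytes) one word below its saved FP, so a stack walker can recover the real size of an extended frame. The two-word FP/LR pair at the top of the frame corresponds to AArch64's `frame::sender_sp_offset == 2`; all sizes are examples:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const int wordSize = 8;
  const int sender_sp_offset = 2;          // AArch64: saved FP at sender_sp - 2 words
  intptr_t stack[64] = {};
  intptr_t* unextended_sp = stack;         // callee SP
  int cb_frame_size = 12;                  // static frame size in words (example)
  int sp_inc_bytes = 4 * wordSize;         // extension made for scalarized args

  // What the callee entry code wrote: sp_inc just below the saved FP.
  intptr_t* sender_sp = unextended_sp + cb_frame_size;  // unrepaired guess
  intptr_t** saved_fp_addr = (intptr_t**)(sender_sp - sender_sp_offset);
  *((intptr_t*)saved_fp_addr - 1) = sp_inc_bytes;

  // The repair: real frame size = static size + stored increment (in words).
  int sp_inc = (int)(*((intptr_t*)saved_fp_addr - 1)) / wordSize;
  int real_frame_size = cb_frame_size + sp_inc;
  intptr_t* repaired_sender_sp = unextended_sp + real_frame_size;
  assert(repaired_sender_sp == sender_sp + sp_inc);
  return 0;
}
```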

void JavaFrameAnchor::make_walkable(JavaThread* thread) {
// last frame set?
if (last_Java_sp() == NULL) return;
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -128,6 +128,9 @@
return (intptr_t*) addr_at(offset);
}

+ // Support for scalarized inline type calling convention
+ intptr_t* repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) const;
+
#ifdef ASSERT
// Used in frame::sender_for_{interpreter,compiled}_frame
static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp);
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -193,8 +193,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register tmp,
Register tmp2) {
assert(thread == rthread, "must be");
- assert_different_registers(store_addr, new_val, thread, tmp, tmp2,
-                            rscratch1);
+ assert_different_registers(store_addr, new_val, thread, tmp, rscratch1);
assert(store_addr != noreg && new_val != noreg && tmp != noreg
&& tmp2 != noreg, "expecting a register");

@@ -220,6 +219,8 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,

// storing region crossing non-NULL, is card already dirty?

+ assert_different_registers(store_addr, thread, tmp, tmp2, rscratch1);
+
const Register card_addr = tmp;

__ lsr(card_addr, store_addr, CardTable::card_shift);
@@ -290,17 +291,15 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco

bool in_heap = (decorators & IN_HEAP) != 0;
bool as_normal = (decorators & AS_NORMAL) != 0;
- assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported");
+ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;

- bool needs_pre_barrier = as_normal;
+ bool needs_pre_barrier = as_normal && !dest_uninitialized;
bool needs_post_barrier = (val != noreg && in_heap);

-
- if (tmp3 == noreg) {
-   tmp3 = rscratch2;
- }
- // assert_different_registers(val, tmp1, tmp2, tmp3, rscratch1, rscratch2);
- assert_different_registers(val, tmp1, tmp2, tmp3);
+ if (tmp3 == noreg) {
+   tmp3 = rscratch2;
+ }
+ assert_different_registers(val, tmp1, tmp2, tmp3);

// flatten object address if needed
if (dst.index() == noreg && dst.offset() == 0) {
@@ -311,7 +310,6 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco
__ lea(tmp1, dst);
}

-
if (needs_pre_barrier) {
g1_write_barrier_pre(masm,
tmp1 /* obj */,
@@ -329,23 +327,22 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco
Register new_val = val;
if (needs_post_barrier) {
if (UseCompressedOops) {
- // FIXME: Refactor the code to avoid usage of r19 and stay within tmpX
- new_val = r19;
+ new_val = tmp3;
__ mov(new_val, val);
}
}
- }

- BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg);
+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg);

if (needs_post_barrier) {
- g1_write_barrier_post(masm,
-                       tmp1 /* store_adr */,
-                       new_val /* new_val */,
-                       rthread /* thread */,
-                       tmp2 /* tmp */,
-                       tmp3 /* tmp2 */);
- }
- }
+ g1_write_barrier_post(masm,
+                       tmp1 /* store_adr */,
+                       new_val /* new_val */,
+                       rthread /* thread */,
+                       tmp2 /* tmp */,
+                       tmp3 /* tmp2 */);
+ }
+ }

}
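
A toy model of why `new_val` is copied before the store (the encoding below is assumed for illustration, not HotSpot's exact one): storing a compressed oop narrows the value in the register, but the card-marking post barrier wants the full 64-bit address to test for region crossing, so a copy is kept in `tmp3`:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t heap_base = 0x800000000ULL;      // illustrative
  uint64_t val = heap_base + 0x123450;            // oop being stored
  uint64_t new_val = val;                         // copy kept in tmp3
  uint32_t compressed = (uint32_t)((val - heap_base) >> 3); // what hits memory
  (void)compressed;
  assert(new_val == heap_base + 0x123450);        // barrier still sees the address
  return 0;
}
```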

@@ -64,7 +64,7 @@ define_pd_global(bool, RewriteFrequentPairs, true);

define_pd_global(bool, PreserveFramePointer, false);

- define_pd_global(bool, InlineTypePassFieldsAsArgs, false);
+ define_pd_global(bool, InlineTypePassFieldsAsArgs, true);
define_pd_global(bool, InlineTypeReturnedAsFields, false);

define_pd_global(uintx, TypeProfileLevel, 111);