8266890: [lworld] [AArch64] add support for InlineTypePassFieldsAsArgs
Reviewed-by: thartmann
nick-arm authored and TobiHartmann committed May 31, 2021
1 parent 0f1c33c commit 08715901f7463dc5bf77fc76ab33958c4de4e0bf
Showing 21 changed files with 398 additions and 273 deletions.
@@ -1964,7 +1964,7 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
C2_MacroAssembler _masm(&cbuf);
int framesize = C->output()->frame_slots() << LogBytesPerInt;

-  __ remove_frame(framesize);
+  __ remove_frame(framesize, C->needs_stack_repair(), C->output()->sp_inc_offset());

if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
__ reserved_stack_check();
@@ -376,10 +376,7 @@ void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info)
int LIR_Assembler::initial_frame_size_in_bytes() const {
// if rounding, must let FrameMap know!

-  // The frame_map records size in slots (32bit word)
-
-  // subtract two words to account for return address and link
-  return (frame_map()->framesize() - (2*VMRegImpl::slots_per_word)) * VMRegImpl::stack_slot_size;
+  return in_bytes(frame_map()->framesize_in_bytes());
}


@@ -461,7 +458,8 @@ int LIR_Assembler::emit_unwind_handler() {
// remove the activation and dispatch to the unwind handler
__ block_comment("remove_frame and dispatch to the unwind handler");
int initial_framesize = initial_frame_size_in_bytes();
-  __ remove_frame(initial_framesize, needs_stack_repair(), initial_framesize - wordSize);
+  int sp_inc_offset = initial_framesize - 3*wordSize; // Below saved FP and LR
+  __ remove_frame(initial_framesize, needs_stack_repair(), sp_inc_offset);
__ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));

// Emit the slow path assembly
@@ -528,7 +526,8 @@ void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) {

// Pop the stack before the safepoint code
int initial_framesize = initial_frame_size_in_bytes();
-  __ remove_frame(initial_framesize, needs_stack_repair(), initial_framesize - wordSize);
+  int sp_inc_offset = initial_framesize - 3*wordSize; // Below saved FP and LR
+  __ remove_frame(initial_framesize, needs_stack_repair(), sp_inc_offset);

if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) {
__ reserved_stack_check();
@@ -349,35 +349,44 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) {
}

void C1_MacroAssembler::build_frame_helper(int frame_size_in_bytes, int sp_inc, bool needs_stack_repair) {
-  MacroAssembler::build_frame(frame_size_in_bytes + 2 * wordSize);
+  MacroAssembler::build_frame(frame_size_in_bytes);

if (needs_stack_repair) {
-    Unimplemented();
+    int sp_inc_offset = frame_size_in_bytes - 3 * wordSize; // Immediately below saved LR and FP
+    save_stack_increment(sp_inc, frame_size_in_bytes, sp_inc_offset);
}
}

void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, bool needs_stack_repair, bool has_scalarized_args, Label* verified_inline_entry_label) {
-  assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
-  // Make sure there is enough stack space for this method's activation.
-  // Note that we do this before doing an enter().
-  generate_stack_overflow_check(bang_size_in_bytes);
-
-  guarantee(needs_stack_repair == false, "Stack repair should not be true");
-  if (verified_inline_entry_label != NULL) {
+  if (has_scalarized_args) {
+    // Initialize orig_pc to detect deoptimization during buffering in the entry points
+    str(zr, Address(sp, sp_offset_for_orig_pc - frame_size_in_bytes));
+  }
+  if (!needs_stack_repair && verified_inline_entry_label != NULL) {
bind(*verified_inline_entry_label);
}

+  // Make sure there is enough stack space for this method's activation.
+  // Note that we do this before creating a frame.
+  assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
+  generate_stack_overflow_check(bang_size_in_bytes);
+
build_frame_helper(frame_size_in_bytes, 0, needs_stack_repair);

// Insert nmethod entry barrier into frame.
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
bs->nmethod_entry_barrier(this);

+  if (needs_stack_repair && verified_inline_entry_label != NULL) {
+    // Jump here from the scalarized entry points that require additional stack space
+    // for packing scalarized arguments and therefore already created the frame.
+    bind(*verified_inline_entry_label);
+  }
}

void C1_MacroAssembler::remove_frame(int frame_size_in_bytes, bool needs_stack_repair,
int sp_inc_offset) {
-  MacroAssembler::remove_frame(frame_size_in_bytes + 2 * wordSize,
-                               needs_stack_repair, sp_inc_offset);
+  MacroAssembler::remove_frame(frame_size_in_bytes, needs_stack_repair, sp_inc_offset);
}
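Note on the layout behind the 3 * wordSize constant used here and in the C1 epilogs above: the AArch64 frame keeps the FP/LR save pair in its top two words, and the stack-repair word sits immediately below them. A standalone model with a hypothetical frame size (not HotSpot code):

    #include <cstdio>

    int main() {
      const int word_size = 8;             // AArch64 wordSize in bytes
      const int frame_size_in_bytes = 64;  // hypothetical C1 frame

      // Offsets from the callee SP after build_frame (stack grows downward):
      int lr_offset     = frame_size_in_bytes - 1 * word_size;  // saved LR (return pc)
      int fp_offset     = frame_size_in_bytes - 2 * word_size;  // saved FP
      int sp_inc_offset = frame_size_in_bytes - 3 * word_size;  // stack-repair word

      std::printf("LR at sp+%d, FP at sp+%d, sp_inc at sp+%d\n",
                  lr_offset, fp_offset, sp_inc_offset);
      return 0;
    }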

void C1_MacroAssembler::verified_entry() {
@@ -410,7 +419,7 @@ int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature* ces, int f
// Check if we need to extend the stack for packing
int sp_inc = 0;
if (args_on_stack > args_on_stack_cc) {
-    Unimplemented();
+    sp_inc = extend_stack_for_inline_args(args_on_stack);
}

// Create a temp frame so we can call into the runtime. It must be properly set up to accommodate GC.
@@ -423,25 +432,38 @@ int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature* ces, int f
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
bs->nmethod_entry_barrier(this);

+  // The runtime call returns the new array in r0 which is also j_rarg7
+  // so we must avoid clobbering that. Temporarily save r0 in a
+  // non-argument register and pass the buffered array in r20 instead.
+  // This is safe because the runtime stub saves all registers.
+  Register val_array = r20;
+  Register tmp1 = r21;
+  mov(tmp1, j_rarg7);
+
// FIXME -- call runtime only if we cannot in-line allocate all the incoming inline type args.
-  mov(r1, (intptr_t) ces->method());
+  mov(r19, (intptr_t) ces->method());
if (is_inline_ro_entry) {
far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::buffer_inline_args_no_receiver_id)));
} else {
far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::buffer_inline_args_id)));
}
int rt_call_offset = offset();

+  mov(val_array, r0);
+  mov(j_rarg7, tmp1);

// Remove the temp frame
-  add(sp, sp, frame_size_in_bytes);
+  MacroAssembler::remove_frame(frame_size_in_bytes);

shuffle_inline_args(true, is_inline_ro_entry, sig_cc,
args_passed_cc, args_on_stack_cc, regs_cc, // from
args_passed, args_on_stack, regs, // to
-                      sp_inc);
+                      sp_inc, val_array);

if (ces->c1_needs_stack_repair()) {
-    Unimplemented();
+    // Create the real frame. Below jump will then skip over the stack banging and frame
+    // setup code in the verified_inline_entry (which has a different real_frame_size).
+    build_frame_helper(frame_size_in_bytes, sp_inc, true);
}

b(verified_inline_entry_label);
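Note: on AArch64 the Java argument registers are shifted by one relative to the C convention (j_rarg0..j_rarg6 map to r1..r7, and j_rarg7 to r0), so the stub's result in r0 would silently clobber the eighth Java argument. A standalone model of the park/restore dance above (not HotSpot code):

    #include <cassert>

    int main() {
      long r0 = 111;   // holds the 8th Java arg on entry (j_rarg7 aliases r0)
      long r20, r21;   // play the roles of val_array and tmp1 in the commit

      r21 = r0;        // mov(tmp1, j_rarg7): park the Java arg
      r0  = 222;       // far_call(...): the stub returns the buffered array in r0
      r20 = r0;        // mov(val_array, r0): keep the result out of the arg regs
      r0  = r21;       // mov(j_rarg7, tmp1): restore the Java arg before shuffling

      assert(r0 == 111 && r20 == 222);
      return 0;
    }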
@@ -885,7 +885,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
"buffer_inline_args" : "buffer_inline_args_no_receiver";
StubFrame f(sasm, name, dont_gc_arguments);
OopMap* map = save_live_registers(sasm);
-  Register method = r1;
+  Register method = r19; // Incoming
address entry = (id == buffer_inline_args_id) ?
CAST_FROM_FN_PTR(address, buffer_inline_args) :
CAST_FROM_FN_PTR(address, buffer_inline_args_no_receiver);
@@ -150,12 +150,15 @@ bool frame::safe_for_sender(JavaThread *thread) {
if (!thread->is_in_full_stack_checked((address)sender_sp)) {
return false;
}
-      sender_unextended_sp = sender_sp;
      sender_pc = (address) *(sender_sp-1);
      // Note: frame::sender_sp_offset is only valid for compiled frame
-      saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset);
-    }
+      intptr_t **saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset);
+      saved_fp = *saved_fp_addr;
+
+      // Repair the sender sp if this is a method with scalarized inline type args
+      sender_sp = repair_sender_sp(sender_sp, saved_fp_addr);
+      sender_unextended_sp = sender_sp;
+    }

// If the potential sender is the interpreter then we can do some more checking
if (Interpreter::contains(sender_pc)) {
@@ -449,21 +452,50 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const {

assert(_cb->frame_size() >= 0, "must have non-zero frame size");
intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size();
-  intptr_t* unextended_sp = l_sender_sp;

-  // the return_address is always the word on the stack
-  address sender_pc = (address) *(l_sender_sp-1);
+#ifdef ASSERT
+  address sender_pc_copy = (address) *(l_sender_sp-1);
+#endif

  intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp - frame::sender_sp_offset);

-  // assert (sender_sp() == l_sender_sp, "should be");
-  // assert (*saved_fp_addr == link(), "should be");

+  // Repair the sender sp if the frame has been extended
+  l_sender_sp = repair_sender_sp(l_sender_sp, saved_fp_addr);
+
+  // The return address is always the first word on the stack
+  address sender_pc = (address) *(l_sender_sp-1);
+
+#ifdef ASSERT
+  if (sender_pc != sender_pc_copy) {
+    // When extending the stack in the callee method entry to make room for unpacking of value
+    // type args, we keep a copy of the sender pc at the expected location in the callee frame.
+    // If the sender pc is patched due to deoptimization, the copy is not consistent anymore.
+    nmethod* nm = CodeCache::find_blob(sender_pc)->as_nmethod();
+    assert(sender_pc == nm->deopt_mh_handler_begin() || sender_pc == nm->deopt_handler_begin(), "unexpected sender pc");
+  }
+#endif

if (map->update_map()) {
// Tell GC to use argument oopmaps for some runtime stubs that need it.
-    map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
+    // For C1, the runtime stub might not have oop maps, so set this flag
+    // outside of update_register_map.
+    bool caller_args = _cb->caller_must_gc_arguments(map->thread());
+#ifdef COMPILER1
+    if (!caller_args) {
+      nmethod* nm = _cb->as_nmethod_or_null();
+      if (nm != NULL && nm->is_compiled_by_c1() && nm->method()->has_scalarized_args() &&
+          pc() < nm->verified_inline_entry_point()) {
+        // The VEP and VIEP(RO) of C1-compiled methods call buffer_inline_args_xxx
+        // before doing any argument shuffling, so we need to scan the oops
+        // as the caller passes them.
+        caller_args = true;
+      }
+    }
+#endif
+    map->set_include_argument_oops(caller_args);
if (_cb->oop_maps() != NULL) {
OopMapSet::update_register_map(this, map);
}
@@ -475,7 +507,7 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const {
update_map_with_saved_link(map, saved_fp_addr);
}

-  return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc);
+  return frame(l_sender_sp, l_sender_sp, *saved_fp_addr, sender_pc);
}

//------------------------------------------------------------------------------
@@ -797,6 +829,22 @@ frame::frame(void* sp, void* fp, void* pc) {
void frame::pd_ps() {}
#endif

+// Check for a method with scalarized inline type arguments that needs
+// a stack repair and return the repaired sender stack pointer.
+intptr_t* frame::repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) const {
+  CompiledMethod* cm = _cb->as_compiled_method_or_null();
+  if (cm != NULL && cm->needs_stack_repair()) {
+    // The stack increment resides just below the saved FP on the stack and
+    // records the total frame size excluding the two words for saving FP and LR.
+    intptr_t* sp_inc_addr = (intptr_t*) (saved_fp_addr - 1);
+    assert(*sp_inc_addr % StackAlignmentInBytes == 0, "sp_inc not aligned");
+    int real_frame_size = (*sp_inc_addr / wordSize) + 2;
+    assert(real_frame_size >= _cb->frame_size() && real_frame_size <= 1000000, "invalid frame size");
+    sender_sp = unextended_sp() + real_frame_size;
+  }
+  return sender_sp;
+}
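Note: the value stored by save_stack_increment and the recovery done here form a round trip: sp_inc = (real_frame_size_in_words - 2) * wordSize, so real_frame_size = sp_inc / wordSize + 2. A standalone model with made-up sizes (not HotSpot code):

    #include <cassert>
    #include <cstdint>

    int main() {
      const int64_t word_size        = 8;  // AArch64 wordSize
      const int64_t blob_frame_words = 8;  // frame size the CodeBlob reports
      const int64_t extension_words  = 4;  // extra space for scalarized args

      // What the callee records below the saved FP/LR pair: the real frame
      // size in bytes, excluding the two FP/LR save words.
      int64_t real_frame_words = blob_frame_words + extension_words;
      int64_t sp_inc = (real_frame_words - 2) * word_size;
      assert(sp_inc % 16 == 0);  // StackAlignmentInBytes on AArch64

      // What repair_sender_sp recovers while walking the stack:
      int64_t recovered = (sp_inc / word_size) + 2;
      assert(recovered == real_frame_words);
      // sender_sp = unextended_sp() + recovered (pointer arithmetic in words)
      return 0;
    }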

void JavaFrameAnchor::make_walkable(JavaThread* thread) {
// last frame set?
if (last_Java_sp() == NULL) return;
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -128,6 +128,9 @@
return (intptr_t*) addr_at(offset);
}

+  // Support for scalarized inline type calling convention
+  intptr_t* repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) const;
+
#ifdef ASSERT
// Used in frame::sender_for_{interpreter,compiled}_frame
static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp);
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -193,8 +193,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register tmp,
Register tmp2) {
assert(thread == rthread, "must be");
-  assert_different_registers(store_addr, new_val, thread, tmp, tmp2,
-                             rscratch1);
+  assert_different_registers(store_addr, new_val, thread, tmp, rscratch1);
assert(store_addr != noreg && new_val != noreg && tmp != noreg
&& tmp2 != noreg, "expecting a register");

@@ -220,6 +219,8 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,

// storing region crossing non-NULL, is card already dirty?

+  assert_different_registers(store_addr, thread, tmp, tmp2, rscratch1);
+
const Register card_addr = tmp;

__ lsr(card_addr, store_addr, CardTable::card_shift);
@@ -290,17 +291,15 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco

bool in_heap = (decorators & IN_HEAP) != 0;
bool as_normal = (decorators & AS_NORMAL) != 0;
-  assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported");
+  bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;

-  bool needs_pre_barrier = as_normal;
+  bool needs_pre_barrier = as_normal && !dest_uninitialized;
  bool needs_post_barrier = (val != noreg && in_heap);

-
-  if (tmp3 == noreg) {
-    tmp3 = rscratch2;
-  }
-  // assert_different_registers(val, tmp1, tmp2, tmp3, rscratch1, rscratch2);
-  assert_different_registers(val, tmp1, tmp2, tmp3);
+  if (tmp3 == noreg) {
+    tmp3 = rscratch2;
+  }
+  assert_different_registers(val, tmp1, tmp2, tmp3);
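Note: the pre-barrier change is a small optimization. G1's SATB pre-barrier logs the previous value of the field, and an IS_DEST_UNINITIALIZED store (for example into a freshly allocated inline-type buffer) has no previous value to log. A standalone model of the decorator logic (not HotSpot code):

    #include <cassert>

    enum Decorators { IN_HEAP = 1, AS_NORMAL = 2, IS_DEST_UNINITIALIZED = 4 };

    // Mirrors the needs_pre_barrier computation in the hunk above.
    bool needs_pre_barrier(int d) {
      return (d & AS_NORMAL) && !(d & IS_DEST_UNINITIALIZED);
    }

    int main() {
      assert( needs_pre_barrier(IN_HEAP | AS_NORMAL));
      assert(!needs_pre_barrier(IN_HEAP | AS_NORMAL | IS_DEST_UNINITIALIZED));
      return 0;
    }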

// flatten object address if needed
if (dst.index() == noreg && dst.offset() == 0) {
@@ -311,7 +310,6 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco
__ lea(tmp1, dst);
}

-
if (needs_pre_barrier) {
g1_write_barrier_pre(masm,
tmp1 /* obj */,
@@ -329,23 +327,22 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco
Register new_val = val;
if (needs_post_barrier) {
if (UseCompressedOops) {
-      // FIXME: Refactor the code to avoid usage of r19 and stay within tmpX
-      new_val = r19;
+      new_val = tmp3;
__ mov(new_val, val);
    }
  }

-    BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg);
-
-    if (needs_post_barrier) {
-      g1_write_barrier_post(masm,
-                            tmp1 /* store_adr */,
-                            new_val /* new_val */,
-                            rthread /* thread */,
-                            tmp2 /* tmp */,
-                            tmp3 /* tmp2 */);
-    }
-  }
+  BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg);
+
+  if (needs_post_barrier) {
+    g1_write_barrier_post(masm,
+                          tmp1 /* store_adr */,
+                          new_val /* new_val */,
+                          rthread /* thread */,
+                          tmp2 /* tmp */,
+                          tmp3 /* tmp2 */);
+  }
}

@@ -64,7 +64,7 @@ define_pd_global(bool, RewriteFrequentPairs, true);

define_pd_global(bool, PreserveFramePointer, false);

-define_pd_global(bool, InlineTypePassFieldsAsArgs, false);
+define_pd_global(bool, InlineTypePassFieldsAsArgs, true);
define_pd_global(bool, InlineTypeReturnedAsFields, false);

define_pd_global(uintx, TypeProfileLevel, 111);
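Note: this platform default is the switch the rest of the commit exists to support. With InlineTypePassFieldsAsArgs enabled, compiled calls on AArch64 pass the fields of inline-type arguments individually in registers and stack slots rather than as a heap-buffered object, which is why the scalarized entry points (VEP, the verified entry point; VIEP, the verified inline entry point; VIEP(RO), the variant that keeps the receiver buffered) and the stack-repair machinery above are needed. InlineTypeReturnedAsFields remains disabled on AArch64 here. Assuming the shared declaration of the flag remains settable on the command line, the old buffered convention should still be selectable for debugging:

    java -XX:-InlineTypePassFieldsAsArgs ...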
