Merge branch 'lworld' into lworld_merge
# Conflicts:
#	src/hotspot/cpu/x86/macroAssembler_x86.cpp
#	src/hotspot/cpu/x86/x86_64.ad
MrSimms committed Apr 8, 2020
2 parents ddfee9a + 1aea5d2 commit ae1135cfa65e793a3c3e2e193bcbd0f57b12abc5
Showing with 3,130 additions and 269 deletions.
  1. +8 −4 src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
  2. +39 −59 src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp
  3. +10 −12 src/hotspot/cpu/x86/frame_x86.cpp
  4. +23 −37 src/hotspot/cpu/x86/macroAssembler_x86.cpp
  5. +5 −5 src/hotspot/cpu/x86/macroAssembler_x86.hpp
  6. +3 −1 src/hotspot/cpu/x86/x86_64.ad
  7. +14 −10 src/hotspot/share/asm/macroAssembler_common.cpp
  8. +5 −5 src/hotspot/share/asm/macroAssembler_common.hpp
  9. +3 −0 src/hotspot/share/c1/c1_FrameMap.hpp
  10. +2 −0 src/hotspot/share/c1/c1_LIR.cpp
  11. +1 −0 src/hotspot/share/c1/c1_LIR.hpp
  12. +51 −49 src/hotspot/share/c1/c1_LIRAssembler.cpp
  13. +2 −1 src/hotspot/share/c1/c1_LIRAssembler.hpp
  14. +8 −0 src/hotspot/share/c1/c1_LIRGenerator.cpp
  15. +7 −8 src/hotspot/share/c1/c1_MacroAssembler.hpp
  16. +75 −12 src/hotspot/share/classfile/classFileParser.cpp
  17. +4 −0 src/hotspot/share/classfile/classFileParser.hpp
  18. +47 −9 src/hotspot/share/classfile/fieldLayoutBuilder.cpp
  19. +2 −0 src/hotspot/share/classfile/fieldLayoutBuilder.hpp
  20. +1 −0 src/hotspot/share/classfile/vmSymbols.hpp
  21. +4 −0 src/hotspot/share/oops/arrayKlass.hpp
  22. +29 −1 src/hotspot/share/oops/instanceKlass.hpp
  23. +1 −1 src/hotspot/share/oops/valueArrayKlass.cpp
  24. +2 −1 src/hotspot/share/oops/valueArrayKlass.hpp
  25. +6 −5 src/hotspot/share/oops/valueKlass.cpp
  26. +2 −2 src/hotspot/share/oops/valueKlass.hpp
  27. +7 −19 src/hotspot/share/opto/macro.cpp
  28. +0 −1 src/hotspot/share/opto/macro.hpp
  29. +2 −0 src/hotspot/share/opto/phaseX.cpp
  30. +1 −1 src/hotspot/share/opto/valuetypenode.cpp
  31. +1 −1 src/hotspot/share/opto/valuetypenode.hpp
  32. +2 −5 src/hotspot/share/prims/jvm.cpp
  33. +1 −1 src/hotspot/share/runtime/deoptimization.cpp
  34. +22 −0 src/hotspot/share/runtime/frame.cpp
  35. +5 −0 src/hotspot/share/runtime/globals.hpp
  36. +216 −0 src/hotspot/share/utilities/stringUtils.cpp
  37. +4 −0 src/hotspot/share/utilities/stringUtils.hpp
  38. +65 −0 src/java.base/share/classes/java/lang/NonTearable.java
  39. +40 −8 test/hotspot/jtreg/compiler/valhalla/valuetypes/TestC2CCalls.java
  40. +40 −11 test/hotspot/jtreg/compiler/valhalla/valuetypes/TestDeoptimizationWhenBuffering.java
  41. +2 −0 test/hotspot/jtreg/runtime/valhalla/valuetypes/FlattenableSemanticTest.java
  42. +280 −0 test/hotspot/jtreg/runtime/valhalla/valuetypes/ValueTearing.java
  43. +1 −0 test/hotspot/jtreg/runtime/valhalla/valuetypes/ValueTypeArray.java
  44. +3 −0 test/hotspot/jtreg/runtime/valhalla/valuetypes/ValueTypeDensity.java
  45. +6 −0 test/hotspot/jtreg/runtime/valhalla/valuetypes/ValueTypesTest.java
  46. +96 −0 test/micro/org/openjdk/bench/valhalla/corelibs/InlineCursor.java
  47. +1,906 −0 test/micro/org/openjdk/bench/valhalla/corelibs/XArrayList.java
  48. +76 −0 test/micro/org/openjdk/bench/valhalla/corelibs/XArrayListCursorTest.java
@@ -283,8 +283,7 @@ void LIR_Assembler::osr_entry() {
 
   // build frame
   ciMethod* m = compilation()->method();
-  __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes(),
-                 needs_stack_repair(), NULL);
+  __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes());
 
   // OSR buffer is
   //
@@ -481,7 +480,8 @@ int LIR_Assembler::emit_unwind_handler() {
   }
 
   // remove the activation and dispatch to the unwind handler
-  __ remove_frame(initial_frame_size_in_bytes(), needs_stack_repair());
+  int initial_framesize = initial_frame_size_in_bytes();
+  __ remove_frame(initial_framesize, needs_stack_repair(), initial_framesize - wordSize);
   __ jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));
 
   // Emit the slow path assembly
@@ -547,7 +547,8 @@ void LIR_Assembler::return_op(LIR_Opr result) {
   }
 
   // Pop the stack before the safepoint code
-  __ remove_frame(initial_frame_size_in_bytes(), needs_stack_repair());
+  int initial_framesize = initial_frame_size_in_bytes();
+  __ remove_frame(initial_framesize, needs_stack_repair(), initial_framesize - wordSize);
 
   if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) {
     __ reserved_stack_check();
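
For orientation, both call sites above pass sp_inc_offset = initial_framesize - wordSize, i.e. the saved stack increment sits one word below the saved rbp at the top of the fixed frame. A minimal standalone model of those offsets (not VM code; wordSize = 8 and the frame size are assumptions for x86_64):

    #include <cassert>
    #include <cstdio>

    const int wordSize = 8;  // assumption for x86_64

    int main() {
      int initial_framesize = 96;                        // hypothetical fixed C1 frame size
      int sp_inc_offset = initial_framesize - wordSize;  // what both call sites pass

      // Relative to rsp inside the method:
      //   rsp + initial_framesize - wordSize : saved stack increment (real_frame_size)
      //   rsp + initial_framesize            : saved rbp
      printf("stack-increment slot at rsp+%d, saved rbp at rsp+%d\n",
             sp_inc_offset, initial_framesize);
      assert(initial_framesize - sp_inc_offset == wordSize);
      return 0;
    }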
@@ -4269,6 +4270,9 @@ void LIR_Assembler::get_thread(LIR_Opr result_reg) {
 #endif // _LP64
 }
 
+void LIR_Assembler::check_orig_pc() {
+  __ cmpptr(frame_map()->address_for_orig_pc_addr(), (int32_t)NULL_WORD);
+}
 
 void LIR_Assembler::peephole(LIR_List*) {
   // do nothing for now
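
check_orig_pc() supports the new orig_pc protocol: the entry points zero the slot (see build_frame() and scalarized_entry() below), and a non-NULL value, presumably stamped there by the deoptimization machinery during value-argument buffering, signals that deoptimization occurred. A hedged plain-C++ model of that handshake (not VM code; the pc value is made up):

    #include <cassert>
    #include <cstdint>

    struct FrameModel {
      intptr_t orig_pc = 0;  // slot zeroed by the entry code
    };

    // What the emitted cmpptr(orig_pc_addr, NULL_WORD) decides:
    bool deopt_happened(const FrameModel& f) { return f.orig_pc != 0; }

    int main() {
      FrameModel f;
      assert(!deopt_happened(f));  // freshly built frame: slot is NULL
      f.orig_pc = 0x7f001234;      // deoptimization stamps the original pc (made-up value)
      assert(deopt_happened(f));
      return 0;
    }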
@@ -318,18 +318,21 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) {
 }
 
 
-void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes, bool needs_stack_repair, Label* verified_value_entry_label) {
-  assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
+void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, bool needs_stack_repair, bool has_scalarized_args, Label* verified_value_entry_label) {
+  if (has_scalarized_args) {
+    // Initialize orig_pc to detect deoptimization during buffering in the entry points
+    movptr(Address(rsp, sp_offset_for_orig_pc - frame_size_in_bytes - wordSize), 0);
+  }
+  if (!needs_stack_repair && verified_value_entry_label != NULL) {
+    bind(*verified_value_entry_label);
+  }
   // Make sure there is enough stack space for this method's activation.
   // Note that we do this before doing an enter(). This matches the
   // ordering of C2's stack overflow check / rsp decrement and allows
   // the SharedRuntime stack overflow handling to be consistent
   // between the two compilers.
+  assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
   generate_stack_overflow_check(bang_size_in_bytes);
 
-  if (!needs_stack_repair && verified_value_entry_label != NULL) {
-    bind(*verified_value_entry_label);
-  }
   push(rbp);
   if (PreserveFramePointer) {
     mov(rbp, rsp);
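
The movptr above runs before the frame exists: rsp still points at the return address, so the slot that will be rsp + sp_offset_for_orig_pc after push(rbp) and the frame allocation must be addressed as sp_offset_for_orig_pc - frame_size_in_bytes - wordSize now. A worked sketch of that arithmetic (standalone; all values are assumptions):

    #include <cassert>

    const long wordSize = 8;  // assumption for x86_64

    int main() {
      long rsp_now = 0x8000;            // rsp at method entry, made up (points at return address)
      long frame_size_in_bytes = 96;    // hypothetical fixed frame size
      long sp_offset_for_orig_pc = 64;  // hypothetical slot offset within the built frame

      long rsp_after = rsp_now - wordSize             // push(rbp)
                               - frame_size_in_bytes; // decrement(rsp, frame_size_in_bytes)

      // Both expressions address the same stack slot:
      assert(rsp_after + sp_offset_for_orig_pc ==
             rsp_now + sp_offset_for_orig_pc - frame_size_in_bytes - wordSize);
      return 0;
    }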
@@ -342,9 +345,8 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by
 #endif // !_LP64 && TIERED
   decrement(rsp, frame_size_in_bytes); // does not emit code for frame_size == 0
   if (needs_stack_repair) {
-    int real_frame_size = frame_size_in_bytes
-           + wordSize  // skip over pushed rbp
-           + wordSize; // skip over RA pushed by caller
+    // Save stack increment (also account for rbp)
+    int real_frame_size = frame_size_in_bytes + wordSize;
    movptr(Address(rsp, frame_size_in_bytes - wordSize), real_frame_size);
    if (verified_value_entry_label != NULL) {
      bind(*verified_value_entry_label);
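
The stored increment changes meaning here: it now covers the fixed frame plus the pushed rbp only, no longer the caller-pushed return address, which makes C1 match C2 and lets the reader add the return-address word back unconditionally (see the frame::repair_sender_sp hunk below). A one-line check of the accounting (standalone; wordSize = 8 assumed):

    #include <cassert>

    const int wordSize = 8;  // assumption for x86_64

    int main() {
      int frame_size_in_bytes = 96;                        // hypothetical
      int old_stored = frame_size_in_bytes + 2 * wordSize; // old C1: rbp + return address
      int new_stored = frame_size_in_bytes + wordSize;     // new: rbp only, matching C2
      // The reader (frame::repair_sender_sp) now uniformly adds one word back:
      assert(new_stored + wordSize == old_stored);
      return 0;
    }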
@@ -355,21 +357,7 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by
   bs->nmethod_entry_barrier(this);
 }
 
-
-void C1_MacroAssembler::remove_frame(int frame_size_in_bytes, bool needs_stack_repair) {
-  if (!needs_stack_repair) {
-    increment(rsp, frame_size_in_bytes); // Does not emit code for frame_size == 0
-    pop(rbp);
-  } else {
-    movq(r13, Address(rsp, frame_size_in_bytes + wordSize)); // return address
-    movq(rbp, Address(rsp, frame_size_in_bytes));
-    addq(rsp, Address(rsp, frame_size_in_bytes - wordSize)); // now we are back to caller frame, without the outgoing returned address
-    push(r13); // restore the returned address, as pushed by caller
-  }
-}
-
-
-void C1_MacroAssembler::verified_value_entry() {
+void C1_MacroAssembler::verified_entry() {
   if (C1Breakpoint || VerifyFPU || !UseStackBanging) {
     // Verified Entry first instruction should be 5 bytes long for correct
     // patching by patch_verified_entry().
@@ -385,22 +373,13 @@ void C1_MacroAssembler::verified_value_entry() {
   IA32_ONLY( verify_FPU(0, "method_entry"); )
 }
 
-int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature *ces, int frame_size_in_bytes, int bang_size_in_bytes, Label& verified_value_entry_label, bool is_value_ro_entry) {
-  if (C1Breakpoint || VerifyFPU || !UseStackBanging) {
-    // Verified Entry first instruction should be 5 bytes long for correct
-    // patching by patch_verified_entry().
-    //
-    // C1Breakpoint and VerifyFPU have one byte first instruction.
-    // Also first instruction will be one byte "push(rbp)" if stack banging
-    // code is not generated (see build_frame() above).
-    // For all these cases generate long instruction first.
-    fat_nop();
-  }
-  if (C1Breakpoint)int3();
-  IA32_ONLY( verify_FPU(0, "method_entry"); )
-
+int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature *ces, int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, Label& verified_value_entry_label, bool is_value_ro_entry) {
   assert(ValueTypePassFieldsAsArgs, "sanity");
-  GrowableArray<SigEntry>* sig = &ces->sig();
+  // Make sure there is enough stack space for this method's activation.
+  assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
+  generate_stack_overflow_check(bang_size_in_bytes);
+
+  GrowableArray<SigEntry>* sig = &ces->sig();
   GrowableArray<SigEntry>* sig_cc = is_value_ro_entry ? &ces->sig_cc_ro() : &ces->sig_cc();
   VMRegPair* regs = ces->regs();
   VMRegPair* regs_cc = is_value_ro_entry ? ces->regs_cc_ro() : ces->regs_cc();
@@ -411,32 +390,36 @@ int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature *ces, int f
   BasicType* sig_bt = NEW_RESOURCE_ARRAY(BasicType, sig_cc->length());
   int args_passed = sig->length();
   int args_passed_cc = SigEntry::fill_sig_bt(sig_cc, sig_bt);
 
   int extra_stack_offset = wordSize; // tos is return address.
 
-  // Create a temp frame so we can call into runtime. It must be properly set up to accommodate GC.
-  int sp_inc = (args_on_stack - args_on_stack_cc) * VMRegImpl::stack_slot_size;
-  if (sp_inc > 0) {
-    pop(r13);
+  // Check if we need to extend the stack for packing
+  int sp_inc = 0;
+  if (args_on_stack > args_on_stack_cc) {
+    // Two additional slots to account for return address
+    sp_inc = (args_on_stack + 2) * VMRegImpl::stack_slot_size;
+    sp_inc = align_up(sp_inc, StackAlignmentInBytes);
+    pop(r13); // Copy return address
     subptr(rsp, sp_inc);
     push(r13);
-  } else {
-    sp_inc = 0;
   }
 
+  // Create a temp frame so we can call into the runtime. It must be properly set up to accommodate GC.
   push(rbp);
   if (PreserveFramePointer) {
     mov(rbp, rsp);
   }
   subptr(rsp, frame_size_in_bytes);
-  if (sp_inc > 0) {
-    int real_frame_size = frame_size_in_bytes +
-      + wordSize // pushed rbp
-      + wordSize // returned address pushed by the stack extension code
-      + sp_inc; // stack extension
+
+  if (ces->c1_needs_stack_repair()) {
+    // Save stack increment (also account for fixed framesize and rbp)
+    assert((sp_inc & (StackAlignmentInBytes-1)) == 0, "stack increment not aligned");
+    int real_frame_size = sp_inc + frame_size_in_bytes + wordSize;
     movptr(Address(rsp, frame_size_in_bytes - wordSize), real_frame_size);
   }
 
+  // Initialize orig_pc to detect deoptimization during buffering in below runtime call
+  movptr(Address(rsp, sp_offset_for_orig_pc), 0);
+
   // FIXME -- call runtime only if we cannot in-line allocate all the incoming value args.
   movptr(rbx, (intptr_t)(ces->method()));
   if (is_value_ro_entry) {
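
The extension math in the hunk above, replayed standalone with made-up argument counts (stack_slot_size = 4 and StackAlignmentInBytes = 16 are assumptions for x86_64):

    #include <cassert>

    int align_up(int x, int a) { return (x + a - 1) & ~(a - 1); }

    int main() {
      const int stack_slot_size = 4;        // VMRegImpl::stack_slot_size, assumed
      const int StackAlignmentInBytes = 16; // assumed for x86_64
      int args_on_stack = 5;                // made-up stack-arg counts
      int args_on_stack_cc = 2;

      int sp_inc = 0;
      if (args_on_stack > args_on_stack_cc) {
        sp_inc = (args_on_stack + 2) * stack_slot_size;   // + two slots for the return address
        sp_inc = align_up(sp_inc, StackAlignmentInBytes); // 28 -> 32
      }
      assert(sp_inc == 32 && sp_inc % StackAlignmentInBytes == 0);
      return 0;
    }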
@@ -450,16 +433,13 @@ int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature *ces, int f
   addptr(rsp, frame_size_in_bytes);
   pop(rbp);
 
-  int n = shuffle_value_args(true, is_value_ro_entry, extra_stack_offset, sig_bt, sig_cc,
-                             args_passed_cc, args_on_stack_cc, regs_cc, // from
-                             args_passed, args_on_stack, regs);         // to
-  assert(sp_inc == n, "must be");
+  shuffle_value_args(true, is_value_ro_entry, extra_stack_offset, sig_bt, sig_cc,
+                     args_passed_cc, args_on_stack_cc, regs_cc, // from
+                     args_passed, args_on_stack, regs, sp_inc); // to
 
-  if (sp_inc != 0) {
-    // Do the stack banging here, and skip over the stack repair code in the
+  if (ces->c1_needs_stack_repair()) {
+    // Skip over the stack banging and frame setup code in the
     // verified_value_entry (which has a different real_frame_size).
-    assert(sp_inc > 0, "stack should not shrink");
-    generate_stack_overflow_check(bang_size_in_bytes);
     push(rbp);
     if (PreserveFramePointer) {
       mov(rbp, rsp);
@@ -451,7 +451,7 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const {
   // It is only an FP if the sender is an interpreter frame (or C1?).
   intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset);
 
-  // Repair the sender sp if this is a method with scalarized value type args
+  // Repair the sender sp if the frame has been extended
   sender_sp = repair_sender_sp(sender_sp, saved_fp_addr);
 
   // On Intel the return_address is always the word on the stack
@@ -472,22 +472,24 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const {
   // For C1, the runtime stub might not have oop maps, so set this flag
   // outside of update_register_map.
   bool caller_args = _cb->caller_must_gc_arguments(map->thread());
+#ifdef COMPILER1
   if (!caller_args) {
     nmethod* nm = _cb->as_nmethod_or_null();
-    if (nm != NULL && nm->is_compiled_by_c1() &&
-        nm->method()->has_scalarized_args() &&
+    if (nm != NULL && nm->is_compiled_by_c1() && nm->method()->has_scalarized_args() &&
         pc() < nm->verified_value_entry_point()) {
       // The VEP and VVEP(RO) of C1-compiled methods call buffer_value_args_xxx
       // before doing any argument shuffling, so we need to scan the oops
       // as the caller passes them.
+      caller_args = true;
+#ifdef ASSERT
       NativeCall* call = nativeCall_before(pc());
       address dest = call->destination();
-      if (dest == Runtime1::entry_for(Runtime1::buffer_value_args_no_receiver_id) ||
-          dest == Runtime1::entry_for(Runtime1::buffer_value_args_id)) {
-        caller_args = true;
-      }
+      assert(dest == Runtime1::entry_for(Runtime1::buffer_value_args_no_receiver_id) ||
+             dest == Runtime1::entry_for(Runtime1::buffer_value_args_id), "unexpected safepoint in entry point");
+#endif
     }
   }
+#endif
   map->set_include_argument_oops(caller_args);
   if (_cb->oop_maps() != NULL) {
     OopMapSet::update_register_map(this, map);
@@ -712,11 +714,7 @@ intptr_t* frame::repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr)
     // The stack increment resides just below the saved rbp on the stack
     // and does not account for the return address.
     intptr_t* real_frame_size_addr = (intptr_t*) (saved_fp_addr - 1);
-    int real_frame_size = (*real_frame_size_addr) / wordSize;
-    if (!cm->is_compiled_by_c1()) {
-      // Add size of return address (C1 already includes the RA size)
-      real_frame_size += 1;
-    }
+    int real_frame_size = ((*real_frame_size_addr) + wordSize) / wordSize;
     assert(real_frame_size >= _cb->frame_size(), "invalid frame size");
     sender_sp = unextended_sp() + real_frame_size;
   }
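
Reader side of the same contract: the saved word covers frame + rbp (plus any extension), and one word is added back for the return address before converting bytes to a word count. A worked example using the hypothetical sizes from the earlier sketches:

    #include <cassert>

    const int wordSize = 8;  // assumption for x86_64

    int main() {
      // What scalarized_entry stored (hypothetical sizes, in bytes):
      int sp_inc = 32, frame_size_in_bytes = 96;
      long saved = sp_inc + frame_size_in_bytes + wordSize;  // 136: extension + frame + rbp

      // What the reader computes, in words, adding the return address back:
      int real_frame_size = (saved + wordSize) / wordSize;   // (136 + 8) / 8 = 18
      assert(real_frame_size == 18);
      // sender_sp = unextended_sp() + real_frame_size
      return 0;
    }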
@@ -5515,26 +5515,18 @@ void MacroAssembler::unpack_value_args(Compile* C, bool receiver_only) {
   verified_entry(C, sp_inc);
 }
 
-int MacroAssembler::shuffle_value_args(bool is_packing, bool receiver_only, int extra_stack_offset,
-                                       BasicType* sig_bt, const GrowableArray<SigEntry>* sig_cc,
-                                       int args_passed, int args_on_stack, VMRegPair* regs,          // from
-                                       int args_passed_to, int args_on_stack_to, VMRegPair* regs_to) { // to
+void MacroAssembler::shuffle_value_args(bool is_packing, bool receiver_only, int extra_stack_offset,
+                                        BasicType* sig_bt, const GrowableArray<SigEntry>* sig_cc,
+                                        int args_passed, int args_on_stack, VMRegPair* regs,
+                                        int args_passed_to, int args_on_stack_to, VMRegPair* regs_to, int sp_inc) {
-  // Check if we need to extend the stack for packing/unpacking
-  int sp_inc = (args_on_stack_to - args_on_stack) * VMRegImpl::stack_slot_size;
-  if (sp_inc > 0) {
-    sp_inc = align_up(sp_inc, StackAlignmentInBytes);
-    if (!is_packing) {
-      // Save the return address, adjust the stack (make sure it is properly
-      // 16-byte aligned) and copy the return address to the new top of the stack.
-      // (Note: C1 does this in C1_MacroAssembler::scalarized_entry).
-      pop(r13);
-      subptr(rsp, sp_inc);
-      push(r13);
-    }
-  } else {
-    // The scalarized calling convention needs less stack space than the unscalarized one.
-    // No need to extend the stack, the caller will take care of these adjustments.
-    sp_inc = 0;
+  if (sp_inc > 0 && !is_packing) {
+    // Save the return address, adjust the stack (make sure it is properly
+    // 16-byte aligned) and copy the return address to the new top of the stack.
+    // (Note: C1 does this in C1_MacroAssembler::scalarized_entry).
+    pop(r13);
+    subptr(rsp, sp_inc);
+    push(r13);
   }
 
   int ret_off; // make sure we don't overwrite the return address
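
sp_inc is now an input: the callee-side difference formula deleted above and the caller-side formula in C1's scalarized_entry (all stack args plus two return-address slots) do not generally agree, which is also why the assert(sp_inc == n) call-site check was dropped earlier. A standalone contrast with made-up counts (assumed x86_64 constants):

    #include <cassert>

    int align_up(int x, int a) { return (x + a - 1) & ~(a - 1); }

    int main() {
      const int slot = 4, align = 16;              // VMRegImpl::stack_slot_size, StackAlignmentInBytes
      int from_stack_args = 2, to_stack_args = 5;  // made-up counts

      int old_inc = align_up((to_stack_args - from_stack_args) * slot, align); // difference-based: 16
      int new_inc = align_up((to_stack_args + 2) * slot, align);               // total + RA slots: 32
      assert(old_inc == 16 && new_inc == 32);  // the two formulas legitimately disagree
      return 0;
    }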
@@ -5547,31 +5539,25 @@ int MacroAssembler::shuffle_value_args(bool is_packing, bool receiver_only, int
     ret_off = sp_inc;
   }
 
-  return shuffle_value_args_common(is_packing, receiver_only, extra_stack_offset,
-                                   sig_bt, sig_cc,
-                                   args_passed, args_on_stack, regs,
-                                   args_passed_to, args_on_stack_to, regs_to,
-                                   sp_inc, ret_off);
+  shuffle_value_args_common(is_packing, receiver_only, extra_stack_offset,
+                            sig_bt, sig_cc,
+                            args_passed, args_on_stack, regs,
+                            args_passed_to, args_on_stack_to, regs_to,
+                            sp_inc, ret_off);
 }
 
 VMReg MacroAssembler::spill_reg_for(VMReg reg) {
   return reg->is_XMMRegister() ? xmm8->as_VMReg() : r14->as_VMReg();
 }
 
-// Restores the stack on return
-void MacroAssembler::restore_stack(Compile* C) {
-  int framesize = C->output()->frame_size_in_bytes();
-  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
-  // Remove word for return addr already pushed and RBP
-  framesize -= 2*wordSize;
-
-  if (C->needs_stack_repair()) {
-    // Restore rbp and repair rsp by adding the stack increment
-    movq(rbp, Address(rsp, framesize));
-    addq(rsp, Address(rsp, C->output()->sp_inc_offset()));
+void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair, int sp_inc_offset) {
+  assert((initial_framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+  if (needs_stack_repair) {
+    movq(rbp, Address(rsp, initial_framesize));
+    addq(rsp, Address(rsp, sp_inc_offset));
   } else {
-    if (framesize > 0) {
-      addq(rsp, framesize);
+    if (initial_framesize > 0) {
+      addq(rsp, initial_framesize);
     }
     pop(rbp);
   }
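
A standalone trace (made-up addresses and sizes; wordSize = 8 assumed) of why the repair path works: adding the saved increment brings rsp back to the original return-address slot no matter how far the entry code extended the stack, so the subsequent ret restores the caller's rsp exactly:

    #include <cassert>

    const long wordSize = 8;  // assumption for x86_64

    int main() {
      long frame = 96, sp_inc = 32;            // hypothetical frame size and extension
      long rsp_caller = 0x8000;                // caller rsp before the call, made up
      long rsp_entry = rsp_caller - wordSize;  // call pushed the return address

      // Entry: pop RA, extend by sp_inc, re-push RA, push rbp, allocate frame
      long rsp_body = rsp_entry + wordSize - sp_inc - wordSize - wordSize - frame;
      long saved_increment = sp_inc + frame + wordSize;  // what the entry code stored

      // addq(rsp, Address(rsp, sp_inc_offset)) lands on the original RA slot:
      assert(rsp_body + saved_increment == rsp_entry);
      return 0;
    }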
@@ -1663,12 +1663,12 @@ class MacroAssembler: public Assembler {
   bool pack_value_helper(const GrowableArray<SigEntry>* sig, int& sig_index, int vtarg_index,
                          VMReg to, VMRegPair* regs_from, int regs_from_count, int& from_index, RegState reg_state[],
                          int ret_off, int extra_stack_offset);
-  void restore_stack(Compile* C);
+  void remove_frame(int initial_framesize, bool needs_stack_repair, int sp_inc_offset);
 
-  int shuffle_value_args(bool is_packing, bool receiver_only, int extra_stack_offset,
-                         BasicType* sig_bt, const GrowableArray<SigEntry>* sig_cc,
-                         int args_passed, int args_on_stack, VMRegPair* regs,
-                         int args_passed_to, int args_on_stack_to, VMRegPair* regs_to);
+  void shuffle_value_args(bool is_packing, bool receiver_only, int extra_stack_offset,
+                          BasicType* sig_bt, const GrowableArray<SigEntry>* sig_cc,
+                          int args_passed, int args_on_stack, VMRegPair* regs,
+                          int args_passed_to, int args_on_stack_to, VMRegPair* regs_to, int sp_inc);
   bool shuffle_value_args_spill(bool is_packing, const GrowableArray<SigEntry>* sig_cc, int sig_cc_index,
                                 VMRegPair* regs_from, int from_index, int regs_from_count,
                                 RegState* reg_state, int sp_inc, int extra_stack_offset);
@@ -963,7 +963,9 @@ void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
     __ vzeroupper();
   }
 
-  __ restore_stack(C);
+  // Subtract two words to account for return address and rbp
+  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
+  __ remove_frame(initial_framesize, C->needs_stack_repair(), C->output()->sp_inc_offset());
 
   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
     __ reserved_stack_check();
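C2's frame_size_in_bytes counts the return address and the saved rbp, so the epilogue strips two words before handing the size to the now-shared remove_frame(). A trivial standalone check (assumed constants):

    #include <cassert>

    const int wordSize = 8;  // assumption for x86_64

    int main() {
      int frame_size_in_bytes = 112;  // hypothetical C2 frame size incl. RA and rbp
      int initial_framesize = frame_size_in_bytes - 2 * wordSize;  // 96
      // This is the amount rsp was actually decremented by in the prologue,
      // which is what remove_frame() expects (and asserts is 16-byte aligned).
      assert(initial_framesize == 96 && initial_framesize % 16 == 0);
      return 0;
    }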