-
Notifications
You must be signed in to change notification settings - Fork 6.1k
8322630: Remove ICStubs and related safepoints #17495
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
cc98cce
82134e6
140a8a1
2ee554e
a8cfe40
49758bf
707b271
7012996
fc04a60
60fa659
5487777
ba5a4dc
42a2198
6dd64b5
01733b8
dfdcdcc
08c146b
29790af
2999428
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,6 +29,7 @@ | |
| #include "asm/assembler.hpp" | ||
| #include "asm/assembler.inline.hpp" | ||
| #include "ci/ciEnv.hpp" | ||
| #include "code/compiledIC.hpp" | ||
| #include "compiler/compileTask.hpp" | ||
| #include "compiler/disassembler.hpp" | ||
| #include "compiler/oopMap.hpp" | ||
|
|
@@ -965,7 +966,7 @@ int MacroAssembler::max_trampoline_stub_size() { | |
| } | ||
|
|
||
| void MacroAssembler::emit_static_call_stub() { | ||
| // CompiledDirectStaticCall::set_to_interpreted knows the | ||
| // CompiledDirectCall::set_to_interpreted knows the | ||
| // exact layout of this stub. | ||
|
|
||
| isb(); | ||
|
|
@@ -995,10 +996,51 @@ address MacroAssembler::ic_call(address entry, jint method_index) { | |
| // address const_ptr = long_constant((jlong)Universe::non_oop_word()); | ||
| // uintptr_t offset; | ||
| // ldr_constant(rscratch2, const_ptr); | ||
| movptr(rscratch2, (uintptr_t)Universe::non_oop_word()); | ||
| movptr(rscratch2, (intptr_t)Universe::non_oop_word()); | ||
| return trampoline_call(Address(entry, rh)); | ||
| } | ||
|
|
||
| int MacroAssembler::ic_check_size() { | ||
| if (target_needs_far_branch(CAST_FROM_FN_PTR(address, SharedRuntime::get_ic_miss_stub()))) { | ||
| return NativeInstruction::instruction_size * 7; | ||
| } else { | ||
| return NativeInstruction::instruction_size * 5; | ||
| } | ||
| } | ||
|
|
||
| int MacroAssembler::ic_check(int end_alignment) { | ||
| Register receiver = j_rarg0; | ||
| Register data = rscratch2; | ||
| Register tmp1 = rscratch1; | ||
| Register tmp2 = r10; | ||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It would be nice if we could still call [inline code lost in extraction]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah I thought the same. As it's important for correctness that ic_check_size is accurate, I was hoping to have as few different modes in it as possible. |
||
| // The UEP of a code blob ensures that the VEP is padded. However, the padding of the UEP is placed | ||
| // before the inline cache check, so we don't have to execute any nop instructions when dispatching | ||
| // through the UEP, yet we can ensure that the VEP is aligned appropriately. That's why we align | ||
| // before the inline cache check here, and not after | ||
| align(end_alignment, offset() + ic_check_size()); | ||
|
|
||
| int uep_offset = offset(); | ||
|
|
||
| if (UseCompressedClassPointers) { | ||
| ldrw(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes())); | ||
| ldrw(tmp2, Address(data, CompiledICData::speculated_klass_offset())); | ||
| cmpw(tmp1, tmp2); | ||
| } else { | ||
| ldr(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes())); | ||
| ldr(tmp2, Address(data, CompiledICData::speculated_klass_offset())); | ||
| cmp(tmp1, tmp2); | ||
| } | ||
|
|
||
| Label dont; | ||
| br(Assembler::EQ, dont); | ||
| far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); | ||
| bind(dont); | ||
| assert((offset() % end_alignment) == 0, "Misaligned verified entry point"); | ||
|
|
||
| return uep_offset; | ||
| } | ||
|
|
||
| // Implementation of call_VM versions | ||
|
|
||
| void MacroAssembler::call_VM(Register oop_result, | ||
|
|
@@ -1100,7 +1142,14 @@ void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thr | |
| } | ||
|
|
||
| void MacroAssembler::align(int modulus) { | ||
| while (offset() % modulus != 0) nop(); | ||
| align(modulus, offset()); | ||
| } | ||
|
|
||
| // Ensure that the code at target bytes offset from the current offset() is aligned | ||
| // according to modulus. | ||
| void MacroAssembler::align(int modulus, int target) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It would be nice to document what this extra [inline code lost in extraction — presumably the `target` parameter] … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good idea. I wrote a comment. |
||
| int delta = target - offset(); | ||
| while ((offset() + delta) % modulus != 0) nop(); | ||
| } | ||
|
|
||
| void MacroAssembler::post_call_nop() { | ||
|
|
@@ -1197,7 +1246,7 @@ void MacroAssembler::lookup_interface_method(Register recv_klass, | |
| } | ||
|
|
||
| // Look up the method for a megamorphic invokeinterface call in a single pass over itable: | ||
| // - check recv_klass (actual object class) is a subtype of resolved_klass from CompiledICHolder | ||
| // - check recv_klass (actual object class) is a subtype of resolved_klass from CompiledICData | ||
| // - find a holder_klass (class that implements the method) vtable offset and get the method from vtable by index | ||
| // The target method is determined by <holder_klass, itable_index>. | ||
| // The receiver klass is in recv_klass. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we really want to remove the optimization that skips alignment for small methods?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's not obvious to me how likely this optimization is to kick in, and if it does, how many bytes it really saves. It optimizes accessor methods, when the UEP can be squeezed down to 4 instructions. The previous inline_cache_check was >= 2 instructions, there is a jump to skip the far jump, and the far_jump is >= 1 instructions. So it seems like it would kick in when far jumps can just branch and the compressed class encoding is simple enough. However, with the new ic_check we always have at least 5 instructions, sometimes 7. So it seemed to me like the intended optimization wouldn't apply any longer anyway.