Skip to content

Commit

Permalink
8300527: [Lilliput] Simplify and optimize loading of Klass*
Browse files Browse the repository at this point in the history
Reviewed-by: shade
  • Loading branch information
rkennke committed Jan 27, 2023
1 parent bf6823a commit bf6d4f3
Show file tree
Hide file tree
Showing 27 changed files with 110 additions and 313 deletions.
12 changes: 11 additions & 1 deletion src/hotspot/cpu/aarch64/aarch64.ad
Expand Up @@ -7237,7 +7237,17 @@ instruct loadNKlass(iRegNNoSp dst, memory4 mem, rFlagsReg cr)
ins_encode %{
assert($mem$$disp == oopDesc::klass_offset_in_bytes(), "expect correct offset");
assert($mem$$index$$Register == noreg, "expect no index");
__ load_nklass($dst$$Register, $mem$$base$$Register);
Register dst = $dst$$Register;
Register obj = $mem$$base$$Register;
C2LoadNKlassStub* stub = new (Compile::current()->comp_arena()) C2LoadNKlassStub(dst);
Compile::current()->output()->add_stub(stub);
__ ldr(dst, Address(obj, oopDesc::mark_offset_in_bytes()));
// NOTE: We can't use tbnz here, because the target is sometimes too far away
// and cannot be encoded.
__ tst(dst, markWord::monitor_value);
__ br(Assembler::NE, stub->entry());
__ bind(stub->continuation());
__ lsr(dst, dst, markWord::klass_shift);
%}
ins_pipe(pipe_slow);
%}
Expand Down
24 changes: 3 additions & 21 deletions src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp
Expand Up @@ -32,6 +32,7 @@
#include "c1/c1_Runtime1.hpp"
#include "classfile/javaClasses.hpp"
#include "nativeInst_aarch64.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/sharedRuntime.hpp"
#include "vmreg_aarch64.inline.hpp"

Expand Down Expand Up @@ -255,27 +256,8 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) {

void LoadKlassStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
Register res = _result->as_register();
ce->store_parameter(_obj->as_register(), 0);
if (res != r0) {
// Note: we cannot push/pop r0 around the call, because that
// would mess with the stack pointer sp, and we need that to
// remain intact for store_paramater/load_argument to work correctly.
// We swap r0 and res instead, which preserves current r0 in res.
// The preserved value is later saved and restored around the
// call in Runtime1::load_klass_id.
__ mov(rscratch1, r0);
__ mov(r0, res);
__ mov(res, rscratch1);
}
__ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::load_klass_id)));
if (res != r0) {
// Swap back r0 and res. This brings the call return value
// from r0 into res, and the preserved value in res back into r0.
__ mov(rscratch1, r0);
__ mov(r0, res);
__ mov(res, rscratch1);
}
Register d = _result->as_register();
__ ldr(d, Address(d, OM_OFFSET_NO_MONITOR_VALUE_TAG(header)));
__ b(_continuation);
}

Expand Down
12 changes: 4 additions & 8 deletions src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
Expand Up @@ -2562,7 +2562,6 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) {
void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) {
Register obj = op->obj()->as_pointer_register();
Register result = op->result_opr()->as_pointer_register();
Register tmp = rscratch1;

CodeEmitInfo* info = op->info();
if (info != NULL) {
Expand All @@ -2572,16 +2571,13 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) {
assert(UseCompressedClassPointers, "expects UseCompressedClassPointers");

// Check if we can take the (common) fast path, if obj is unlocked.
__ ldr(tmp, Address(obj, oopDesc::mark_offset_in_bytes()));
__ eor(tmp, tmp, markWord::unlocked_value);
__ tst(tmp, markWord::lock_mask_in_place);
__ ldr(result, Address(obj, oopDesc::mark_offset_in_bytes()));
__ tst(result, markWord::monitor_value);
__ br(Assembler::NE, *op->stub()->entry());
__ bind(*op->stub()->continuation());

// Fast-path: shift and decode Klass*.
__ mov(result, tmp);
// Shift and decode Klass*.
__ lsr(result, result, markWord::klass_shift);

__ bind(*op->stub()->continuation());
__ decode_klass_not_null(result);
}

Expand Down
10 changes: 0 additions & 10 deletions src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp
Expand Up @@ -675,16 +675,6 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {

break;

case load_klass_id:
{
StubFrame f(sasm, "load_klass", dont_gc_arguments);
save_live_registers_no_oop_map(sasm, true);
f.load_argument(0, r0); // obj
__ call_VM_leaf(CAST_FROM_FN_PTR(address, oopDesc::load_nklass_runtime), r0);
restore_live_registers_except_r0(sasm, true);
}
break;

case counter_overflow_id:
{
Register bci = r0, method = r1;
Expand Down
12 changes: 12 additions & 0 deletions src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp
Expand Up @@ -25,6 +25,7 @@
#include "precompiled.hpp"
#include "opto/c2_MacroAssembler.hpp"
#include "opto/c2_CodeStubs.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"

Expand Down Expand Up @@ -74,4 +75,15 @@ void C2CheckLockStackStub::emit(C2_MacroAssembler& masm) {
__ b(continuation());
}

// Upper bound, in bytes, of the out-of-line code emitted by emit():
// two fixed-width A64 instructions (ldr + b), 4 bytes each.
int C2LoadNKlassStub::max_size() const {
return 8;
}

// Slow path for loadNKlass: taken when the object's mark word has
// markWord::monitor_value set, i.e. dst holds a tagged ObjectMonitor*
// instead of the mark. Replace dst with the mark word kept in the
// monitor's header field (presumably the displaced header — confirm
// against ObjectMonitor), then rejoin the inline code, which extracts
// the narrow Klass* by shifting right by markWord::klass_shift.
void C2LoadNKlassStub::emit(C2_MacroAssembler& masm) {
__ bind(entry()); // reached via the monitor_value test in the inline path
Register d = dst();
// d currently holds the tagged ObjectMonitor*; load the header it saved.
__ ldr(d, Address(d, OM_OFFSET_NO_MONITOR_VALUE_TAG(header)));
__ b(continuation()); // resume inline code (klass_shift happens there)
}

#undef __
30 changes: 7 additions & 23 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
Expand Up @@ -4034,35 +4034,19 @@ void MacroAssembler::load_method_holder(Register holder, Register method) {
void MacroAssembler::load_nklass(Register dst, Register src) {
assert(UseCompressedClassPointers, "expects UseCompressedClassPointers");

assert_different_registers(src, dst);

Label slow, done;
Label fast;

// Check if we can take the (common) fast path, if obj is unlocked.
ldr(dst, Address(src, oopDesc::mark_offset_in_bytes()));
eor(dst, dst, markWord::unlocked_value);
tst(dst, markWord::lock_mask_in_place);
br(Assembler::NE, slow);
tst(dst, markWord::monitor_value);
br(Assembler::NE, fast);

// Fetch displaced header
ldr(dst, Address(dst, OM_OFFSET_NO_MONITOR_VALUE_TAG(header)));

// Fast-path: shift and decode Klass*.
bind(fast);
lsr(dst, dst, markWord::klass_shift);
b(done);

bind(slow);
RegSet saved_regs = RegSet::of(lr);
// We need r0 as argument and return register for the call. Preserve it, if necessary.
if (dst != r0) {
saved_regs += RegSet::of(r0);
}
push(saved_regs, sp);
mov(r0, src);
assert(StubRoutines::load_nklass() != NULL, "Must have stub");
far_call(RuntimeAddress(StubRoutines::load_nklass()));
if (dst != r0) {
mov(dst, r0);
}
pop(saved_regs, sp);
bind(done);
}

void MacroAssembler::load_klass(Register dst, Register src) {
Expand Down
25 changes: 0 additions & 25 deletions src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
Expand Up @@ -6817,29 +6817,6 @@ class StubGenerator: public StubCodeGenerator {
}
#endif // LINUX

// Pass object argument in r0 (which has to be preserved outside this stub)
// Pass back result in r0
// Clobbers rscratch1
address generate_load_nklass() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "load_nklass");

address start = __ pc();

__ set_last_Java_frame(sp, rfp, lr, rscratch1);
__ enter();
__ push(RegSet::of(rscratch1, rscratch2), sp);
__ push_call_clobbered_registers_except(r0);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, oopDesc::load_nklass_runtime), 1);
__ pop_call_clobbered_registers_except(r0);
__ pop(RegSet::of(rscratch1, rscratch2), sp);
__ leave();
__ reset_last_Java_frame(true);
__ ret(lr);

return start;
}

address generate_cont_thaw(Continuation::thaw_kind kind) {
bool return_barrier = Continuation::is_thaw_return_barrier(kind);
bool return_barrier_exception = Continuation::is_thaw_return_barrier_exception(kind);
Expand Down Expand Up @@ -8007,8 +7984,6 @@ class StubGenerator: public StubCodeGenerator {
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) {
StubRoutines::_dcos = generate_dsin_dcos(/* isCos = */ true);
}

StubRoutines::_load_nklass = generate_load_nklass();
}

void generate_phase1() {
Expand Down
15 changes: 3 additions & 12 deletions src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp
Expand Up @@ -30,6 +30,7 @@
#include "c1/c1_Runtime1.hpp"
#include "classfile/javaClasses.hpp"
#include "nativeInst_x86.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/sharedRuntime.hpp"
#include "utilities/align.hpp"
#include "utilities/macros.hpp"
Expand Down Expand Up @@ -303,18 +304,8 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) {
void LoadKlassStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
#ifdef _LP64
Register res = _result->as_register();
ce->store_parameter(_obj->as_register(), 0);
if (res != rax) {
// This preserves rax and allows it to be used as return-register,
// without messing with the stack.
__ xchgptr(rax, res);
}
__ call(RuntimeAddress(Runtime1::entry_for(Runtime1::load_klass_id)));
if (res != rax) {
// Swap back rax, and move result to correct register.
__ xchgptr(rax, res);
}
Register d = _result->as_register();
__ movq(d, Address(d, OM_OFFSET_NO_MONITOR_VALUE_TAG(header)));
__ jmp(_continuation);
#else
__ should_not_reach_here();
Expand Down
10 changes: 3 additions & 7 deletions src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
Expand Up @@ -3541,16 +3541,12 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) {
assert_different_registers(tmp, result);

// Check if we can take the (common) fast path, if obj is unlocked.
__ movq(tmp, Address(obj, oopDesc::mark_offset_in_bytes()));
__ xorq(tmp, markWord::unlocked_value);
__ testb(tmp, markWord::lock_mask_in_place);
__ movq(result, Address(obj, oopDesc::mark_offset_in_bytes()));
__ testb(result, markWord::monitor_value);
__ jcc(Assembler::notZero, *op->stub()->entry());

__ bind(*op->stub()->continuation());
// Fast-path: shift and decode Klass*.
__ movq(result, tmp);
__ shrq(result, markWord::klass_shift);

__ bind(*op->stub()->continuation());
__ decode_klass_not_null(result, tmp);
#else
__ movptr(result, Address(obj, oopDesc::klass_offset_in_bytes()));
Expand Down
11 changes: 0 additions & 11 deletions src/hotspot/cpu/x86/c1_Runtime1_x86.cpp
Expand Up @@ -1040,17 +1040,6 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}

break;
#ifdef _LP64
case load_klass_id:
{
StubFrame f(sasm, "load_klass", dont_gc_arguments);
sasm->save_live_registers_no_oop_map(true);
f.load_argument(0, c_rarg0); // obj
__ call_VM_leaf(CAST_FROM_FN_PTR(address, oopDesc::load_nklass_runtime), c_rarg0);
sasm->restore_live_registers_except_rax(true);
}
break;
#endif
case counter_overflow_id:
{
Register bci = rax, method = rbx;
Expand Down
14 changes: 14 additions & 0 deletions src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp
Expand Up @@ -25,6 +25,7 @@
#include "precompiled.hpp"
#include "opto/c2_MacroAssembler.hpp"
#include "opto/c2_CodeStubs.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"

Expand Down Expand Up @@ -83,4 +84,17 @@ void C2CheckLockStackStub::emit(C2_MacroAssembler& masm) {
__ jmp(continuation(), false /* maybe_short */);
}

#ifdef _LP64
// Upper bound, in bytes, of the out-of-line code emitted by emit():
// one movq (base+disp addressing) plus one jmp. x86-64 encodings are
// variable-length; 10 bytes is the assumed worst case — TODO confirm
// against the actual instruction encodings.
int C2LoadNKlassStub::max_size() const {
return 10;
}

// Slow path for loading the narrow Klass*: taken when the object's mark
// word has markWord::monitor_value set, i.e. dst holds a tagged
// ObjectMonitor* instead of the mark. Replace dst with the mark word
// kept in the monitor's header field (presumably the displaced header —
// confirm against ObjectMonitor), then rejoin the inline code, which
// shifts right by markWord::klass_shift to recover the narrow Klass*.
void C2LoadNKlassStub::emit(C2_MacroAssembler& masm) {
__ bind(entry()); // reached via the monitor_value test in the inline path
Register d = dst();
// d currently holds the tagged ObjectMonitor*; load the header it saved.
__ movq(d, Address(d, OM_OFFSET_NO_MONITOR_VALUE_TAG(header)));
__ jmp(continuation()); // resume inline code (klass_shift happens there)
}
#endif

#undef __
40 changes: 8 additions & 32 deletions src/hotspot/cpu/x86/macroAssembler_x86.cpp
Expand Up @@ -5096,35 +5096,18 @@ void MacroAssembler::load_method_holder(Register holder, Register method) {

#ifdef _LP64
void MacroAssembler::load_nklass(Register dst, Register src) {
assert_different_registers(src, dst);
assert(UseCompressedClassPointers, "expect compressed class pointers");

Label slow, done;
Label fast;
movq(dst, Address(src, oopDesc::mark_offset_in_bytes()));
// NOTE: While it would seem nice to use xorb instead (for which we don't have an encoding in our assembler),
// the encoding for xorq uses the signed version (0x81/6) of xor, which encodes as compact as xorb would,
// and doesn't make a difference performance-wise.
xorq(dst, markWord::unlocked_value);
testb(dst, markWord::lock_mask_in_place);
jccb(Assembler::notZero, slow);
testb(dst, markWord::monitor_value);
jccb(Assembler::zero, fast);

shrq(dst, markWord::klass_shift);
jmp(done);
bind(slow);
// Fetch displaced header
movq(dst, Address(dst, OM_OFFSET_NO_MONITOR_VALUE_TAG(header)));

if (dst != rax) {
push(rax);
}
if (src != rax) {
mov(rax, src);
}
call(RuntimeAddress(StubRoutines::load_nklass()));
if (dst != rax) {
mov(dst, rax);
pop(rax);
}

bind(done);
bind(fast);
shrq(dst, markWord::klass_shift);
}
#endif

Expand All @@ -5133,17 +5116,10 @@ void MacroAssembler::load_klass(Register dst, Register src, Register tmp, bool n
assert_different_registers(dst, tmp);
#ifdef _LP64
assert(UseCompressedClassPointers, "expect compressed class pointers");
Register d = dst;
if (src == dst) {
d = tmp;
}
if (null_check_src) {
null_check(src, oopDesc::mark_offset_in_bytes());
}
load_nklass(d, src);
if (src == dst) {
mov(dst, d);
}
load_nklass(dst, src);
decode_klass_not_null(dst, tmp);
#else
if (null_check_src) {
Expand Down

0 comments on commit bf6d4f3

Please sign in to comment.