From c952a34b4668343e9bcd8dac4d866ffc07ce4eac Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Wed, 1 Jan 2025 22:03:49 +0100 Subject: [PATCH 1/4] tmp --- .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 50 ++++++++++++------- .../cpu/aarch64/macroAssembler_aarch64.cpp | 2 +- .../cpu/aarch64/macroAssembler_aarch64.hpp | 1 + 3 files changed, 35 insertions(+), 18 deletions(-) diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index b01360c3f7ebc..ee3b549732cfc 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -1215,18 +1215,33 @@ void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { __ bind(*op->stub()->continuation()); } +auto receiver_offset = ReceiverTypeData::receiver_offset; +auto receiver_count_offset = ReceiverTypeData::receiver_count_offset; + +template +Address md_at(MacroAssembler *masm, Register scratch, Register mdo, ciMethodData *md, + ciProfileData *data, T offset, int i) { + return masm->form_address(scratch, mdo, + md->byte_offset_of_slot(data, offset(i)), + LogBytesPerWord); +} + void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, Register recv, Label* update_done) { + Register scratch = rscratch2; + + auto data_at = [=](ByteSize (*offset)(uint), uint i) -> Address { + return md_at(_masm, scratch, mdo, md, data, offset, i); + }; + for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { Label next_test; // See if the receiver is receiver[n]. - __ lea(rscratch2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); - __ ldr(rscratch1, Address(rscratch2)); + __ ldr(rscratch1, data_at(ReceiverTypeData::receiver_offset, i)); __ cmp(recv, rscratch1); __ br(Assembler::NE, next_test); - Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); - __ addptr(data_addr, DataLayout::counter_increment); + __ addptr(data_at(ReceiverTypeData::receiver_count_offset, i), DataLayout::counter_increment); __ b(*update_done); __ bind(next_test); } @@ -1234,15 +1249,14 @@ void LIR_Assembler::type_profile_helper(Register mdo, // Didn't find receiver; find next empty slot and fill it in for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { Label next_test; - __ lea(rscratch2, - Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); - Address recv_addr(rscratch2); - __ ldr(rscratch1, recv_addr); - __ cbnz(rscratch1, next_test); - __ str(recv, recv_addr); + { + Address recv_addr(data_at(ReceiverTypeData::receiver_offset, i)); + __ ldr(rscratch1, recv_addr); + __ cbnz(rscratch1, next_test); + __ str(recv, recv_addr); + } __ mov(rscratch1, DataLayout::counter_increment); - __ lea(rscratch2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); - __ str(rscratch1, Address(rscratch2)); + __ str(rscratch1, data_at(ReceiverTypeData::receiver_count_offset, i)); __ b(*update_done); __ bind(next_test); } @@ -1414,8 +1428,7 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { // Object is null; update MDO and exit Address data_addr = __ form_address(rscratch2, mdo, - md->byte_offset_of_slot(data, DataLayout::flags_offset()), - 0); + md->byte_offset_of_slot(data, DataLayout::flags_offset()), 0); __ ldrb(rscratch1, data_addr); __ orr(rscratch1, rscratch1, BitData::null_seen_byte_constant()); __ strb(rscratch1, data_addr); @@ -2518,6 +2531,8 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { __ load_klass(result, obj); } +using ::VirtualCallData; + void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ciMethod* method = op->profiled_method(); int bci = op->profiled_bci(); @@ -2566,10 +2581,11 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { for (i = 0; i < VirtualCallData::row_limit(); i++) { ciKlass* receiver = vc_data->receiver(i); if (receiver == nullptr) { - Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); __ mov_metadata(rscratch1, known_klass->constant_encoding()); - __ lea(rscratch2, recv_addr); - __ str(rscratch1, Address(rscratch2)); + Address recv_addr + = __ form_address(rscratch2, mdo, + md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)), LogBytesPerWord); + __ str(rscratch1, recv_addr); Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); __ addptr(data_addr, DataLayout::counter_increment); return; diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index d561fb912a311..2cfe682cdd4fa 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -1381,7 +1381,7 @@ void MacroAssembler::lookup_virtual_method(Register recv_klass, } else { vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; ldr(method_result, - form_address(rscratch1, recv_klass, vtable_offset_in_bytes, 0)); + form_address(rscratch1, recv_klass, vtable_offset_in_bytes, LogBytesPerWord)); } } diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index 244de10d0e26c..dc4332f1bd1f8 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -1173,6 +1173,7 @@ class MacroAssembler: public Assembler { // Arithmetics + // Clobber: rscratch2 void addptr(const Address &dst, int32_t src); void cmpptr(Register src1, Address src2); From 3b93519fc2206820797d9aa6052196e5f2a57b82 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Thu, 9 Jan 2025 16:15:26 +0100 Subject: [PATCH 2/4] 8346890: AArch64: Profile counters generate suboptimal code --- .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 47 ++++++++----------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index ee3b549732cfc..04f87d38bf821 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -1215,33 +1215,27 @@ void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { __ bind(*op->stub()->continuation()); } -auto receiver_offset = ReceiverTypeData::receiver_offset; -auto receiver_count_offset = ReceiverTypeData::receiver_count_offset; - -template -Address md_at(MacroAssembler *masm, Register scratch, Register mdo, ciMethodData *md, - ciProfileData *data, T offset, int i) { - return masm->form_address(scratch, mdo, - md->byte_offset_of_slot(data, offset(i)), - LogBytesPerWord); -} - void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, Register recv, Label* update_done) { - Register scratch = rscratch2; - auto data_at = [=](ByteSize (*offset)(uint), uint i) -> Address { - return md_at(_masm, scratch, mdo, md, data, offset, i); + // Given a profile data offset, generate an Address which points to + // the corresponding slot in mdo->data(). + // Clobbers rscratch2. + auto slot_at = [=](ByteSize offset) -> Address { + return __ form_address(rscratch2, mdo, + md->byte_offset_of_slot(data, offset), + LogBytesPerWord); }; for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { Label next_test; // See if the receiver is receiver[n]. - __ ldr(rscratch1, data_at(ReceiverTypeData::receiver_offset, i)); + __ ldr(rscratch1, slot_at(ReceiverTypeData::receiver_offset(i))); __ cmp(recv, rscratch1); __ br(Assembler::NE, next_test); - __ addptr(data_at(ReceiverTypeData::receiver_count_offset, i), DataLayout::counter_increment); + __ addptr(slot_at(ReceiverTypeData::receiver_count_offset(i)), + DataLayout::counter_increment); __ b(*update_done); __ bind(next_test); } @@ -1249,14 +1243,12 @@ void LIR_Assembler::type_profile_helper(Register mdo, // Didn't find receiver; find next empty slot and fill it in for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { Label next_test; - { - Address recv_addr(data_at(ReceiverTypeData::receiver_offset, i)); - __ ldr(rscratch1, recv_addr); - __ cbnz(rscratch1, next_test); - __ str(recv, recv_addr); - } + Address recv_addr(slot_at(ReceiverTypeData::receiver_offset(i))); + __ ldr(rscratch1, recv_addr); + __ cbnz(rscratch1, next_test); + __ str(recv, recv_addr); __ mov(rscratch1, DataLayout::counter_increment); - __ str(rscratch1, data_at(ReceiverTypeData::receiver_count_offset, i)); + __ str(rscratch1, slot_at(ReceiverTypeData::receiver_count_offset(i))); __ b(*update_done); __ bind(next_test); } @@ -2531,8 +2523,6 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { __ load_klass(result, obj); } -using ::VirtualCallData; - void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ciMethod* method = op->profiled_method(); int bci = op->profiled_bci(); @@ -2582,9 +2572,10 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ciKlass* receiver = vc_data->receiver(i); if (receiver == nullptr) { __ mov_metadata(rscratch1, known_klass->constant_encoding()); - Address recv_addr - = __ form_address(rscratch2, mdo, - md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)), LogBytesPerWord); + Address recv_addr = + __ form_address(rscratch2, mdo, + md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)), + LogBytesPerWord); __ str(rscratch1, recv_addr); Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); __ addptr(data_addr, DataLayout::counter_increment); From 0e84904f9530965e1ca5337cb9f9bc7aabe0f61d Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Fri, 10 Jan 2025 13:39:08 +0100 Subject: [PATCH 3/4] Leave for later --- src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 2cfe682cdd4fa..d561fb912a311 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -1381,7 +1381,7 @@ void MacroAssembler::lookup_virtual_method(Register recv_klass, } else { vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; ldr(method_result, - form_address(rscratch1, recv_klass, vtable_offset_in_bytes, LogBytesPerWord)); + form_address(rscratch1, recv_klass, vtable_offset_in_bytes, 0)); } } From fc1c714c5d8d1649a80630f0dc6d6231caf17dea Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Thu, 23 Jan 2025 11:56:33 +0000 Subject: [PATCH 4/4] Fix comments --- src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index dc4332f1bd1f8..bd537af59e471 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -1173,8 +1173,10 @@ class MacroAssembler: public Assembler { // Arithmetics - // Clobber: rscratch2 + // Clobber: rscratch1, rscratch2 void addptr(const Address &dst, int32_t src); + + // Clobber: rscratch1 void cmpptr(Register src1, Address src2); void cmpoop(Register obj1, Register obj2);