Skip to content

Commit 8d5c005

Browse files
Thomas Schatzl, offamitkumar, TheRealMDoerr, c-refice, RealFYang
committed
8342382: Implement JEP 522: G1 GC: Improve Throughput by Reducing Synchronization
Co-authored-by: Amit Kumar <amitkumar@openjdk.org>
Co-authored-by: Martin Doerr <mdoerr@openjdk.org>
Co-authored-by: Carlo Refice <carlo.refice@oracle.com>
Co-authored-by: Fei Yang <fyang@openjdk.org>
Reviewed-by: iwalulya, rcastanedalo, aph, ayang
1 parent ca18291 commit 8d5c005

File tree

114 files changed

+3572
-4628
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

114 files changed

+3572
-4628
lines changed

src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp

Lines changed: 73 additions & 166 deletions
Original file line number | Diff line number | Diff line change
@@ -86,15 +86,48 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
8686
}
8787
}
8888

89-
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
90-
Register start, Register count, Register scratch, RegSet saved_regs) {
91-
__ push(saved_regs, sp);
92-
assert_different_registers(start, count, scratch);
93-
assert_different_registers(c_rarg0, count);
94-
__ mov(c_rarg0, start);
95-
__ mov(c_rarg1, count);
96-
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2);
97-
__ pop(saved_regs, sp);
89+
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm,
90+
DecoratorSet decorators,
91+
Register start,
92+
Register count,
93+
Register scratch,
94+
RegSet saved_regs) {
95+
96+
Label done;
97+
Label loop;
98+
Label next;
99+
100+
__ cbz(count, done);
101+
102+
// Calculate the number of card marks to set. Since the object might start and
103+
// end within a card, we need to calculate this via the card table indexes of
104+
// the actual start and last addresses covered by the object.
105+
// Temporarily use the count register for the last element address.
106+
__ lea(count, Address(start, count, Address::lsl(LogBytesPerHeapOop))); // end = start + count << LogBytesPerHeapOop
107+
__ sub(count, count, BytesPerHeapOop); // Use last element address for end.
108+
109+
__ lsr(start, start, CardTable::card_shift());
110+
__ lsr(count, count, CardTable::card_shift());
111+
__ sub(count, count, start); // Number of bytes to mark - 1.
112+
113+
// Add card table base offset to start.
114+
__ ldr(scratch, Address(rthread, in_bytes(G1ThreadLocalData::card_table_base_offset())));
115+
__ add(start, start, scratch);
116+
117+
__ bind(loop);
118+
if (UseCondCardMark) {
119+
__ ldrb(scratch, Address(start, count));
120+
// Instead of loading clean_card_val and comparing, we exploit the fact that
121+
// the LSB of non-clean cards is always 0, and the LSB of clean cards 1.
122+
__ tbz(scratch, 0, next);
123+
}
124+
static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zr");
125+
__ strb(zr, Address(start, count));
126+
__ bind(next);
127+
__ subs(count, count, 1);
128+
__ br(Assembler::GE, loop);
129+
130+
__ bind(done);
98131
}
99132

100133
static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
@@ -202,10 +235,14 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
202235
static void generate_post_barrier_fast_path(MacroAssembler* masm,
203236
const Register store_addr,
204237
const Register new_val,
238+
const Register thread,
205239
const Register tmp1,
206240
const Register tmp2,
207241
Label& done,
208242
bool new_val_may_be_null) {
243+
assert(thread == rthread, "must be");
244+
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg, rscratch1);
245+
209246
// Does store cross heap regions?
210247
__ eor(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
211248
__ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
@@ -214,33 +251,19 @@ static void generate_post_barrier_fast_path(MacroAssembler* masm,
214251
if (new_val_may_be_null) {
215252
__ cbz(new_val, done);
216253
}
217-
// Storing region crossing non-null, is card young?
254+
// Storing region crossing non-null.
218255
__ lsr(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base
219-
__ load_byte_map_base(tmp2); // tmp2 := card table base address
220-
__ add(tmp1, tmp1, tmp2); // tmp1 := card address
221-
__ ldrb(tmp2, Address(tmp1)); // tmp2 := card
222-
__ cmpw(tmp2, (int)G1CardTable::g1_young_card_val()); // tmp2 := card == young_card_val?
223-
}
224256

225-
static void generate_post_barrier_slow_path(MacroAssembler* masm,
226-
const Register thread,
227-
const Register tmp1,
228-
const Register tmp2,
229-
Label& done,
230-
Label& runtime) {
231-
__ membar(Assembler::StoreLoad); // StoreLoad membar
232-
__ ldrb(tmp2, Address(tmp1)); // tmp2 := card
233-
__ cbzw(tmp2, done);
234-
// Storing a region crossing, non-null oop, card is clean.
235-
// Dirty card and log.
236-
STATIC_ASSERT(CardTable::dirty_card_val() == 0);
237-
__ strb(zr, Address(tmp1)); // *(card address) := dirty_card_val
238-
generate_queue_test_and_insertion(masm,
239-
G1ThreadLocalData::dirty_card_queue_index_offset(),
240-
G1ThreadLocalData::dirty_card_queue_buffer_offset(),
241-
runtime,
242-
thread, tmp1, tmp2, rscratch1);
243-
__ b(done);
257+
Address card_table_addr(thread, in_bytes(G1ThreadLocalData::card_table_base_offset()));
258+
__ ldr(tmp2, card_table_addr); // tmp2 := card table base address
259+
if (UseCondCardMark) {
260+
__ ldrb(rscratch1, Address(tmp1, tmp2)); // rscratch1 := card
261+
// Instead of loading clean_card_val and comparing, we exploit the fact that
262+
// the LSB of non-clean cards is always 0, and the LSB of clean cards 1.
263+
__ tbz(rscratch1, 0, done);
264+
}
265+
static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zr");
266+
__ strb(zr, Address(tmp1, tmp2)); // *(card address) := dirty_card_val
244267
}
245268

246269
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
@@ -249,27 +272,8 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
249272
Register thread,
250273
Register tmp1,
251274
Register tmp2) {
252-
assert(thread == rthread, "must be");
253-
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2,
254-
rscratch1);
255-
assert(store_addr != noreg && new_val != noreg && tmp1 != noreg
256-
&& tmp2 != noreg, "expecting a register");
257-
258275
Label done;
259-
Label runtime;
260-
261-
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */);
262-
// If card is young, jump to done
263-
__ br(Assembler::EQ, done);
264-
generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, done, runtime);
265-
266-
__ bind(runtime);
267-
// save the live input values
268-
RegSet saved = RegSet::of(store_addr);
269-
__ push(saved, sp);
270-
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp1, thread);
271-
__ pop(saved, sp);
272-
276+
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, false /* new_val_may_be_null */);
273277
__ bind(done);
274278
}
275279

@@ -329,38 +333,10 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
329333
Register thread,
330334
Register tmp1,
331335
Register tmp2,
332-
G1PostBarrierStubC2* stub) {
333-
assert(thread == rthread, "must be");
334-
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2,
335-
rscratch1);
336-
assert(store_addr != noreg && new_val != noreg && tmp1 != noreg
337-
&& tmp2 != noreg, "expecting a register");
338-
339-
stub->initialize_registers(thread, tmp1, tmp2);
340-
341-
bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
342-
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null);
343-
// If card is not young, jump to stub (slow path)
344-
__ br(Assembler::NE, *stub->entry());
345-
346-
__ bind(*stub->continuation());
347-
}
348-
349-
void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
350-
G1PostBarrierStubC2* stub) const {
351-
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
352-
Label runtime;
353-
Register thread = stub->thread();
354-
Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
355-
Register tmp2 = stub->tmp2();
356-
assert(stub->tmp3() == noreg, "not needed in this platform");
357-
358-
__ bind(*stub->entry());
359-
generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, *stub->continuation(), runtime);
360-
361-
__ bind(runtime);
362-
generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
363-
__ b(*stub->continuation());
336+
bool new_val_may_be_null) {
337+
Label done;
338+
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, new_val_may_be_null);
339+
__ bind(done);
364340
}
365341

366342
#endif // COMPILER2
@@ -456,20 +432,19 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
456432
__ b(*stub->continuation());
457433
}
458434

459-
void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
460-
G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
461-
__ bind(*stub->entry());
462-
assert(stub->addr()->is_register(), "Precondition.");
463-
assert(stub->new_val()->is_register(), "Precondition.");
464-
Register new_val_reg = stub->new_val()->as_register();
465-
__ cbz(new_val_reg, *stub->continuation());
466-
ce->store_parameter(stub->addr()->as_pointer_register(), 0);
467-
__ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin()));
468-
__ b(*stub->continuation());
469-
}
470-
471435
#undef __
472436

437+
void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
438+
Register store_addr,
439+
Register new_val,
440+
Register thread,
441+
Register tmp1,
442+
Register tmp2) {
443+
Label done;
444+
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
445+
masm->bind(done);
446+
}
447+
473448
#define __ sasm->
474449

475450
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
@@ -521,74 +496,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
521496
__ epilogue();
522497
}
523498

524-
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
525-
__ prologue("g1_post_barrier", false);
526-
527-
// arg0: store_address
528-
Address store_addr(rfp, 2*BytesPerWord);
529-
530-
BarrierSet* bs = BarrierSet::barrier_set();
531-
CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
532-
CardTable* ct = ctbs->card_table();
533-
534-
Label done;
535-
Label runtime;
536-
537-
// At this point we know new_value is non-null and the new_value crosses regions.
538-
// Must check to see if card is already dirty
539-
540-
const Register thread = rthread;
541-
542-
Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
543-
Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
544-
545-
const Register card_offset = rscratch2;
546-
// LR is free here, so we can use it to hold the byte_map_base.
547-
const Register byte_map_base = lr;
548-
549-
assert_different_registers(card_offset, byte_map_base, rscratch1);
550-
551-
__ load_parameter(0, card_offset);
552-
__ lsr(card_offset, card_offset, CardTable::card_shift());
553-
__ load_byte_map_base(byte_map_base);
554-
__ ldrb(rscratch1, Address(byte_map_base, card_offset));
555-
__ cmpw(rscratch1, (int)G1CardTable::g1_young_card_val());
556-
__ br(Assembler::EQ, done);
557-
558-
assert((int)CardTable::dirty_card_val() == 0, "must be 0");
559-
560-
__ membar(Assembler::StoreLoad);
561-
__ ldrb(rscratch1, Address(byte_map_base, card_offset));
562-
__ cbzw(rscratch1, done);
563-
564-
// storing region crossing non-null, card is clean.
565-
// dirty card and log.
566-
__ strb(zr, Address(byte_map_base, card_offset));
567-
568-
// Convert card offset into an address in card_addr
569-
Register card_addr = card_offset;
570-
__ add(card_addr, byte_map_base, card_addr);
571-
572-
__ ldr(rscratch1, queue_index);
573-
__ cbz(rscratch1, runtime);
574-
__ sub(rscratch1, rscratch1, wordSize);
575-
__ str(rscratch1, queue_index);
576-
577-
// Reuse LR to hold buffer_addr
578-
const Register buffer_addr = lr;
579-
580-
__ ldr(buffer_addr, buffer);
581-
__ str(card_addr, Address(buffer_addr, rscratch1));
582-
__ b(done);
583-
584-
__ bind(runtime);
585-
__ push_call_clobbered_registers();
586-
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
587-
__ pop_call_clobbered_registers();
588-
__ bind(done);
589-
__ epilogue();
590-
}
591-
592499
#undef __
593500

594501
#endif // COMPILER1

src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -32,9 +32,7 @@
3232
class LIR_Assembler;
3333
class StubAssembler;
3434
class G1PreBarrierStub;
35-
class G1PostBarrierStub;
3635
class G1PreBarrierStubC2;
37-
class G1PostBarrierStubC2;
3836

3937
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
4038
protected:
@@ -65,10 +63,15 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
6563
public:
6664
#ifdef COMPILER1
6765
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
68-
void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
6966

7067
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
71-
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
68+
69+
void g1_write_barrier_post_c1(MacroAssembler* masm,
70+
Register store_addr,
71+
Register new_val,
72+
Register thread,
73+
Register tmp1,
74+
Register tmp2);
7275
#endif
7376

7477
#ifdef COMPILER2
@@ -87,9 +90,7 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
8790
Register thread,
8891
Register tmp1,
8992
Register tmp2,
90-
G1PostBarrierStubC2* c2_stub);
91-
void generate_c2_post_barrier_stub(MacroAssembler* masm,
92-
G1PostBarrierStubC2* stub) const;
93+
bool new_val_may_be_null);
9394
#endif
9495

9596
void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,

src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
//
2-
// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
2+
// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
33
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
//
55
// This code is free software; you can redistribute it and/or modify it
@@ -62,13 +62,13 @@ static void write_barrier_post(MacroAssembler* masm,
6262
Register new_val,
6363
Register tmp1,
6464
Register tmp2) {
65-
if (!G1PostBarrierStubC2::needs_barrier(node)) {
65+
if (!G1BarrierStubC2::needs_post_barrier(node)) {
6666
return;
6767
}
6868
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
6969
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
70-
G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
71-
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, rthread, tmp1, tmp2, stub);
70+
bool new_val_may_be_null = G1BarrierStubC2::post_new_val_may_be_null(node);
71+
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, rthread, tmp1, tmp2, new_val_may_be_null);
7272
}
7373

7474
%}

0 commit comments

Comments
 (0)