Skip to content

Commit 593bec6

Browse files
author
Thomas Schatzl
committed
8302122: Parallelize TLAB retirement in prologue in G1
8297611: G1: Merge tlab and per-thread dirty card log flushing

Reviewed-by: kbarrett, ayang
1 parent e971f90 commit 593bec6

17 files changed

+328
-109
lines changed

src/hotspot/share/gc/g1/g1BarrierSet.cpp

+2-3
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ void G1BarrierSet::write_ref_field_post_slow(volatile CardValue* byte) {
101101
}
102102
}
103103

104-
void G1BarrierSet::invalidate(MemRegion mr) {
104+
void G1BarrierSet::invalidate(JavaThread* thread, MemRegion mr) {
105105
if (mr.is_empty()) {
106106
return;
107107
}
@@ -120,9 +120,8 @@ void G1BarrierSet::invalidate(MemRegion mr) {
120120

121121
OrderAccess::storeload();
122122
// Enqueue if necessary.
123-
Thread* thr = Thread::current();
124123
G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
125-
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thr);
124+
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread);
126125
for (; byte <= last_byte; byte++) {
127126
CardValue bv = *byte;
128127
assert(bv != G1CardTable::g1_young_card_val(), "Invalid card");

src/hotspot/share/gc/g1/g1BarrierSet.hpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ class G1BarrierSet: public CardTableBarrierSet {
4747
return barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
4848
}
4949

50+
void invalidate(JavaThread* thread, MemRegion mr);
51+
5052
public:
5153
G1BarrierSet(G1CardTable* table);
5254
~G1BarrierSet() { }
@@ -70,12 +72,10 @@ class G1BarrierSet: public CardTableBarrierSet {
7072
template <DecoratorSet decorators, typename T>
7173
void write_ref_field_pre(T* field);
7274

73-
// NB: if you do a whole-heap invalidation, the "usual invariant" defined
74-
// above no longer applies.
75-
void invalidate(MemRegion mr);
75+
inline void invalidate(MemRegion mr);
76+
inline void write_region(JavaThread* thread, MemRegion mr);
7677

77-
void write_region(MemRegion mr) { invalidate(mr); }
78-
void write_ref_array_work(MemRegion mr) { invalidate(mr); }
78+
inline void write_ref_array_work(MemRegion mr);
7979

8080
template <DecoratorSet decorators, typename T>
8181
void write_ref_field_post(T* field);

src/hotspot/share/gc/g1/g1BarrierSet.inline.hpp

+13
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "oops/access.inline.hpp"
3434
#include "oops/compressedOops.inline.hpp"
3535
#include "oops/oop.hpp"
36+
#include "runtime/thread.hpp"
3637

3738
inline void G1BarrierSet::enqueue_preloaded(oop pre_val) {
3839
// Nulls should have been already filtered.
@@ -67,6 +68,18 @@ inline void G1BarrierSet::write_ref_field_pre(T* field) {
6768
enqueue(field);
6869
}
6970

71+
inline void G1BarrierSet::invalidate(MemRegion mr) {
72+
invalidate(JavaThread::current(), mr);
73+
}
74+
75+
inline void G1BarrierSet::write_region(JavaThread* thread, MemRegion mr) {
76+
invalidate(thread, mr);
77+
}
78+
79+
inline void G1BarrierSet::write_ref_array_work(MemRegion mr) {
80+
invalidate(mr);
81+
}
82+
7083
template <DecoratorSet decorators, typename T>
7184
inline void G1BarrierSet::write_ref_field_post(T* field) {
7285
volatile CardValue* byte = _card_table->byte_for(field);

src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp

+19-29
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2001, 2022, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2001, 2023, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -556,36 +556,10 @@ void G1DirtyCardQueueSet::abandon_logs_and_stats() {
556556
_detached_refinement_stats.reset();
557557
}
558558

559-
void G1DirtyCardQueueSet::concatenate_logs_and_stats() {
559+
void G1DirtyCardQueueSet::update_refinement_stats(G1ConcurrentRefineStats& stats) {
560560
assert_at_safepoint();
561561

562-
// Disable mutator refinement until concurrent refinement decides otherwise.
563-
set_mutator_refinement_threshold(SIZE_MAX);
564-
565-
// Iterate over all the threads, if we find a partial log add it to
566-
// the global list of logs.
567-
struct ConcatenateThreadLogClosure : public ThreadClosure {
568-
G1DirtyCardQueueSet& _qset;
569-
G1ConcurrentRefineStats _total_stats;
570-
571-
ConcatenateThreadLogClosure(G1DirtyCardQueueSet& qset) :
572-
_qset{qset}, _total_stats{} {}
573-
574-
virtual void do_thread(Thread* t) {
575-
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(t);
576-
// Flush the buffer if non-empty. Flush before accumulating and
577-
// resetting stats, since flushing may modify the stats.
578-
if ((queue.buffer() != nullptr) &&
579-
(queue.index() != _qset.buffer_size())) {
580-
_qset.flush_queue(queue);
581-
}
582-
G1ConcurrentRefineStats& qstats = *queue.refinement_stats();
583-
_total_stats += qstats;
584-
qstats.reset();
585-
}
586-
} closure(*this);
587-
Threads::threads_do(&closure);
588-
_concatenated_refinement_stats = closure._total_stats;
562+
_concatenated_refinement_stats = stats;
589563

590564
enqueue_all_paused_buffers();
591565
verify_num_cards();
@@ -596,6 +570,22 @@ void G1DirtyCardQueueSet::concatenate_logs_and_stats() {
596570
_detached_refinement_stats.reset();
597571
}
598572

573+
G1ConcurrentRefineStats G1DirtyCardQueueSet::concatenate_log_and_stats(Thread* thread) {
574+
assert_at_safepoint();
575+
576+
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread);
577+
// Flush the buffer if non-empty. Flush before accumulating and
578+
// resetting stats, since flushing may modify the stats.
579+
if ((queue.buffer() != nullptr) &&
580+
(queue.index() != buffer_size())) {
581+
flush_queue(queue);
582+
}
583+
584+
G1ConcurrentRefineStats result = *queue.refinement_stats();
585+
queue.refinement_stats()->reset();
586+
return result;
587+
}
588+
599589
G1ConcurrentRefineStats G1DirtyCardQueueSet::concatenated_refinement_stats() const {
600590
assert_at_safepoint();
601591
return _concatenated_refinement_stats;

src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp

+8-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2001, 2022, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2001, 2023, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -274,10 +274,14 @@ class G1DirtyCardQueueSet: public PtrQueueSet {
274274
// precondition: at safepoint.
275275
void abandon_logs_and_stats();
276276

277-
// Collect and reset all the per-thread refinement stats. If any threads
278-
// have partial logs then add them to the global list.
277+
// Update global refinement statistics with the ones given and the ones from
278+
// detached threads.
279279
// precondition: at safepoint.
280-
void concatenate_logs_and_stats();
280+
void update_refinement_stats(G1ConcurrentRefineStats& stats);
281+
// Add the given thread's partial logs to the global list and return and reset
282+
// its refinement stats.
283+
// precondition: at safepoint.
284+
G1ConcurrentRefineStats concatenate_log_and_stats(Thread* thread);
281285

282286
// Return the total of mutator refinement stats for all threads.
283287
// precondition: at safepoint.

src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp

+8-5
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
5151
{
5252
assert(max_gc_threads > 0, "Must have some GC threads");
5353

54+
_gc_par_phases[RetireTLABsAndFlushLogs] = new WorkerDataArray<double>("RetireTLABsAndFlushLogs", "JT Retire TLABs And Flush Logs (ms):", max_gc_threads);
55+
_gc_par_phases[NonJavaThreadFlushLogs] = new WorkerDataArray<double>("NonJavaThreadFlushLogs", "Non-JT Flush Logs (ms):", max_gc_threads);
56+
5457
_gc_par_phases[GCWorkerStart] = new WorkerDataArray<double>("GCWorkerStart", "GC Worker Start (ms):", max_gc_threads);
5558
_gc_par_phases[ExtRootScan] = new WorkerDataArray<double>("ExtRootScan", "Ext Root Scanning (ms):", max_gc_threads);
5659

@@ -165,7 +168,7 @@ void G1GCPhaseTimes::reset() {
165168
_cur_optional_merge_heap_roots_time_ms = 0.0;
166169
_cur_prepare_merge_heap_roots_time_ms = 0.0;
167170
_cur_optional_prepare_merge_heap_roots_time_ms = 0.0;
168-
_cur_prepare_tlab_time_ms = 0.0;
171+
_cur_pre_evacuate_prepare_time_ms = 0.0;
169172
_cur_post_evacuate_cleanup_1_time_ms = 0.0;
170173
_cur_post_evacuate_cleanup_2_time_ms = 0.0;
171174
_cur_expand_heap_time_ms = 0.0;
@@ -402,8 +405,7 @@ double G1GCPhaseTimes::print_pre_evacuate_collection_set() const {
402405
const double pre_concurrent_start_ms = average_time_ms(ResetMarkingState) +
403406
average_time_ms(NoteStartOfMark);
404407

405-
const double sum_ms = _cur_prepare_tlab_time_ms +
406-
_cur_concatenate_dirty_card_logs_time_ms +
408+
const double sum_ms = _cur_pre_evacuate_prepare_time_ms +
407409
_recorded_young_cset_choice_time_ms +
408410
_recorded_non_young_cset_choice_time_ms +
409411
_cur_region_register_time +
@@ -412,8 +414,9 @@ double G1GCPhaseTimes::print_pre_evacuate_collection_set() const {
412414

413415
info_time("Pre Evacuate Collection Set", sum_ms);
414416

415-
debug_time("Prepare TLABs", _cur_prepare_tlab_time_ms);
416-
debug_time("Concatenate Dirty Card Logs", _cur_concatenate_dirty_card_logs_time_ms);
417+
debug_time("Pre Evacuate Prepare", _cur_pre_evacuate_prepare_time_ms);
418+
debug_phase(_gc_par_phases[RetireTLABsAndFlushLogs], 1);
419+
debug_phase(_gc_par_phases[NonJavaThreadFlushLogs], 1);
417420
debug_time("Choose Collection Set", (_recorded_young_cset_choice_time_ms + _recorded_non_young_cset_choice_time_ms));
418421
debug_time("Region Register", _cur_region_register_time);
419422

src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp

+5-9
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
4646

4747
public:
4848
enum GCParPhases {
49+
RetireTLABsAndFlushLogs,
50+
NonJavaThreadFlushLogs,
4951
GCWorkerStart,
5052
ExtRootScan,
5153
ThreadRoots,
@@ -172,9 +174,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
172174
double _cur_prepare_merge_heap_roots_time_ms;
173175
double _cur_optional_prepare_merge_heap_roots_time_ms;
174176

175-
double _cur_prepare_tlab_time_ms;
176-
177-
double _cur_concatenate_dirty_card_logs_time_ms;
177+
double _cur_pre_evacuate_prepare_time_ms;
178178

179179
double _cur_post_evacuate_cleanup_1_time_ms;
180180
double _cur_post_evacuate_cleanup_2_time_ms;
@@ -265,12 +265,8 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
265265

266266
size_t sum_thread_work_items(GCParPhases phase, uint index = 0);
267267

268-
void record_prepare_tlab_time_ms(double ms) {
269-
_cur_prepare_tlab_time_ms = ms;
270-
}
271-
272-
void record_concatenate_dirty_card_logs_time_ms(double ms) {
273-
_cur_concatenate_dirty_card_logs_time_ms = ms;
268+
void record_pre_evacuate_prepare_time_ms(double ms) {
269+
_cur_pre_evacuate_prepare_time_ms = ms;
274270
}
275271

276272
void record_expand_heap_time(double ms) {

src/hotspot/share/gc/g1/g1YoungCollector.cpp

+9-42
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -45,6 +45,7 @@
4545
#include "gc/g1/g1Trace.hpp"
4646
#include "gc/g1/g1YoungCollector.hpp"
4747
#include "gc/g1/g1YoungGCPostEvacuateTasks.hpp"
48+
#include "gc/g1/g1YoungGCPreEvacuateTasks.hpp"
4849
#include "gc/g1/g1_globals.hpp"
4950
#include "gc/shared/concurrentGCBreakpoints.hpp"
5051
#include "gc/shared/gcTraceTime.inline.hpp"
@@ -462,48 +463,15 @@ void G1YoungCollector::set_young_collection_default_active_worker_threads(){
462463
log_info(gc,task)("Using %u workers of %u for evacuation", active_workers, workers()->max_workers());
463464
}
464465

465-
void G1YoungCollector::retire_tlabs() {
466-
Ticks start = Ticks::now();
467-
_g1h->retire_tlabs();
468-
double retire_time = (Ticks::now() - start).seconds() * MILLIUNITS;
469-
phase_times()->record_prepare_tlab_time_ms(retire_time);
470-
}
471-
472-
void G1YoungCollector::concatenate_dirty_card_logs_and_stats() {
473-
Ticks start = Ticks::now();
474-
G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
475-
size_t old_cards = qset.num_cards();
476-
qset.concatenate_logs_and_stats();
477-
size_t pending_cards = qset.num_cards();
478-
size_t thread_buffer_cards = pending_cards - old_cards;
479-
policy()->record_concurrent_refinement_stats(pending_cards, thread_buffer_cards);
480-
double concat_time = (Ticks::now() - start).seconds() * MILLIUNITS;
481-
phase_times()->record_concatenate_dirty_card_logs_time_ms(concat_time);
482-
}
483-
484-
#ifdef ASSERT
485-
void G1YoungCollector::verify_empty_dirty_card_logs() const {
486-
struct Verifier : public ThreadClosure {
487-
size_t _buffer_size;
488-
Verifier() : _buffer_size(G1BarrierSet::dirty_card_queue_set().buffer_size()) {}
489-
void do_thread(Thread* t) override {
490-
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(t);
491-
assert((queue.buffer() == nullptr) || (queue.index() == _buffer_size),
492-
"non-empty dirty card queue for thread");
493-
}
494-
} verifier;
495-
Threads::threads_do(&verifier);
496-
}
497-
#endif // ASSERT
498-
499466
void G1YoungCollector::pre_evacuate_collection_set(G1EvacInfo* evacuation_info) {
500-
// Flush early, so later phases don't need to account for per-thread stuff.
501-
// Flushes deferred card marks, so must precede concatenating logs.
502-
retire_tlabs();
503-
504-
// Flush early, so later phases don't need to account for per-thread stuff.
505-
concatenate_dirty_card_logs_and_stats();
467+
{
468+
Ticks start = Ticks::now();
469+
G1PreEvacuateCollectionSetBatchTask cl;
470+
G1CollectedHeap::heap()->run_batch_task(&cl);
471+
phase_times()->record_pre_evacuate_prepare_time_ms((Ticks::now() - start).seconds() * 1000.0);
472+
}
506473

474+
// Needs log buffers flushed.
507475
calculate_collection_set(evacuation_info, policy()->max_pause_time_ms());
508476

509477
// Please see comment in g1CollectedHeap.hpp and
@@ -535,7 +503,6 @@ void G1YoungCollector::pre_evacuate_collection_set(G1EvacInfo* evacuation_info)
535503
}
536504

537505
assert(_g1h->verifier()->check_region_attr_table(), "Inconsistency in the region attributes table.");
538-
verify_empty_dirty_card_logs();
539506

540507
#if COMPILER2_OR_JVMCI
541508
DerivedPointerTable::clear();

src/hotspot/share/gc/g1/g1YoungCollector.hpp

+1-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -97,10 +97,6 @@ class G1YoungCollector {
9797

9898
void set_young_collection_default_active_worker_threads();
9999

100-
void retire_tlabs();
101-
void concatenate_dirty_card_logs_and_stats();
102-
void verify_empty_dirty_card_logs() const NOT_DEBUG_RETURN;
103-
104100
void pre_evacuate_collection_set(G1EvacInfo* evacuation_info);
105101
// Actually do the work of evacuating the parts of the collection set.
106102
// The has_optional_evacuation_work flag for the initial collection set

0 commit comments

Comments
 (0)