Skip to content

Commit

Permalink
8302122: Parallelize TLAB retirement in prologue in G1
Browse files Browse the repository at this point in the history
8297611: G1: Merge tlab and per-thread dirty card log flushing

Reviewed-by: kbarrett, ayang
  • Loading branch information
Thomas Schatzl committed Feb 20, 2023
1 parent e971f90 commit 593bec6
Show file tree
Hide file tree
Showing 17 changed files with 328 additions and 109 deletions.
5 changes: 2 additions & 3 deletions src/hotspot/share/gc/g1/g1BarrierSet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ void G1BarrierSet::write_ref_field_post_slow(volatile CardValue* byte) {
}
}

void G1BarrierSet::invalidate(MemRegion mr) {
void G1BarrierSet::invalidate(JavaThread* thread, MemRegion mr) {
if (mr.is_empty()) {
return;
}
Expand All @@ -120,9 +120,8 @@ void G1BarrierSet::invalidate(MemRegion mr) {

OrderAccess::storeload();
// Enqueue if necessary.
Thread* thr = Thread::current();
G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thr);
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread);
for (; byte <= last_byte; byte++) {
CardValue bv = *byte;
assert(bv != G1CardTable::g1_young_card_val(), "Invalid card");
Expand Down
10 changes: 5 additions & 5 deletions src/hotspot/share/gc/g1/g1BarrierSet.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ class G1BarrierSet: public CardTableBarrierSet {
return barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
}

void invalidate(JavaThread* thread, MemRegion mr);

public:
G1BarrierSet(G1CardTable* table);
~G1BarrierSet() { }
Expand All @@ -70,12 +72,10 @@ class G1BarrierSet: public CardTableBarrierSet {
template <DecoratorSet decorators, typename T>
void write_ref_field_pre(T* field);

// NB: if you do a whole-heap invalidation, the "usual invariant" defined
// above no longer applies.
void invalidate(MemRegion mr);
inline void invalidate(MemRegion mr);
inline void write_region(JavaThread* thread, MemRegion mr);

void write_region(MemRegion mr) { invalidate(mr); }
void write_ref_array_work(MemRegion mr) { invalidate(mr); }
inline void write_ref_array_work(MemRegion mr);

template <DecoratorSet decorators, typename T>
void write_ref_field_post(T* field);
Expand Down
13 changes: 13 additions & 0 deletions src/hotspot/share/gc/g1/g1BarrierSet.inline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "oops/access.inline.hpp"
#include "oops/compressedOops.inline.hpp"
#include "oops/oop.hpp"
#include "runtime/thread.hpp"

inline void G1BarrierSet::enqueue_preloaded(oop pre_val) {
// Nulls should have been already filtered.
Expand Down Expand Up @@ -67,6 +68,18 @@ inline void G1BarrierSet::write_ref_field_pre(T* field) {
enqueue(field);
}

// Convenience overload: invalidates mr on behalf of the calling thread by
// forwarding to invalidate(JavaThread*, MemRegion) with JavaThread::current().
inline void G1BarrierSet::invalidate(MemRegion mr) {
invalidate(JavaThread::current(), mr);
}

// Forwards to invalidate(thread, mr), using the explicitly supplied thread
// instead of looking up the current one.
inline void G1BarrierSet::write_region(JavaThread* thread, MemRegion mr) {
invalidate(thread, mr);
}

// Forwards to the single-argument invalidate(mr) overload for the given region.
inline void G1BarrierSet::write_ref_array_work(MemRegion mr) {
invalidate(mr);
}

template <DecoratorSet decorators, typename T>
inline void G1BarrierSet::write_ref_field_post(T* field) {
volatile CardValue* byte = _card_table->byte_for(field);
Expand Down
48 changes: 19 additions & 29 deletions src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -556,36 +556,10 @@ void G1DirtyCardQueueSet::abandon_logs_and_stats() {
_detached_refinement_stats.reset();
}

void G1DirtyCardQueueSet::concatenate_logs_and_stats() {
void G1DirtyCardQueueSet::update_refinement_stats(G1ConcurrentRefineStats& stats) {
assert_at_safepoint();

// Disable mutator refinement until concurrent refinement decides otherwise.
set_mutator_refinement_threshold(SIZE_MAX);

// Iterate over all the threads, if we find a partial log add it to
// the global list of logs.
struct ConcatenateThreadLogClosure : public ThreadClosure {
G1DirtyCardQueueSet& _qset;
G1ConcurrentRefineStats _total_stats;

ConcatenateThreadLogClosure(G1DirtyCardQueueSet& qset) :
_qset{qset}, _total_stats{} {}

virtual void do_thread(Thread* t) {
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(t);
// Flush the buffer if non-empty. Flush before accumulating and
// resetting stats, since flushing may modify the stats.
if ((queue.buffer() != nullptr) &&
(queue.index() != _qset.buffer_size())) {
_qset.flush_queue(queue);
}
G1ConcurrentRefineStats& qstats = *queue.refinement_stats();
_total_stats += qstats;
qstats.reset();
}
} closure(*this);
Threads::threads_do(&closure);
_concatenated_refinement_stats = closure._total_stats;
_concatenated_refinement_stats = stats;

enqueue_all_paused_buffers();
verify_num_cards();
Expand All @@ -596,6 +570,22 @@ void G1DirtyCardQueueSet::concatenate_logs_and_stats() {
_detached_refinement_stats.reset();
}

// Flushes the given thread's dirty card queue if it holds any entries, then
// hands back a copy of the thread's refinement stats and resets the originals.
// Must be called at a safepoint.
G1ConcurrentRefineStats G1DirtyCardQueueSet::concatenate_log_and_stats(Thread* thread) {
  assert_at_safepoint();

  G1DirtyCardQueue& dcq = G1ThreadLocalData::dirty_card_queue(thread);

  // The flush has to happen before the stats are copied and cleared, because
  // flushing may itself modify the stats.
  const bool has_pending_cards = (dcq.buffer() != nullptr) &&
                                 (dcq.index() != buffer_size());
  if (has_pending_cards) {
    flush_queue(dcq);
  }

  G1ConcurrentRefineStats* thread_stats = dcq.refinement_stats();
  G1ConcurrentRefineStats snapshot = *thread_stats;
  thread_stats->reset();
  return snapshot;
}

G1ConcurrentRefineStats G1DirtyCardQueueSet::concatenated_refinement_stats() const {
assert_at_safepoint();
return _concatenated_refinement_stats;
Expand Down
12 changes: 8 additions & 4 deletions src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -274,10 +274,14 @@ class G1DirtyCardQueueSet: public PtrQueueSet {
// precondition: at safepoint.
void abandon_logs_and_stats();

// Collect and reset all the per-thread refinement stats. If any threads
// have partial logs then add them to the global list.
// Update global refinement statistics with the ones given and the ones from
// detached threads.
// precondition: at safepoint.
void concatenate_logs_and_stats();
void update_refinement_stats(G1ConcurrentRefineStats& stats);
// Add the given thread's partial log, if any, to the global list, then reset
// the thread's refinement stats and return their values from before the reset.
// precondition: at safepoint.
G1ConcurrentRefineStats concatenate_log_and_stats(Thread* thread);

// Return the total of mutator refinement stats for all threads.
// precondition: at safepoint.
Expand Down
13 changes: 8 additions & 5 deletions src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
{
assert(max_gc_threads > 0, "Must have some GC threads");

_gc_par_phases[RetireTLABsAndFlushLogs] = new WorkerDataArray<double>("RetireTLABsAndFlushLogs", "JT Retire TLABs And Flush Logs (ms):", max_gc_threads);
_gc_par_phases[NonJavaThreadFlushLogs] = new WorkerDataArray<double>("NonJavaThreadFlushLogs", "Non-JT Flush Logs (ms):", max_gc_threads);

_gc_par_phases[GCWorkerStart] = new WorkerDataArray<double>("GCWorkerStart", "GC Worker Start (ms):", max_gc_threads);
_gc_par_phases[ExtRootScan] = new WorkerDataArray<double>("ExtRootScan", "Ext Root Scanning (ms):", max_gc_threads);

Expand Down Expand Up @@ -165,7 +168,7 @@ void G1GCPhaseTimes::reset() {
_cur_optional_merge_heap_roots_time_ms = 0.0;
_cur_prepare_merge_heap_roots_time_ms = 0.0;
_cur_optional_prepare_merge_heap_roots_time_ms = 0.0;
_cur_prepare_tlab_time_ms = 0.0;
_cur_pre_evacuate_prepare_time_ms = 0.0;
_cur_post_evacuate_cleanup_1_time_ms = 0.0;
_cur_post_evacuate_cleanup_2_time_ms = 0.0;
_cur_expand_heap_time_ms = 0.0;
Expand Down Expand Up @@ -402,8 +405,7 @@ double G1GCPhaseTimes::print_pre_evacuate_collection_set() const {
const double pre_concurrent_start_ms = average_time_ms(ResetMarkingState) +
average_time_ms(NoteStartOfMark);

const double sum_ms = _cur_prepare_tlab_time_ms +
_cur_concatenate_dirty_card_logs_time_ms +
const double sum_ms = _cur_pre_evacuate_prepare_time_ms +
_recorded_young_cset_choice_time_ms +
_recorded_non_young_cset_choice_time_ms +
_cur_region_register_time +
Expand All @@ -412,8 +414,9 @@ double G1GCPhaseTimes::print_pre_evacuate_collection_set() const {

info_time("Pre Evacuate Collection Set", sum_ms);

debug_time("Prepare TLABs", _cur_prepare_tlab_time_ms);
debug_time("Concatenate Dirty Card Logs", _cur_concatenate_dirty_card_logs_time_ms);
debug_time("Pre Evacuate Prepare", _cur_pre_evacuate_prepare_time_ms);
debug_phase(_gc_par_phases[RetireTLABsAndFlushLogs], 1);
debug_phase(_gc_par_phases[NonJavaThreadFlushLogs], 1);
debug_time("Choose Collection Set", (_recorded_young_cset_choice_time_ms + _recorded_non_young_cset_choice_time_ms));
debug_time("Region Register", _cur_region_register_time);

Expand Down
14 changes: 5 additions & 9 deletions src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {

public:
enum GCParPhases {
RetireTLABsAndFlushLogs,
NonJavaThreadFlushLogs,
GCWorkerStart,
ExtRootScan,
ThreadRoots,
Expand Down Expand Up @@ -172,9 +174,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
double _cur_prepare_merge_heap_roots_time_ms;
double _cur_optional_prepare_merge_heap_roots_time_ms;

double _cur_prepare_tlab_time_ms;

double _cur_concatenate_dirty_card_logs_time_ms;
double _cur_pre_evacuate_prepare_time_ms;

double _cur_post_evacuate_cleanup_1_time_ms;
double _cur_post_evacuate_cleanup_2_time_ms;
Expand Down Expand Up @@ -265,12 +265,8 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {

size_t sum_thread_work_items(GCParPhases phase, uint index = 0);

// Stores the given duration in _cur_prepare_tlab_time_ms, overwriting any
// previously recorded value.
void record_prepare_tlab_time_ms(double ms) {
_cur_prepare_tlab_time_ms = ms;
}

void record_concatenate_dirty_card_logs_time_ms(double ms) {
_cur_concatenate_dirty_card_logs_time_ms = ms;
void record_pre_evacuate_prepare_time_ms(double ms) {
_cur_pre_evacuate_prepare_time_ms = ms;
}

void record_expand_heap_time(double ms) {
Expand Down
51 changes: 9 additions & 42 deletions src/hotspot/share/gc/g1/g1YoungCollector.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -45,6 +45,7 @@
#include "gc/g1/g1Trace.hpp"
#include "gc/g1/g1YoungCollector.hpp"
#include "gc/g1/g1YoungGCPostEvacuateTasks.hpp"
#include "gc/g1/g1YoungGCPreEvacuateTasks.hpp"
#include "gc/g1/g1_globals.hpp"
#include "gc/shared/concurrentGCBreakpoints.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
Expand Down Expand Up @@ -462,48 +463,15 @@ void G1YoungCollector::set_young_collection_default_active_worker_threads(){
log_info(gc,task)("Using %u workers of %u for evacuation", active_workers, workers()->max_workers());
}

void G1YoungCollector::retire_tlabs() {
Ticks start = Ticks::now();
_g1h->retire_tlabs();
double retire_time = (Ticks::now() - start).seconds() * MILLIUNITS;
phase_times()->record_prepare_tlab_time_ms(retire_time);
}

void G1YoungCollector::concatenate_dirty_card_logs_and_stats() {
Ticks start = Ticks::now();
G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
size_t old_cards = qset.num_cards();
qset.concatenate_logs_and_stats();
size_t pending_cards = qset.num_cards();
size_t thread_buffer_cards = pending_cards - old_cards;
policy()->record_concurrent_refinement_stats(pending_cards, thread_buffer_cards);
double concat_time = (Ticks::now() - start).seconds() * MILLIUNITS;
phase_times()->record_concatenate_dirty_card_logs_time_ms(concat_time);
}

#ifdef ASSERT
// Only compiled when ASSERT is defined. Applies a checking closure through
// Threads::threads_do() and asserts that each visited thread's dirty card
// queue holds no entries.
void G1YoungCollector::verify_empty_dirty_card_logs() const {
// Closure that checks the dirty card queue of one thread at a time.
struct Verifier : public ThreadClosure {
// Buffer size of the global dirty card queue set, read once at construction.
size_t _buffer_size;
Verifier() : _buffer_size(G1BarrierSet::dirty_card_queue_set().buffer_size()) {}
void do_thread(Thread* t) override {
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(t);
// A queue is treated as empty when it has no buffer, or when its index
// equals the buffer size.
assert((queue.buffer() == nullptr) || (queue.index() == _buffer_size),
"non-empty dirty card queue for thread");
}
} verifier;
Threads::threads_do(&verifier);
}
#endif // ASSERT

void G1YoungCollector::pre_evacuate_collection_set(G1EvacInfo* evacuation_info) {
// Flush early, so later phases don't need to account for per-thread stuff.
// Flushes deferred card marks, so must precede concatenating logs.
retire_tlabs();

// Flush early, so later phases don't need to account for per-thread stuff.
concatenate_dirty_card_logs_and_stats();
{
Ticks start = Ticks::now();
G1PreEvacuateCollectionSetBatchTask cl;
G1CollectedHeap::heap()->run_batch_task(&cl);
phase_times()->record_pre_evacuate_prepare_time_ms((Ticks::now() - start).seconds() * 1000.0);
}

// Needs log buffers flushed.
calculate_collection_set(evacuation_info, policy()->max_pause_time_ms());

// Please see comment in g1CollectedHeap.hpp and
Expand Down Expand Up @@ -535,7 +503,6 @@ void G1YoungCollector::pre_evacuate_collection_set(G1EvacInfo* evacuation_info)
}

assert(_g1h->verifier()->check_region_attr_table(), "Inconsistency in the region attributes table.");
verify_empty_dirty_card_logs();

#if COMPILER2_OR_JVMCI
DerivedPointerTable::clear();
Expand Down
6 changes: 1 addition & 5 deletions src/hotspot/share/gc/g1/g1YoungCollector.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -97,10 +97,6 @@ class G1YoungCollector {

void set_young_collection_default_active_worker_threads();

void retire_tlabs();
void concatenate_dirty_card_logs_and_stats();
void verify_empty_dirty_card_logs() const NOT_DEBUG_RETURN;

void pre_evacuate_collection_set(G1EvacInfo* evacuation_info);
// Actually do the work of evacuating the parts of the collection set.
// The has_optional_evacuation_work flag for the initial collection set
Expand Down
Loading

1 comment on commit 593bec6

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.