Skip to content

Commit

Permalink
8301116: Parallelize TLAB resizing in G1
Browse files Browse the repository at this point in the history
Reviewed-by: ayang, iwalulya
  • Loading branch information
Thomas Schatzl committed Feb 9, 2023
1 parent c72f951 commit 83e2db6
Show file tree
Hide file tree
Showing 11 changed files with 109 additions and 35 deletions.
18 changes: 6 additions & 12 deletions src/hotspot/share/gc/g1/g1CollectedHeap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1010,7 +1010,7 @@ void G1CollectedHeap::verify_before_full_collection(bool explicit_gc) {
_verifier->verify_bitmap_clear(true /* above_tams_only */);
}

void G1CollectedHeap::prepare_heap_for_mutators() {
void G1CollectedHeap::prepare_for_mutator_after_full_collection() {
// Delete metaspaces for unloaded class loaders and clean up loader_data graph
ClassLoaderDataGraph::purge(/*at_safepoint*/true);
DEBUG_ONLY(MetaspaceUtils::verify();)
Expand All @@ -1025,9 +1025,8 @@ void G1CollectedHeap::prepare_heap_for_mutators() {
// Rebuild the code root lists for each region
rebuild_code_roots();

// Start a new incremental collection set for the next pause
start_new_collection_set();

allocate_dummy_regions();
_allocator->init_mutator_alloc_regions();

// Post collection state updates.
Expand Down Expand Up @@ -2642,8 +2641,6 @@ class VerifyRegionRemSetClosure : public HeapRegionClosure {
};

void G1CollectedHeap::start_new_collection_set() {
double start = os::elapsedTime();

collection_set()->start_incremental_building();

clear_region_attr();
Expand All @@ -2654,8 +2651,6 @@ void G1CollectedHeap::start_new_collection_set() {
// We redo the verification, but now with respect to the new CSet, which
// has just been initialized after the previous CSet was freed.
_cm->verify_no_collection_set_oops();

phase_times()->record_start_new_cset_time_ms((os::elapsedTime() - start) * 1000.0);
}

G1HeapVerifier::G1VerifyType G1CollectedHeap::young_collection_verify_type() const {
Expand Down Expand Up @@ -2765,19 +2760,18 @@ G1JFRTracerMark::~G1JFRTracerMark() {
_tracer->report_gc_end(_timer->gc_end(), _timer->time_partitions());
}

void G1CollectedHeap::prepare_tlabs_for_mutator() {
void G1CollectedHeap::prepare_for_mutator_after_young_collection() {
Ticks start = Ticks::now();

_survivor_evac_stats.adjust_desired_plab_size();
_old_evac_stats.adjust_desired_plab_size();

// Start a new incremental collection set for the mutator phase.
start_new_collection_set();
allocate_dummy_regions();

_allocator->init_mutator_alloc_regions();

resize_all_tlabs();

phase_times()->record_resize_tlab_time_ms((Ticks::now() - start).seconds() * 1000.0);
phase_times()->record_prepare_for_mutator_time_ms((Ticks::now() - start).seconds() * 1000.0);
}

void G1CollectedHeap::retire_tlabs() {
Expand Down
31 changes: 29 additions & 2 deletions src/hotspot/share/gc/g1/g1CollectedHeap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
#include "memory/iterator.hpp"
#include "memory/memRegion.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/threadSMR.hpp"
#include "utilities/bitMap.hpp"

// A "G1CollectedHeap" is an implementation of a java heap for HotSpot.
Expand Down Expand Up @@ -120,6 +121,32 @@ class G1RegionMappingChangedListener : public G1MappingChangedListener {
void on_commit(uint start_idx, size_t num_regions, bool zero_filled) override;
};

// Helper to claim contiguous sets of JavaThread for processing by multiple threads.
//
// The claimer holds a ThreadsListHandle and a shared claim cursor. Each caller
// of apply() repeatedly claims up to claim_step consecutive entries of the
// list and runs the given closure on them until no entries are left.
class G1JavaThreadsListClaimer : public StackObj {
  // Handle on the list of JavaThreads that is handed out in chunks.
  ThreadsListHandle _list;
  // Maximum number of JavaThreads handed out per claim.
  uint _claim_step;

  // Index of the first list entry that has not been claimed yet. Read and
  // advanced with Atomic operations in claim().
  volatile uint _cur_claim;

  // Attempts to claim _claim_step JavaThreads, returning an array of claimed
  // JavaThread* with count elements. Returns null (and a zero count) if there
  // are no more threads to claim.
  JavaThread* const* claim(uint& count);

public:
  // Creates a claimer that hands out at most claim_step JavaThreads per claim.
  // claim_step must be greater than zero.
  // The constructor is explicit so that a plain uint is never silently
  // converted into a claimer.
  explicit G1JavaThreadsListClaimer(uint claim_step) : _list(), _claim_step(claim_step), _cur_claim(0) {
    assert(claim_step > 0, "must be");
  }

  // Executes the given closure on the elements of the JavaThread list, chunking the
  // JavaThread set in claim_step chunks for each caller to reduce parallelization
  // overhead.
  void apply(ThreadClosure* cl);

  // Total number of JavaThreads that can be claimed.
  uint length() const { return _list.length(); }
};

class G1CollectedHeap : public CollectedHeap {
friend class VM_G1CollectForAllocation;
friend class VM_G1CollectFull;
Expand Down Expand Up @@ -491,7 +518,7 @@ class G1CollectedHeap : public CollectedHeap {
bool abort_concurrent_cycle();
void verify_before_full_collection(bool explicit_gc);
void prepare_heap_for_full_collection();
void prepare_heap_for_mutators();
void prepare_for_mutator_after_full_collection();
void abort_refinement();
void verify_after_full_collection();
void print_heap_after_full_collection();
Expand Down Expand Up @@ -771,7 +798,7 @@ class G1CollectedHeap : public CollectedHeap {
// Start a concurrent cycle.
void start_concurrent_cycle(bool concurrent_operation_is_full_mark);

void prepare_tlabs_for_mutator();
void prepare_for_mutator_after_young_collection();

void retire_tlabs();

Expand Down
25 changes: 25 additions & 0 deletions src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "gc/shared/taskqueue.inline.hpp"
#include "oops/stackChunkOop.hpp"
#include "runtime/atomic.hpp"
#include "runtime/threadSMR.inline.hpp"
#include "utilities/bitMap.inline.hpp"

inline bool G1STWIsAliveClosure::do_object_b(oop p) {
Expand All @@ -49,6 +50,30 @@ inline bool G1STWIsAliveClosure::do_object_b(oop p) {
return !_g1h->is_in_cset(p) || p->is_forwarded();
}

// Claims the next chunk of at most _claim_step JavaThreads from the list.
// On success returns a pointer to the first claimed entry and stores the
// number of claimed entries in count. When nothing is left to claim, returns
// nullptr and leaves count at zero.
inline JavaThread* const* G1JavaThreadsListClaimer::claim(uint& count) {
  const uint num_threads = _list.length();

  // Report an empty claim unless a chunk is handed out below.
  count = 0;

  // Skip the atomic add entirely when the cursor is already past the end.
  if (Atomic::load(&_cur_claim) < num_threads) {
    const uint first = Atomic::fetch_and_add(&_cur_claim, _claim_step);
    // Another caller may have taken the last chunk between the load and the
    // add, so the claimed start index has to be checked again.
    if (first < num_threads) {
      const uint remaining = num_threads - first;
      // The final chunk may be shorter than _claim_step.
      count = MIN2(remaining, _claim_step);
      return _list.list()->threads() + first;
    }
  }
  return nullptr;
}

// Repeatedly claims chunks of JavaThreads and invokes cl->do_thread() on every
// thread of each claimed chunk, until claim() reports that nothing is left.
inline void G1JavaThreadsListClaimer::apply(ThreadClosure* cl) {
  uint num_claimed = 0;

  for (JavaThread* const* chunk = claim(num_claimed);
       chunk != nullptr;
       chunk = claim(num_claimed)) {
    // Visit the claimed entries in list order.
    JavaThread* const* const chunk_end = chunk + num_claimed;
    for (JavaThread* const* cur = chunk; cur != chunk_end; ++cur) {
      cl->do_thread(*cur);
    }
  }
}

G1GCPhaseTimes* G1CollectedHeap::phase_times() const {
return _policy->phase_times();
}
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/g1/g1FullCollector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ void G1FullCollector::complete_collection() {
// Prepare the bitmap for the next (potentially concurrent) marking.
_heap->concurrent_mark()->clear_bitmap(_heap->workers());

_heap->prepare_heap_for_mutators();
_heap->prepare_for_mutator_after_full_collection();

_heap->resize_all_tlabs();

Expand Down
15 changes: 8 additions & 7 deletions src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
_gc_par_phases[RedirtyCards] = new WorkerDataArray<double>("RedirtyCards", "Redirty Logged Cards (ms):", max_gc_threads);
_gc_par_phases[RedirtyCards]->create_thread_work_items("Redirtied Cards:");

_gc_par_phases[ResizeThreadLABs] = new WorkerDataArray<double>("ResizeTLABs", "Resize TLABs (ms):", max_gc_threads);

_gc_par_phases[FreeCollectionSet] = new WorkerDataArray<double>("FreeCSet", "Free Collection Set (ms):", max_gc_threads);
_gc_par_phases[YoungFreeCSet] = new WorkerDataArray<double>("YoungFreeCSet", "Young Free Collection Set (ms):", max_gc_threads);
_gc_par_phases[NonYoungFreeCSet] = new WorkerDataArray<double>("NonYoungFreeCSet", "Non-Young Free Collection Set (ms):", max_gc_threads);
Expand All @@ -173,7 +175,6 @@ void G1GCPhaseTimes::reset() {
_cur_prepare_merge_heap_roots_time_ms = 0.0;
_cur_optional_prepare_merge_heap_roots_time_ms = 0.0;
_cur_prepare_tlab_time_ms = 0.0;
_cur_resize_tlab_time_ms = 0.0;
_cur_post_evacuate_cleanup_1_time_ms = 0.0;
_cur_post_evacuate_cleanup_2_time_ms = 0.0;
_cur_expand_heap_time_ms = 0.0;
Expand All @@ -184,7 +185,7 @@ void G1GCPhaseTimes::reset() {
_recorded_prepare_heap_roots_time_ms = 0.0;
_recorded_young_cset_choice_time_ms = 0.0;
_recorded_non_young_cset_choice_time_ms = 0.0;
_recorded_start_new_cset_time_ms = 0.0;
_recorded_prepare_for_mutator_time_ms = 0.0;
_recorded_serial_free_cset_time_ms = 0.0;
_recorded_total_rebuild_freelist_time_ms = 0.0;
_recorded_serial_rebuild_freelist_time_ms = 0.0;
Expand Down Expand Up @@ -489,7 +490,7 @@ double G1GCPhaseTimes::print_post_evacuate_collection_set(bool evacuation_failed
_cur_post_evacuate_cleanup_1_time_ms +
_cur_post_evacuate_cleanup_2_time_ms +
_recorded_total_rebuild_freelist_time_ms +
_recorded_start_new_cset_time_ms +
_recorded_prepare_for_mutator_time_ms +
_cur_expand_heap_time_ms;

info_time("Post Evacuate Collection Set", sum_ms);
Expand Down Expand Up @@ -527,6 +528,9 @@ double G1GCPhaseTimes::print_post_evacuate_collection_set(bool evacuation_failed
debug_phase(_gc_par_phases[SampleCollectionSetCandidates], 1);
}
debug_phase(_gc_par_phases[RedirtyCards], 1);
if (UseTLAB && ResizeTLAB) {
debug_phase(_gc_par_phases[ResizeThreadLABs], 1);
}
debug_phase(_gc_par_phases[FreeCollectionSet], 1);
trace_phase(_gc_par_phases[YoungFreeCSet], true, 1);
trace_phase(_gc_par_phases[NonYoungFreeCSet], true, 1);
Expand All @@ -537,10 +541,7 @@ double G1GCPhaseTimes::print_post_evacuate_collection_set(bool evacuation_failed
trace_time("Serial Rebuild Free List ", _recorded_serial_rebuild_freelist_time_ms);
trace_phase(_gc_par_phases[RebuildFreeList]);

debug_time("Start New Collection Set", _recorded_start_new_cset_time_ms);
if (UseTLAB && ResizeTLAB) {
debug_time("Resize TLABs", _cur_resize_tlab_time_ms);
}
debug_time("Prepare For Mutator", _recorded_prepare_for_mutator_time_ms);
debug_time("Expand Heap After Collection", _cur_expand_heap_time_ms);

return sum_ms;
Expand Down
12 changes: 4 additions & 8 deletions src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
FreeCollectionSet,
YoungFreeCSet,
NonYoungFreeCSet,
ResizeThreadLABs,
RebuildFreeList,
SampleCollectionSetCandidates,
MergePSS,
Expand Down Expand Up @@ -179,7 +180,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
double _cur_optional_prepare_merge_heap_roots_time_ms;

double _cur_prepare_tlab_time_ms;
double _cur_resize_tlab_time_ms;

double _cur_concatenate_dirty_card_logs_time_ms;

Expand All @@ -199,7 +199,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
double _recorded_young_cset_choice_time_ms;
double _recorded_non_young_cset_choice_time_ms;

double _recorded_start_new_cset_time_ms;
double _recorded_prepare_for_mutator_time_ms;

double _recorded_serial_free_cset_time_ms;

Expand Down Expand Up @@ -276,10 +276,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
_cur_prepare_tlab_time_ms = ms;
}

void record_resize_tlab_time_ms(double ms) {
_cur_resize_tlab_time_ms = ms;
}

void record_concatenate_dirty_card_logs_time_ms(double ms) {
_cur_concatenate_dirty_card_logs_time_ms = ms;
}
Expand Down Expand Up @@ -356,8 +352,8 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
_recorded_non_young_cset_choice_time_ms = time_ms;
}

void record_start_new_cset_time_ms(double time_ms) {
_recorded_start_new_cset_time_ms = time_ms;
void record_prepare_for_mutator_time_ms(double time_ms) {
_recorded_prepare_for_mutator_time_ms = time_ms;
}

void record_cur_collection_start_sec(double time_ms) {
Expand Down
4 changes: 1 addition & 3 deletions src/hotspot/share/gc/g1/g1YoungCollector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1022,9 +1022,7 @@ void G1YoungCollector::post_evacuate_collection_set(G1EvacInfo* evacuation_info,

evacuation_info->set_bytes_used(_g1h->bytes_used_during_gc());

_g1h->start_new_collection_set();

_g1h->prepare_tlabs_for_mutator();
_g1h->prepare_for_mutator_after_young_collection();

_g1h->gc_epilogue(false);

Expand Down
30 changes: 30 additions & 0 deletions src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
#include "gc/g1/g1YoungGCPostEvacuateTasks.hpp"
#include "gc/shared/preservedMarks.inline.hpp"
#include "jfr/jfrEvents.hpp"
#include "runtime/threads.hpp"
#include "runtime/threadSMR.hpp"
#include "utilities/ticks.hpp"

class G1PostEvacuateCollectionSetCleanupTask1::MergePssTask : public G1AbstractSubTask {
Expand Down Expand Up @@ -701,6 +703,31 @@ class G1PostEvacuateCollectionSetCleanupTask2::FreeCollectionSetTask : public G1
}
};

// Sub-task that resizes the TLABs of the JavaThreads. Every worker executing
// do_work() claims chunks of ThreadsPerWorker threads from the shared claimer
// and calls tlab().resize() on each claimed thread.
class G1PostEvacuateCollectionSetCleanupTask2::ResizeTLABsTask : public G1AbstractSubTask {
  // Hands out chunks of JavaThreads to the workers.
  G1JavaThreadsListClaimer _claimer;

  // There is not much work per thread so the number of threads per worker is high.
  static const uint ThreadsPerWorker = 250;

public:
  ResizeTLABsTask() : G1AbstractSubTask(G1GCPhaseTimes::ResizeThreadLABs), _claimer(ThreadsPerWorker) { }

  // Resizes the TLAB of every JavaThread this worker manages to claim.
  // worker_id is not used by this sub-task.
  void do_work(uint worker_id) override {
    class ResizeClosure : public ThreadClosure {
    public:
      // The claimer only passes entries of the JavaThread list, so the
      // downcast from Thread* is valid here.
      void do_thread(Thread* thread) override {
        static_cast<JavaThread*>(thread)->tlab().resize();
      }
    } cl;
    _claimer.apply(&cl);
  }

  // Cost estimate: the number of claimable JavaThreads divided by the number
  // of threads one worker handles per claim.
  double worker_cost() const override {
    return static_cast<double>(_claimer.length()) / ThreadsPerWorker;
  }
};

G1PostEvacuateCollectionSetCleanupTask2::G1PostEvacuateCollectionSetCleanupTask2(G1ParScanThreadStateSet* per_thread_states,
G1EvacInfo* evacuation_info,
G1EvacFailureRegions* evac_failure_regions) :
Expand All @@ -722,6 +749,9 @@ G1PostEvacuateCollectionSetCleanupTask2::G1PostEvacuateCollectionSetCleanupTask2
}
}
add_parallel_task(new RedirtyLoggedCardsTask(per_thread_states->rdcqs(), evac_failure_regions));
if (UseTLAB && ResizeTLAB) {
add_parallel_task(new ResizeTLABsTask());
}
add_parallel_task(new FreeCollectionSetTask(evacuation_info,
per_thread_states->surviving_young_words(),
evac_failure_regions));
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ class G1PostEvacuateCollectionSetCleanupTask1 : public G1BatchedTask {
// - Redirty Logged Cards
// - Restore Preserved Marks (on evacuation failure)
// - Free Collection Set
// - Resize TLABs
class G1PostEvacuateCollectionSetCleanupTask2 : public G1BatchedTask {
class EagerlyReclaimHumongousObjectsTask;
class ResetHotCardCacheTask;
Expand All @@ -70,6 +71,7 @@ class G1PostEvacuateCollectionSetCleanupTask2 : public G1BatchedTask {
class ClearRetainedRegionBitmaps;
class RedirtyLoggedCardsTask;
class RestorePreservedMarksTask;
class ResizeTLABsTask;
class FreeCollectionSetTask;

public:
Expand Down
4 changes: 2 additions & 2 deletions test/hotspot/jtreg/gc/g1/TestGCLogMessages.java
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ public boolean isAvailable() {
new LogMessageWithLevelC2OrJVMCIOnly("Update Derived Pointers", Level.DEBUG),
new LogMessageWithLevel("Redirty Logged Cards", Level.DEBUG),
new LogMessageWithLevel("Redirtied Cards", Level.DEBUG),
new LogMessageWithLevel("Resize TLABs", Level.DEBUG),
new LogMessageWithLevel("Free Collection Set", Level.DEBUG),
new LogMessageWithLevel("Serial Free Collection Set", Level.TRACE),
new LogMessageWithLevel("Young Free Collection Set", Level.TRACE),
Expand All @@ -192,8 +193,7 @@ public boolean isAvailable() {
new LogMessageWithLevel("Rebuild Free List", Level.DEBUG),
new LogMessageWithLevel("Serial Rebuild Free List", Level.TRACE),
new LogMessageWithLevel("Parallel Rebuild Free List", Level.TRACE),
new LogMessageWithLevel("Start New Collection Set", Level.DEBUG),
new LogMessageWithLevel("Resize TLABs", Level.DEBUG),
new LogMessageWithLevel("Prepare For Mutator", Level.DEBUG),
new LogMessageWithLevel("Expand Heap After Collection", Level.DEBUG),
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ public static void main(String[] args) throws IOException {
"RedirtyCards",
"RecalculateUsed",
"ResetHotCardCache",
"ResizeTLABs",
"FreeCSet",
"UpdateDerivedPointers",
"EagerlyReclaimHumongousObjects",
Expand Down

1 comment on commit 83e2db6

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.