Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
Browse files
8245961: Shenandoah: move some root marking to concurrent phase
Reviewed-by: shade
  • Loading branch information
zhengyu123 committed Jun 2, 2020
1 parent 8752e02 commit 512cc3ebf29cdddd6aa150b62df57a0edaf70aa9
@@ -174,7 +174,6 @@ class ShenandoahConcurrentMarkingTask : public AbstractGangTask {
rp = NULL;
}

_cm->concurrent_scan_code_roots(worker_id, rp);
_cm->mark_loop(worker_id, _terminator, rp,
true, // cancellable
ShenandoahStringDedup::is_enabled()); // perform string dedup
@@ -215,6 +214,44 @@ class ShenandoahSATBAndRemarkCodeRootsThreadsClosure : public ThreadClosure {
}
};

// Process concurrent roots at safepoints
//
// Gang task that walks the concurrent root set with a closure of type T.
// T is constructed per worker in work() from that worker's scan queue and the
// reference processor, so it must be constructible as T(queue, rp).
template <typename T>
class ShenandoahProcessConcurrentRootsTask : public AbstractGangTask {
private:
// Root scanner instantiated with the "not concurrent" (false) template argument.
ShenandoahConcurrentRootScanner<false /* concurrent */> _rs;
// Concurrent mark instance; its task queues supply the per-worker scan queue.
ShenandoahConcurrentMark* const _cm;
// Reference processor handed to the closure; the constructor leaves it NULL
// unless the heap reports process_references().
ReferenceProcessor* _rp;
public:

// cm       - concurrent mark instance providing the task queues
// phase    - timing phase forwarded to the root scanner
// nworkers - worker count forwarded to the root scanner
ShenandoahProcessConcurrentRootsTask(ShenandoahConcurrentMark* cm,
ShenandoahPhaseTimings::Phase phase,
uint nworkers);
// Per-worker entry point: scans roots with a freshly built T closure.
void work(uint worker_id);
};

// Sets up the safepoint root-processing task.
//
// cm       - concurrent mark instance whose task queues work() will use
// phase    - timing phase forwarded to the root scanner
// nworkers - worker count forwarded to the root scanner
//
// The reference processor is only picked up when the heap is processing
// references; otherwise _rp remains NULL.
template <typename T>
ShenandoahProcessConcurrentRootsTask<T>::ShenandoahProcessConcurrentRootsTask(ShenandoahConcurrentMark* cm,
                                                                              ShenandoahPhaseTimings::Phase phase,
                                                                              uint nworkers) :
  AbstractGangTask("Shenandoah STW Concurrent Mark Task"),
  _rs(nworkers, phase),
  _cm(cm),
  _rp(NULL) {
  ShenandoahHeap* sh_heap = ShenandoahHeap::heap();
  if (!sh_heap->process_references()) {
    // No reference processing in this cycle: keep _rp as NULL.
    return;
  }

  _rp = sh_heap->ref_processor();
  shenandoah_assert_rp_isalive_installed();
}

// Per-worker body: opens a parallel worker session, builds a T closure over
// this worker's scan queue and the reference processor, and feeds the roots
// reported by the scanner through it.
template <typename T>
void ShenandoahProcessConcurrentRootsTask<T>::work(uint worker_id) {
  ShenandoahParallelWorkerSession worker_session(worker_id);

  // The closure pushes into the queue that belongs to this worker.
  T mark_cl(_cm->task_queues()->queue(worker_id), _rp);
  _rs.oops_do(&mark_cl, worker_id);
}

class ShenandoahFinalMarkingTask : public AbstractGangTask {
private:
ShenandoahConcurrentMark* _cm;
@@ -267,13 +304,6 @@ class ShenandoahFinalMarkingTask : public AbstractGangTask {
}
}

if (heap->is_degenerated_gc_in_progress() || heap->is_full_gc_in_progress()) {
// Full GC does not execute concurrent cycle.
// Degenerated cycle may bypass concurrent cycle.
// So code roots might not be scanned, let's scan here.
_cm->concurrent_scan_code_roots(worker_id, rp);
}

_cm->mark_loop(worker_id, _terminator, rp,
false, // not cancellable
_dedup_string);
@@ -308,8 +338,6 @@ void ShenandoahConcurrentMark::mark_roots(ShenandoahPhaseTimings::Phase root_pha
ShenandoahInitMarkRootsTask<NONE> mark_roots(&root_proc);
workers->run_task(&mark_roots);
}

clear_claim_codecache();
}

void ShenandoahConcurrentMark::update_roots(ShenandoahPhaseTimings::Phase root_phase) {
@@ -390,34 +418,47 @@ void ShenandoahConcurrentMark::initialize(uint workers) {
}
}

void ShenandoahConcurrentMark::concurrent_scan_code_roots(uint worker_id, ReferenceProcessor* rp) {
if (_heap->unload_classes()) {
return;
}
// Mark concurrent roots during concurrent phases
class ShenandoahMarkConcurrentRootsTask : public AbstractGangTask {
private:
SuspendibleThreadSetJoiner _sts_joiner;
ShenandoahConcurrentRootScanner<true /* concurrent */> _rs;
ShenandoahObjToScanQueueSet* const _queue_set;
ReferenceProcessor* const _rp;

if (claim_codecache()) {
ShenandoahObjToScanQueue* q = task_queues()->queue(worker_id);
MutexLocker mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
// TODO: We can not honor StringDeduplication here, due to lock ranking
// inversion. So, we may miss some deduplication candidates.
if (_heap->has_forwarded_objects()) {
ShenandoahMarkResolveRefsClosure cl(q, rp);
CodeBlobToOopClosure blobs(&cl, !CodeBlobToOopClosure::FixRelocations);
CodeCache::blobs_do(&blobs);
} else {
ShenandoahMarkRefsClosure cl(q, rp);
CodeBlobToOopClosure blobs(&cl, !CodeBlobToOopClosure::FixRelocations);
CodeCache::blobs_do(&blobs);
}
}
public:
ShenandoahMarkConcurrentRootsTask(ShenandoahObjToScanQueueSet* qs,
ReferenceProcessor* rp,
ShenandoahPhaseTimings::Phase phase,
uint nworkers);
void work(uint worker_id);
};

// Sets up the concurrent root-marking task.
//
// qs       - queue set from which work() picks the per-worker scan queue
// rp       - reference processor passed through to the marking closure
// phase    - timing phase forwarded to the root scanner
// nworkers - worker count forwarded to the root scanner
//
// Asserts that the heap has no forwarded objects at construction time.
ShenandoahMarkConcurrentRootsTask::ShenandoahMarkConcurrentRootsTask(ShenandoahObjToScanQueueSet* qs,
ReferenceProcessor* rp,
ShenandoahPhaseTimings::Phase phase,
uint nworkers) :
AbstractGangTask("Shenandoah Concurrent Mark Task"),
_rs(nworkers, phase),
_queue_set(qs),
_rp(rp) {
assert(!ShenandoahHeap::heap()->has_forwarded_objects(), "Not expected");
}

// Per-worker body: opens a concurrent worker session, then scans the roots
// reported by the scanner with a ShenandoahMarkResolveRefsClosure bound to
// this worker's scan queue and the task's reference processor.
void ShenandoahMarkConcurrentRootsTask::work(uint worker_id) {
  ShenandoahConcurrentWorkerSession worker_session(worker_id);

  // The closure pushes into the queue that belongs to this worker.
  ShenandoahMarkResolveRefsClosure mark_cl(_queue_set->queue(worker_id), _rp);
  _rs.oops_do(&mark_cl, worker_id);
}

void ShenandoahConcurrentMark::mark_from_roots() {
WorkGang* workers = _heap->workers();
uint nworkers = workers->active_workers();

ReferenceProcessor* rp = NULL;
if (_heap->process_references()) {
ReferenceProcessor* rp = _heap->ref_processor();
rp = _heap->ref_processor();
rp->set_active_mt_degree(nworkers);

// enable ("weak") refs discovery
@@ -431,6 +472,13 @@ void ShenandoahConcurrentMark::mark_from_roots() {

task_queues()->reserve(nworkers);

{
ShenandoahGCPhase phase(ShenandoahPhaseTimings::conc_mark_roots);
// Use separate task to mark concurrent roots, since it may hold ClassLoaderData_lock and CodeCache_lock
ShenandoahMarkConcurrentRootsTask task(task_queues(), rp, ShenandoahPhaseTimings::conc_mark_roots, nworkers);
workers->run_task(&task);
}

{
TaskTerminator terminator(nworkers, task_queues());
ShenandoahConcurrentMarkingTask task(this, &terminator);
@@ -445,30 +493,50 @@ void ShenandoahConcurrentMark::finish_mark_from_roots(bool full_gc) {

uint nworkers = _heap->workers()->active_workers();

// Finally mark everything else we've got in our queues during the previous steps.
// It does two different things for concurrent vs. mark-compact GC:
// - For concurrent GC, it starts with empty task queues, drains the remaining
// SATB buffers, and then completes the marking closure.
// - For mark-compact GC, it starts out with the task queues seeded by initial
// root scan, and completes the closure, thus marking through all live objects
// The implementation is the same, so it's shared here.
{
ShenandoahGCPhase phase(full_gc ?
ShenandoahPhaseTimings::full_gc_mark_finish_queues :
ShenandoahPhaseTimings::finish_queues);
task_queues()->reserve(nworkers);

shenandoah_assert_rp_isalive_not_installed();
ShenandoahIsAliveSelector is_alive;
ReferenceProcessorIsAliveMutator fix_isalive(_heap->ref_processor(), is_alive.is_alive_closure());

StrongRootsScope scope(nworkers);
TaskTerminator terminator(nworkers, task_queues());
ShenandoahFinalMarkingTask task(this, &terminator, ShenandoahStringDedup::is_enabled());
_heap->workers()->run_task(&task);
}
// Full GC does not execute concurrent cycle. Degenerated cycle may bypass concurrent cycle.
// In those cases, concurrent roots might not be scanned, scan them here. Ideally, this
// should piggyback to ShenandoahFinalMarkingTask, but it makes time tracking very hard.
// Given full GC and degenerated GC are rare, use a separate task.
if (_heap->is_degenerated_gc_in_progress() || _heap->is_full_gc_in_progress()) {
ShenandoahPhaseTimings::Phase phase = _heap->is_full_gc_in_progress() ?
ShenandoahPhaseTimings::full_gc_scan_conc_roots :
ShenandoahPhaseTimings::degen_gc_scan_conc_roots;
ShenandoahGCPhase gc_phase(phase);
if (_heap->has_forwarded_objects()) {
ShenandoahProcessConcurrentRootsTask<ShenandoahMarkResolveRefsClosure> task(this, phase, nworkers);
_heap->workers()->run_task(&task);
} else {
ShenandoahProcessConcurrentRootsTask<ShenandoahMarkRefsClosure> task(this, phase, nworkers);
_heap->workers()->run_task(&task);
}
}

assert(task_queues()->is_empty(), "Should be empty");
// Finally mark everything else we've got in our queues during the previous steps.
// It does two different things for concurrent vs. mark-compact GC:
// - For concurrent GC, it starts with empty task queues, drains the remaining
// SATB buffers, and then completes the marking closure.
// - For mark-compact GC, it starts out with the task queues seeded by initial
// root scan, and completes the closure, thus marking through all live objects
// The implementation is the same, so it's shared here.
{
ShenandoahGCPhase phase(full_gc ?
ShenandoahPhaseTimings::full_gc_mark_finish_queues :
ShenandoahPhaseTimings::finish_queues);
task_queues()->reserve(nworkers);

StrongRootsScope scope(nworkers);
TaskTerminator terminator(nworkers, task_queues());
ShenandoahFinalMarkingTask task(this, &terminator, ShenandoahStringDedup::is_enabled());
_heap->workers()->run_task(&task);
}

assert(task_queues()->is_empty(), "Should be empty");
}

// When we're done marking everything, we process weak references.
if (_heap->process_references()) {
@@ -942,11 +1010,3 @@ void ShenandoahConcurrentMark::mark_loop_work(T* cl, ShenandoahLiveData* live_da
}
}
}

// Attempts to claim the code cache by calling try_set() on
// _claimed_codecache and returns that call's result.
// NOTE(review): presumably try_set() succeeds for only one caller until the
// flag is unset again - confirm against ShenandoahSharedFlag.
bool ShenandoahConcurrentMark::claim_codecache() {
return _claimed_codecache.try_set();
}

// Resets the code cache claim by unsetting _claimed_codecache, the same flag
// that claim_codecache() tries to set.
void ShenandoahConcurrentMark::clear_claim_codecache() {
_claimed_codecache.unset();
}
@@ -91,16 +91,6 @@ class ShenandoahConcurrentMark: public CHeapObj<mtGC> {
public:
void preclean_weak_refs();

// ---------- Concurrent code cache
//
private:
ShenandoahSharedFlag _claimed_codecache;

public:
void concurrent_scan_code_roots(uint worker_id, ReferenceProcessor* rp);
bool claim_codecache();
void clear_claim_codecache();

// ---------- Helpers
// Used from closures, need to be public
//
@@ -523,13 +523,13 @@ void ShenandoahNMethodList::transfer(ShenandoahNMethodList* const list, int limi
}

// Takes a reference on this list: increments _ref_count and returns `this`.
// NOTE(review): the first assert requires the current thread to own
// CodeCache_lock, while the second looks like the weaker "lock held or at a
// safepoint" check. If so, the second is redundant after the first, and the
// first rejects callers that run at a safepoint without the lock - confirm
// which precondition is intended.
ShenandoahNMethodList* ShenandoahNMethodList::acquire() {
assert(CodeCache_lock->owned_by_self(), "Lock must be held");
assert_locked_or_safepoint(CodeCache_lock);
_ref_count++;
return this;
}

void ShenandoahNMethodList::release() {
assert(CodeCache_lock->owned_by_self(), "Lock must be held");
assert_locked_or_safepoint(CodeCache_lock);
_ref_count--;
if (_ref_count == 0) {
delete this;
@@ -103,12 +103,15 @@ bool ShenandoahPhaseTimings::is_worker_phase(Phase phase) {
case full_gc_scan_roots:
case full_gc_update_roots:
case full_gc_adjust_roots:
case degen_gc_scan_conc_roots:
case degen_gc_update_roots:
case full_gc_scan_conc_roots:
case full_gc_purge_class_unload:
case full_gc_purge_weak_par:
case purge_class_unload:
case purge_weak_par:
case heap_iteration_roots:
case conc_mark_roots:
case conc_weak_roots_work:
case conc_strong_roots:
return true;
@@ -68,6 +68,9 @@ class outputStream;
f(resize_tlabs, " Resize TLABs") \
\
f(conc_mark, "Concurrent Marking") \
f(conc_mark_roots, " Roots ") \
SHENANDOAH_PAR_PHASE_DO(conc_mark_roots, " CM: ", f) \
\
f(conc_preclean, "Concurrent Precleaning") \
\
f(final_mark_gross, "Pause Final Mark (G)") \
@@ -128,6 +131,8 @@ class outputStream;
\
f(degen_gc_gross, "Pause Degenerated GC (G)") \
f(degen_gc, "Pause Degenerated GC (N)") \
f(degen_gc_scan_conc_roots, " Degen Mark Roots") \
SHENANDOAH_PAR_PHASE_DO(degen_gc_conc_mark_, " DM: ", f) \
f(degen_gc_update_roots, " Degen Update Roots") \
SHENANDOAH_PAR_PHASE_DO(degen_gc_update_, " DU: ", f) \
\
@@ -137,6 +142,8 @@ class outputStream;
f(full_gc_prepare, " Prepare") \
f(full_gc_scan_roots, " Scan Roots") \
SHENANDOAH_PAR_PHASE_DO(full_gc_scan_roots_, " FS: ", f) \
f(full_gc_scan_conc_roots, " Scan Concurrent Roots") \
SHENANDOAH_PAR_PHASE_DO(full_gc_scan_conc_roots, " FCS: ", f) \
f(full_gc_update_roots, " Update Roots") \
SHENANDOAH_PAR_PHASE_DO(full_gc_update_roots_, " FU: ", f) \
f(full_gc_mark, " Mark") \
@@ -28,6 +28,7 @@
#include "classfile/stringTable.hpp"
#include "classfile/systemDictionary.hpp"
#include "code/codeCache.hpp"
#include "code/nmethod.hpp"
#include "gc/shenandoah/shenandoahClosures.inline.hpp"
#include "gc/shenandoah/shenandoahConcurrentRoots.hpp"
#include "gc/shenandoah/shenandoahRootProcessor.inline.hpp"
@@ -199,10 +200,12 @@ ShenandoahRootScanner::ShenandoahRootScanner(uint n_workers, ShenandoahPhaseTimi
ShenandoahRootProcessor(phase),
_serial_roots(phase),
_thread_roots(phase, n_workers > 1),
_code_roots(phase),
_vm_roots(phase),
_dedup_roots(phase),
_cld_roots(phase, n_workers) {
_dedup_roots(phase) {
nmethod::oops_do_marking_prologue();
}

// Ends the nmethod oops-do marking bracket; the matching
// nmethod::oops_do_marking_prologue() call is made in the constructor.
ShenandoahRootScanner::~ShenandoahRootScanner() {
nmethod::oops_do_marking_epilogue();
}

void ShenandoahRootScanner::roots_do(uint worker_id, OopClosure* oops) {
@@ -232,9 +235,7 @@ void ShenandoahRootScanner::roots_do(uint worker_id, OopClosure* oops, CLDClosur
_serial_roots.oops_do(oops, worker_id);

// Process light-weight/limited parallel roots then
_vm_roots.oops_do(oops, worker_id);
_dedup_roots.oops_do(&always_true, oops, worker_id);
_cld_roots.cld_do(clds, worker_id);

// Process heavy-weight/fully parallel roots the last
_thread_roots.threads_do(&tc_cl, worker_id);
@@ -249,10 +250,6 @@ void ShenandoahRootScanner::strong_roots_do(uint worker_id, OopClosure* oops, CL
// Process serial-claiming roots first
_serial_roots.oops_do(oops, worker_id);

// Process light-weight/limited parallel roots then
_vm_roots.oops_do(oops, worker_id);
_cld_roots.always_strong_cld_do(clds, worker_id);

// Process heavy-weight/fully parallel roots the last
_thread_roots.threads_do(&tc_cl, worker_id);
}

0 comments on commit 512cc3e

Please sign in to comment.