8276887: G1: Move precleaning to Concurrent Mark From Roots subphase #6327

Closed · wants to merge 6 commits · Changes from 1 commit
@@ -889,9 +889,44 @@ void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
// at this point everything should be re-initialized and ready to go
}

class G1PrecleanYieldClosure : public YieldClosure {
G1ConcurrentMark* _cm;

public:
G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }

bool should_return() override {
return _cm->has_aborted();
}

bool should_return_fine_grain() override {
_cm->do_yield_check();
return _cm->has_aborted();
}
};

class G1CMConcurrentMarkingTask : public WorkerTask {
G1ConcurrentMark* _cm;
ReferenceProcessor* _cm_rp;

void do_mark_step(G1CMTask* task) {


I find this name confusingly close to the task's do_marking_step. I can see how this could be called a "step", with preclean being another (new) step, but I think the confusion with the other function outweighs that, and this needs a different name. Maybe just do_marking or do_all_marking, or something like that.

Member Author

@albertnetymk albertnetymk Nov 13, 2021


Renamed to do_marking.

do {
task->do_marking_step(G1ConcMarkStepDurationMillis,
true /* do_termination */,
false /* is_serial*/);

_cm->do_yield_check();
} while (!_cm->has_aborted() && task->has_aborted());
}

void preclean() {
BarrierEnqueueDiscoveredFieldClosure enqueue;
G1PrecleanYieldClosure yield_cl(_cm);
_cm_rp->preclean_discovered_references(_cm_rp->is_alive_non_header(),
&enqueue,
&yield_cl,
_cm->_gc_timer_cm);
}
public:
void work(uint worker_id) {
ResourceMark rm;
@@ -906,26 +941,25 @@ class G1CMConcurrentMarkingTask : public WorkerTask {
G1CMTask* task = _cm->task(worker_id);
task->record_start_time();
if (!_cm->has_aborted()) {
do {
task->do_marking_step(G1ConcMarkStepDurationMillis,
true /* do_termination */,
false /* is_serial*/);
do_mark_step(task);

_cm->do_yield_check();
} while (!_cm->has_aborted() && task->has_aborted());
// Marking is complete; preclean non-strong references
if (G1UseReferencePrecleaning) {
preclean();
}
}
task->record_end_time();
guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
task->record_end_time();
}

double end_vtime = os::elapsedVTime();
_cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
}

G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
WorkerTask("Concurrent Mark"), _cm(cm) { }

~G1CMConcurrentMarkingTask() { }
G1CMConcurrentMarkingTask(G1ConcurrentMark* cm, ReferenceProcessor* cm_rp) :
WorkerTask("Concurrent Mark"),
_cm(cm),
_cm_rp(cm_rp) {}
};

uint G1ConcurrentMark::calc_active_marking_workers() {
@@ -1052,7 +1086,7 @@ void G1ConcurrentMark::mark_from_roots() {
// Parallel task terminator is set in "set_concurrency_and_phase()"
set_concurrency_and_phase(active_workers, true /* concurrent */);

G1CMConcurrentMarkingTask marking_task(this);
G1CMConcurrentMarkingTask marking_task(this, _g1h->ref_processor_cm());
_concurrent_workers->run_task(&marking_task);
print_stats();
}
@@ -1671,42 +1705,6 @@ void G1ConcurrentMark::weak_refs_work() {
}
}

class G1PrecleanYieldClosure : public YieldClosure {
G1ConcurrentMark* _cm;

public:
G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }

virtual bool should_return() {
return _cm->has_aborted();
}

virtual bool should_return_fine_grain() {
_cm->do_yield_check();
return _cm->has_aborted();
}
};

void G1ConcurrentMark::preclean() {
assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");

SuspendibleThreadSetJoiner joiner;

BarrierEnqueueDiscoveredFieldClosure enqueue;

set_concurrency_and_phase(1, true);

G1PrecleanYieldClosure yield_cl(this);

ReferenceProcessor* rp = _g1h->ref_processor_cm();
// Precleaning is single threaded. Temporarily disable MT discovery.
ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
rp->preclean_discovered_references(rp->is_alive_non_header(),
&enqueue,
&yield_cl,
_gc_timer_cm);
}

// When sampling object counts, we already swapped the mark bitmaps, so we need to use
// the prev bitmap determining liveness.
class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
@@ -553,9 +553,6 @@ class G1ConcurrentMark : public CHeapObj<mtGC> {
// Do concurrent phase of marking, to a tentative transitive closure.
void mark_from_roots();

// Do concurrent preclean work.
void preclean();

void remark();

void swap_mark_bitmaps();
@@ -191,15 +191,10 @@ bool G1ConcurrentMarkThread::phase_mark_loop() {
// Subphase 1: Mark From Roots.
if (subphase_mark_from_roots()) return true;

// Subphase 2: Preclean (optional)
if (G1UseReferencePrecleaning) {
if (subphase_preclean()) return true;
}

// Subphase 3: Wait for Remark.
// Subphase 2: Wait for Remark.
if (subphase_delay_to_keep_mmu_before_remark()) return true;

// Subphase 4: Remark pause
// Subphase 3: Remark pause
if (subphase_remark()) return true;

// Check if we need to restart the marking loop.
@@ -226,12 +221,6 @@ bool G1ConcurrentMarkThread::subphase_mark_from_roots() {
return _cm->has_aborted();
}

bool G1ConcurrentMarkThread::subphase_preclean() {
G1ConcPhaseTimer p(_cm, "Concurrent Preclean");
_cm->preclean();
return _cm->has_aborted();
}

bool G1ConcurrentMarkThread::subphase_delay_to_keep_mmu_before_remark() {
delay_to_keep_mmu(true /* remark */);
return _cm->has_aborted();
@@ -63,7 +63,6 @@ class G1ConcurrentMarkThread: public ConcurrentGCThread {

bool phase_mark_loop();
bool subphase_mark_from_roots();
bool subphase_preclean();
bool subphase_delay_to_keep_mmu_before_remark();
bool subphase_remark();

@@ -1068,75 +1068,23 @@ void ReferenceProcessor::preclean_discovered_references(BoolObjectClosure* is_al
EnqueueDiscoveredFieldClosure* enqueue,
YieldClosure* yield,
GCTimer* gc_timer) {
// These lists can be handled here in any order and, indeed, concurrently.
Ticks preclean_start = Ticks::now();

// Soft references
{
GCTraceTime(Debug, gc, ref) tm("Preclean SoftReferences", gc_timer);
log_reflist("SoftRef before: ", _discoveredSoftRefs, _max_num_queues);
for (uint i = 0; i < _max_num_queues; i++) {
if (yield->should_return()) {
return;
}
if (preclean_discovered_reflist(_discoveredSoftRefs[i], is_alive,
enqueue, yield)) {
log_reflist("SoftRef abort: ", _discoveredSoftRefs, _max_num_queues);
return;
}
}
log_reflist("SoftRef after: ", _discoveredSoftRefs, _max_num_queues);
}
uint worker_id = WorkerThread::current()->id();


This logging is potentially far more verbose than before: there will now be a line for each concurrent marking thread, rather than the single line produced by the previously single-threaded precleaning. We have mechanisms for collecting and reporting timing and work units for parallel activities that should be used here.

Member Author

@albertnetymk albertnetymk Nov 13, 2021


The new logs are at trace level, just FYI. How about I address all logging-related issues (from Thomas and Stefan as well) in a follow-up PR?


I know this is trace logging, but that doesn't make the verbosity or content good. Consider a machine with a large amount of concurrency; one might be interested in any of the totals/min/max/avg, and having to extract that manually is going to be annoying. I'm okay with it being a follow-up though.

Member Author

@albertnetymk albertnetymk Nov 14, 2021


one might be interested in any of the totals/min/max/avg

Thomas suggested this as well. I will address this in a follow-up PR.
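To illustrate the kind of summary being discussed: HotSpot has per-worker reporting facilities for parallel phases (WorkerDataArray, for example), and the sketch below is only a standalone illustration of that idea, not the HotSpot API. Each worker records its preclean time into its own slot, and a single line reports total/min/max/avg instead of one trace line per marking thread. All names in the sketch (PrecleanTimes, record, print_summary) are hypothetical.

// Illustration only, not HotSpot code: per-worker times are recorded into
// slots and reported as one aggregated summary line.
#include <algorithm>
#include <cstdio>
#include <vector>

class PrecleanTimes {
  std::vector<double> _ms;  // one slot per worker, in milliseconds
public:
  explicit PrecleanTimes(unsigned workers) : _ms(workers, 0.0) {}

  void record(unsigned worker_id, double ms) { _ms[worker_id] = ms; }

  void print_summary() const {
    double total = 0.0;
    double min = _ms.empty() ? 0.0 : _ms[0];
    double max = 0.0;
    for (double ms : _ms) {
      total += ms;
      min = std::min(min, ms);
      max = std::max(max, ms);
    }
    double avg = _ms.empty() ? 0.0 : total / _ms.size();
    // One summary line regardless of the number of workers.
    std::printf("Preclean: total %.2fms min %.2fms max %.2fms avg %.2fms (%zu workers)\n",
                total, min, max, avg, _ms.size());
  }
};

int main() {
  PrecleanTimes times(4);
  times.record(0, 1.2); times.record(1, 0.8);
  times.record(2, 1.5); times.record(3, 0.9);
  times.print_summary();
  return 0;
}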


// Weak references
{
GCTraceTime(Debug, gc, ref) tm("Preclean WeakReferences", gc_timer);
log_reflist("WeakRef before: ", _discoveredWeakRefs, _max_num_queues);
for (uint i = 0; i < _max_num_queues; i++) {
if (yield->should_return()) {
return;
}
if (preclean_discovered_reflist(_discoveredWeakRefs[i], is_alive,
enqueue, yield)) {
log_reflist("WeakRef abort: ", _discoveredWeakRefs, _max_num_queues);
return;
}
}
log_reflist("WeakRef after: ", _discoveredWeakRefs, _max_num_queues);
}
size_t soft_count = _discoveredSoftRefs[worker_id].length();
size_t weak_count = _discoveredWeakRefs[worker_id].length();
size_t final_count = _discoveredFinalRefs[worker_id].length();
size_t phantom_count = _discoveredPhantomRefs[worker_id].length();

// Final references
{
GCTraceTime(Debug, gc, ref) tm("Preclean FinalReferences", gc_timer);
log_reflist("FinalRef before: ", _discoveredFinalRefs, _max_num_queues);
for (uint i = 0; i < _max_num_queues; i++) {
if (yield->should_return()) {
return;
}
if (preclean_discovered_reflist(_discoveredFinalRefs[i], is_alive,
enqueue, yield)) {
log_reflist("FinalRef abort: ", _discoveredFinalRefs, _max_num_queues);
return;
}
}
log_reflist("FinalRef after: ", _discoveredFinalRefs, _max_num_queues);
}
preclean_discovered_reflist(_discoveredSoftRefs[worker_id], is_alive, enqueue, yield);
preclean_discovered_reflist(_discoveredWeakRefs[worker_id], is_alive, enqueue, yield);
preclean_discovered_reflist(_discoveredFinalRefs[worker_id], is_alive, enqueue, yield);
preclean_discovered_reflist(_discoveredPhantomRefs[worker_id], is_alive, enqueue, yield);

// Phantom references
{
GCTraceTime(Debug, gc, ref) tm("Preclean PhantomReferences", gc_timer);
log_reflist("PhantomRef before: ", _discoveredPhantomRefs, _max_num_queues);
for (uint i = 0; i < _max_num_queues; i++) {
if (yield->should_return()) {
return;
}
if (preclean_discovered_reflist(_discoveredPhantomRefs[i], is_alive,
enqueue, yield)) {
log_reflist("PhantomRef abort: ", _discoveredPhantomRefs, _max_num_queues);
return;
}
}
log_reflist("PhantomRef after: ", _discoveredPhantomRefs, _max_num_queues);
}
log_trace(gc, ref)("Worker (%d): Precleaning Soft (%zu), Weak (%zu), Final (%zu), Phantom (%zu) %f ms",
worker_id, soft_count, weak_count, final_count, phantom_count,
(Ticks::now() - preclean_start).seconds()*1000);
}

bool ReferenceProcessor::preclean_discovered_reflist(DiscoveredList& refs_list,
@@ -501,27 +501,6 @@ class ReferenceProcessorSpanMutator : StackObj {
}
};

// A utility class to temporarily change the MT'ness of
// reference discovery for the given ReferenceProcessor
// in the scope that contains it.
class ReferenceProcessorMTDiscoveryMutator: StackObj {
private:
ReferenceProcessor* _rp;
bool _saved_mt;

public:
ReferenceProcessorMTDiscoveryMutator(ReferenceProcessor* rp,
bool mt):
_rp(rp) {
_saved_mt = _rp->discovery_is_mt();
_rp->set_mt_discovery(mt);
}

~ReferenceProcessorMTDiscoveryMutator() {
_rp->set_mt_discovery(_saved_mt);
}
};

// A utility class to temporarily change the disposition
// of the "is_alive_non_header" closure field of the
// given ReferenceProcessor in the scope that contains it.