8276887: G1: Move precleaning to Concurrent Mark From Roots subphase #6327

@@ -889,9 +889,44 @@ void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
// at this point everything should be re-initialized and ready to go
}

class G1PrecleanYieldClosure : public YieldClosure {
G1ConcurrentMark* _cm;

public:
G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }

bool should_return() override {
return _cm->has_aborted();
}

bool should_return_fine_grain() override {
_cm->do_yield_check();
return _cm->has_aborted();
}
};

class G1CMConcurrentMarkingTask : public WorkerTask {
G1ConcurrentMark* _cm;
ReferenceProcessor* _cm_rp;

void do_marking(G1CMTask* task) {
do {
task->do_marking_step(G1ConcMarkStepDurationMillis,
true /* do_termination */,
false /* is_serial*/);

_cm->do_yield_check();
} while (!_cm->has_aborted() && task->has_aborted());
}

void preclean() {
BarrierEnqueueDiscoveredFieldClosure enqueue;
G1PrecleanYieldClosure yield_cl(_cm);
_cm_rp->preclean_discovered_references(_cm_rp->is_alive_non_header(),
&enqueue,
&yield_cl,
_cm->_gc_timer_cm);
}
public:
void work(uint worker_id) {
ResourceMark rm;
@@ -906,26 +941,25 @@ class G1CMConcurrentMarkingTask : public WorkerTask {
G1CMTask* task = _cm->task(worker_id);
task->record_start_time();
if (!_cm->has_aborted()) {
do {
task->do_marking_step(G1ConcMarkStepDurationMillis,
true /* do_termination */,
false /* is_serial*/);
do_marking(task);

_cm->do_yield_check();
} while (!_cm->has_aborted() && task->has_aborted());
// Marking is complete; preclean non-strong references
if (G1UseReferencePrecleaning) {
preclean();
}
}
task->record_end_time();
guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
task->record_end_time();
}

double end_vtime = os::elapsedVTime();
_cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
}

G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
WorkerTask("Concurrent Mark"), _cm(cm) { }

~G1CMConcurrentMarkingTask() { }
G1CMConcurrentMarkingTask(G1ConcurrentMark* cm, ReferenceProcessor* cm_rp) :
WorkerTask("Concurrent Mark"),
_cm(cm),
_cm_rp(cm_rp) {}
};

uint G1ConcurrentMark::calc_active_marking_workers() {
@@ -1052,7 +1086,7 @@ void G1ConcurrentMark::mark_from_roots() {
// Parallel task terminator is set in "set_concurrency_and_phase()"
set_concurrency_and_phase(active_workers, true /* concurrent */);

G1CMConcurrentMarkingTask marking_task(this);
G1CMConcurrentMarkingTask marking_task(this, _g1h->ref_processor_cm());
_concurrent_workers->run_task(&marking_task);
print_stats();
}
@@ -1671,42 +1705,6 @@ void G1ConcurrentMark::weak_refs_work() {
}
}

class G1PrecleanYieldClosure : public YieldClosure {
G1ConcurrentMark* _cm;

public:
G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }

virtual bool should_return() {
return _cm->has_aborted();
}

virtual bool should_return_fine_grain() {
_cm->do_yield_check();
return _cm->has_aborted();
}
};

void G1ConcurrentMark::preclean() {
assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");

SuspendibleThreadSetJoiner joiner;

BarrierEnqueueDiscoveredFieldClosure enqueue;

set_concurrency_and_phase(1, true);

G1PrecleanYieldClosure yield_cl(this);

ReferenceProcessor* rp = _g1h->ref_processor_cm();
// Precleaning is single threaded. Temporarily disable MT discovery.
ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
rp->preclean_discovered_references(rp->is_alive_non_header(),
&enqueue,
&yield_cl,
_gc_timer_cm);
}

// When sampling object counts, we already swapped the mark bitmaps, so we need to use
// the prev bitmap determining liveness.
class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
@@ -553,9 +553,6 @@ class G1ConcurrentMark : public CHeapObj<mtGC> {
// Do concurrent phase of marking, to a tentative transitive closure.
void mark_from_roots();

// Do concurrent preclean work.
void preclean();

void remark();

void swap_mark_bitmaps();
@@ -191,15 +191,10 @@ bool G1ConcurrentMarkThread::phase_mark_loop() {
// Subphase 1: Mark From Roots.
if (subphase_mark_from_roots()) return true;

// Subphase 2: Preclean (optional)
if (G1UseReferencePrecleaning) {
if (subphase_preclean()) return true;
}

// Subphase 3: Wait for Remark.
// Subphase 2: Wait for Remark.
if (subphase_delay_to_keep_mmu_before_remark()) return true;

// Subphase 4: Remark pause
// Subphase 3: Remark pause
if (subphase_remark()) return true;

// Check if we need to restart the marking loop.
@@ -226,12 +221,6 @@ bool G1ConcurrentMarkThread::subphase_mark_from_roots() {
return _cm->has_aborted();
}

bool G1ConcurrentMarkThread::subphase_preclean() {
G1ConcPhaseTimer p(_cm, "Concurrent Preclean");
_cm->preclean();
return _cm->has_aborted();
}

bool G1ConcurrentMarkThread::subphase_delay_to_keep_mmu_before_remark() {
delay_to_keep_mmu(true /* remark */);
return _cm->has_aborted();
@@ -63,7 +63,6 @@ class G1ConcurrentMarkThread: public ConcurrentGCThread {

bool phase_mark_loop();
bool subphase_mark_from_roots();
bool subphase_preclean();
bool subphase_delay_to_keep_mmu_before_remark();
bool subphase_remark();

@@ -1068,75 +1068,37 @@ void ReferenceProcessor::preclean_discovered_references(BoolObjectClosure* is_al
EnqueueDiscoveredFieldClosure* enqueue,
YieldClosure* yield,
GCTimer* gc_timer) {
// These lists can be handled here in any order and, indeed, concurrently.

// Soft references
{
GCTraceTime(Debug, gc, ref) tm("Preclean SoftReferences", gc_timer);
log_reflist("SoftRef before: ", _discoveredSoftRefs, _max_num_queues);
for (uint i = 0; i < _max_num_queues; i++) {
if (yield->should_return()) {
return;
}
if (preclean_discovered_reflist(_discoveredSoftRefs[i], is_alive,
enqueue, yield)) {
log_reflist("SoftRef abort: ", _discoveredSoftRefs, _max_num_queues);
return;
}
}
log_reflist("SoftRef after: ", _discoveredSoftRefs, _max_num_queues);
}

// Weak references
{
GCTraceTime(Debug, gc, ref) tm("Preclean WeakReferences", gc_timer);
log_reflist("WeakRef before: ", _discoveredWeakRefs, _max_num_queues);
for (uint i = 0; i < _max_num_queues; i++) {
if (yield->should_return()) {
return;
}
if (preclean_discovered_reflist(_discoveredWeakRefs[i], is_alive,
enqueue, yield)) {
log_reflist("WeakRef abort: ", _discoveredWeakRefs, _max_num_queues);
return;
}
Ticks preclean_start = Ticks::now();

constexpr int ref_kinds = 4;
ReferenceType ref_type_arr[] = { REF_SOFT, REF_WEAK, REF_FINAL, REF_PHANTOM };
static_assert(ARRAY_SIZE(ref_type_arr) == ref_kinds, "invariant");

Review comment:

I don't think there's a need for the literal 4 initializer for ref_kinds and the static assert to verify it's correct. Why not just use this after the declaration of ref_type_arr?
constexpr int ref_kinds = ARRAY_SIZE(ref_type_arr);

@albertnetymk (Member, Author), Nov 14, 2021:

Changed as suggested.
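
For illustration only (not part of the patch): a standalone sketch of the two forms being discussed. ARRAY_SIZE is stubbed here purely so the snippet compiles on its own; in HotSpot it is an existing utility macro.

  #include <cstddef>

  // Stand-in for HotSpot's ARRAY_SIZE macro, defined here only so the sketch
  // is self-contained.
  #define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))

  enum ReferenceType { REF_SOFT, REF_WEAK, REF_FINAL, REF_PHANTOM };

  int main() {
    // Original form: a hand-written literal, cross-checked by a static_assert.
    // constexpr int ref_kinds = 4;
    // ReferenceType ref_type_arr[] = { REF_SOFT, REF_WEAK, REF_FINAL, REF_PHANTOM };
    // static_assert(ARRAY_SIZE(ref_type_arr) == ref_kinds, "invariant");

    // Suggested form: derive the count from the array itself, so the two can
    // never drift apart.
    ReferenceType ref_type_arr[] = { REF_SOFT, REF_WEAK, REF_FINAL, REF_PHANTOM };
    constexpr int ref_kinds = ARRAY_SIZE(ref_type_arr);

    std::size_t ref_count_arr[ref_kinds] = {};  // zero-initialized per-kind counters
    (void)ref_count_arr;
    return 0;
  }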

size_t ref_count_arr[ref_kinds] = {};

if (discovery_is_mt()) {

Review comment:

The old form of these loops called yield->should_return() for each queue. That's been lost in this rewrite, and I'm not sure that's correct.

@albertnetymk (Member, Author), Nov 13, 2021:

It's still correct because preclean_discovered_reflist checks for abort while iterating the list. I could have written something like the following if you think prompt abort is important.

  bool is_aborted = preclean_discovered_reflist(...);
  if (is_aborted) {
    return;
  }

Review comment:

It's hard to know for sure, but the description of YieldClosure makes me think that's not really the intended usage. OTOH, this seems to be the only use of YieldClosure; maybe there were others in CMS? I would prefer the yield->should_return() checks be retained. Also, preclean_discovered_reflist might no longer need to return bool.

@albertnetymk (Member, Author), Nov 14, 2021:

Restored yield->should_return().

for (int i = 0; i < ref_kinds; ++i) {
ReferenceType ref_type = ref_type_arr[i];
DiscoveredList* list = get_discovered_list(ref_type);
ref_count_arr[i] = list->length();
preclean_discovered_reflist(*list, is_alive, enqueue, yield);
}
log_reflist("WeakRef after: ", _discoveredWeakRefs, _max_num_queues);
}

// Final references
{
GCTraceTime(Debug, gc, ref) tm("Preclean FinalReferences", gc_timer);
log_reflist("FinalRef before: ", _discoveredFinalRefs, _max_num_queues);
for (uint i = 0; i < _max_num_queues; i++) {
if (yield->should_return()) {
return;
}
if (preclean_discovered_reflist(_discoveredFinalRefs[i], is_alive,
enqueue, yield)) {
log_reflist("FinalRef abort: ", _discoveredFinalRefs, _max_num_queues);
return;
} else {
for (int i = 0; i < ref_kinds; ++i) {
ReferenceType ref_type = ref_type_arr[i];
// When discovery is *not* multi-threaded, discovered refs are stored in
// list[0.._num_queues-1]. Loop _num_queues times to cover all lists.
for (uint j = 0; j < _num_queues; ++j) {
DiscoveredList* list = get_discovered_list(ref_type);
ref_count_arr[i] += list->length();
preclean_discovered_reflist(*list, is_alive, enqueue, yield);
}
}
log_reflist("FinalRef after: ", _discoveredFinalRefs, _max_num_queues);
}

// Phantom references
{
GCTraceTime(Debug, gc, ref) tm("Preclean PhantomReferences", gc_timer);
log_reflist("PhantomRef before: ", _discoveredPhantomRefs, _max_num_queues);
for (uint i = 0; i < _max_num_queues; i++) {
if (yield->should_return()) {
return;
}
if (preclean_discovered_reflist(_discoveredPhantomRefs[i], is_alive,
enqueue, yield)) {
log_reflist("PhantomRef abort: ", _discoveredPhantomRefs, _max_num_queues);
return;
}
}
log_reflist("PhantomRef after: ", _discoveredPhantomRefs, _max_num_queues);
}
uint worker_id = WorkerThread::current()->id();

Review comment:

This logging is potentially far more verbose than previously, since there will now be a line for each concurrent marking thread, rather than one line for the previously single-threaded precleaning. We have mechanisms for collecting and reporting timing and work units for parallel activities that should be used here.

@albertnetymk (Member, Author), Nov 13, 2021:

The new logs are on trace level, just FYI. How about I address all logging related issues (from Thomas and Stefan as well) in a follow-up PR?

Review comment:

I know this is trace logging, but that doesn't make the verbosity or content good. Consider a machine with a large amount of concurrency; one might be interested in any of the totals/min/max/avg, and having to extract that manually is going to be annoying. I'm okay with it being a followup though.

@albertnetymk (Member, Author), Nov 14, 2021:

> one might be interested in any of the totals/min/max/avg

Thomas suggested this as well. I will address this in a follow-up PR.
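
To illustrate the direction such a follow-up might take: the sketch below is a generic stand-in, not HotSpot's actual per-worker reporting machinery, showing per-worker preclean counts funnelled into shared totals so a single summary line is logged instead of one line per marking thread (min/max/avg would additionally need per-worker slots).

  #include <atomic>
  #include <cstddef>
  #include <cstdio>

  // Generic illustration only: shared counters that every marking worker adds
  // its per-kind preclean counts to, so the log emits one aggregated line.
  struct PrecleanTotals {
    std::atomic<std::size_t> soft{0}, weak{0}, final_refs{0}, phantom{0};

    void add(std::size_t s, std::size_t w, std::size_t f, std::size_t p) {
      soft += s; weak += w; final_refs += f; phantom += p;
    }

    void log_summary() const {
      std::printf("Precleaned Soft %zu, Weak %zu, Final %zu, Phantom %zu\n",
                  soft.load(), weak.load(), final_refs.load(), phantom.load());
    }
  };

  int main() {
    PrecleanTotals totals;
    // Each worker reports its counts here instead of logging its own line.
    totals.add(3, 5, 0, 1);   // e.g. worker 0
    totals.add(2, 7, 1, 0);   // e.g. worker 1
    totals.log_summary();     // single aggregated summary
    return 0;
  }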

log_trace(gc, ref)("Worker (%d): Precleaning Soft (%zu), Weak (%zu), Final (%zu), Phantom (%zu) %f ms",
worker_id, ref_count_arr[0], ref_count_arr[1], ref_count_arr[2], ref_count_arr[3],
(Ticks::now() - preclean_start).seconds()*1000);
}

bool ReferenceProcessor::preclean_discovered_reflist(DiscoveredList& refs_list,
@@ -501,27 +501,6 @@ class ReferenceProcessorSpanMutator : StackObj {
}
};

// A utility class to temporarily change the MT'ness of
// reference discovery for the given ReferenceProcessor
// in the scope that contains it.
class ReferenceProcessorMTDiscoveryMutator: StackObj {
private:
ReferenceProcessor* _rp;
bool _saved_mt;

public:
ReferenceProcessorMTDiscoveryMutator(ReferenceProcessor* rp,
bool mt):
_rp(rp) {
_saved_mt = _rp->discovery_is_mt();
_rp->set_mt_discovery(mt);
}

~ReferenceProcessorMTDiscoveryMutator() {
_rp->set_mt_discovery(_saved_mt);
}
};

// A utility class to temporarily change the disposition
// of the "is_alive_non_header" closure field of the
// given ReferenceProcessor in the scope that contains it.