Skip to content

Commit

Permalink
limit partial array tasks, add length_addr() rather than breaking abs…
Browse files Browse the repository at this point in the history
…traction
  • Loading branch information
kimbarrett committed Sep 9, 2020
1 parent 164107c commit 9397514
Show file tree
Hide file tree
Showing 6 changed files with 291 additions and 66 deletions.
96 changes: 42 additions & 54 deletions src/hotspot/share/gc/g1/g1ParScanThreadState.cpp
Expand Up @@ -31,15 +31,19 @@
#include "gc/g1/g1RootClosures.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/g1Trace.hpp"
#include "gc/shared/partialArrayTaskStepper.inline.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "memory/allocation.inline.hpp"
#include "oops/access.inline.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/prefetch.inline.hpp"
#include "utilities/globalDefinitions.hpp"

G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
G1RedirtyCardsQueueSet* rdcqs,
uint worker_id,
uint n_workers,
size_t young_cset_length,
size_t optional_cset_length)
: _g1h(g1h),
Expand All @@ -60,8 +64,8 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
_surviving_young_words(NULL),
_surviving_words_length(young_cset_length + 1),
_old_gen_is_full(false),
_objarray_scan_chunk_size(ParGCArrayScanChunk),
_objarray_length_offset_in_bytes(arrayOopDesc::length_offset_in_bytes()),
_partial_objarray_chunk_size(ParGCArrayScanChunk),
_partial_array_stepper(n_workers),
_num_optional_regions(optional_cset_length),
_numa(g1h->numa()),
_obj_alloc_stat(NULL)
Expand Down Expand Up @@ -208,33 +212,27 @@ void G1ParScanThreadState::do_partial_array(PartialArrayScanTask task) {
assert(to_obj->is_objArray(), "must be obj array");
objArrayOop to_array = objArrayOop(to_obj);

// The next chunk index is in the length field of the to-space object.
// Atomically increment by the chunk size to claim the associated chunk.
char* to_addr = cast_from_oop<char*>(to_array);
char* length_addr_raw = (to_addr + _objarray_length_offset_in_bytes);
volatile int* length_addr = reinterpret_cast<int*>(length_addr_raw);
int end = Atomic::add(length_addr, _objarray_scan_chunk_size, memory_order_relaxed);
#ifdef ASSERT
// The from-space object contains the real length.
int length = objArrayOop(from_obj)->length();
assert(end <= length, "invariant: end %d, length %d", end, length);
assert(((length - end) % _objarray_scan_chunk_size) == 0,
"invariant: end %d, length %d, chunk size %d",
end, length, _objarray_scan_chunk_size);
#endif // ASSERT
PartialArrayTaskStepper::Step step
= _partial_array_stepper.next(objArrayOop(from_obj),
to_array,
_partial_objarray_chunk_size);
for (uint i = 0; i < step._ncreate; ++i) {
push_on_queue(ScannerTask(PartialArrayScanTask(from_obj)));
}

HeapRegion* hr = _g1h->heap_region_containing(to_array);
G1ScanInYoungSetter x(&_scanner, hr->is_young());
// Process claimed chunk. Note that the length field of
// to_obj_array is not correct. Fortunately, the iteration ignores
// the length and just relies on start / end. However, it does
// return the (incorrect) length, but we ignore it.
to_array->oop_iterate_range(&_scanner, end - _objarray_scan_chunk_size, end);
// Process claimed task. The length of to_array is not correct, but
// fortunately the iteration ignores the length field and just relies
// on start/end.
to_array->oop_iterate_range(&_scanner,
step._index,
step._index + _partial_objarray_chunk_size);
}

oop G1ParScanThreadState::start_partial_objArray(G1HeapRegionAttr dest_attr,
oop from_obj,
oop to_obj) {
void G1ParScanThreadState::start_partial_objArray(G1HeapRegionAttr dest_attr,
oop from_obj,
oop to_obj) {
assert(from_obj->is_objArray(), "precondition");
assert(from_obj->is_forwarded(), "precondition");
assert(from_obj->forwardee() == to_obj, "precondition");
Expand All @@ -243,34 +241,23 @@ oop G1ParScanThreadState::start_partial_objArray(G1HeapRegionAttr dest_attr,

objArrayOop to_array = objArrayOop(to_obj);

int length = objArrayOop(from_obj)->length();
int chunks = length / _objarray_scan_chunk_size;
int end = length % _objarray_scan_chunk_size;
assert(end <= length, "invariant");
assert(((length - end) % _objarray_scan_chunk_size) == 0, "invariant");
// The value of end can be 0, either because of a 0-length array or
// because length is a multiple of the chunk size. Both of those
// are rare and handled in the normal course of the iteration, so
// not worth doing anything special about here.

// Set to's length to end of initial chunk. Partial tasks use that
// length field as the start of the next chunk to process. Must be
// done before enqueuing partial scan tasks, in case other threads
// steal any of those tasks.
to_array->set_length(end);
// Push partial scan tasks for all but the initial chunk. Pushed
// before processing the initial chunk to allow other workers to
// steal while we're processing.
for (int i = 0; i < chunks; ++i) {
PartialArrayTaskStepper::Step step
= _partial_array_stepper.start(objArrayOop(from_obj),
to_array,
_partial_objarray_chunk_size);

// Push any needed partial scan tasks. Pushed before processing the
// initial chunk to allow other workers to steal while we're processing.
for (uint i = 0; i < step._ncreate; ++i) {
push_on_queue(ScannerTask(PartialArrayScanTask(from_obj)));
}

G1ScanInYoungSetter x(&_scanner, dest_attr.is_young());
// Process the initial chunk. No need to process the type in the
// klass, as it will already be handled by processing the built-in
// module. The length of to_array is not correct, but fortunately
// the iteration ignores that length field and relies on start/end.
to_array->oop_iterate_range(&_scanner, 0, end);
return to_array;
to_array->oop_iterate_range(&_scanner, 0, step._index);
}

void G1ParScanThreadState::dispatch_task(ScannerTask task) {
Expand Down Expand Up @@ -494,19 +481,18 @@ oop G1ParScanThreadState::do_copy_to_survivor_space(G1HeapRegionAttr const regio
obj->set_mark_raw(old_mark);
}

// Most objects are not arrays, so do one array check rather than both
// typeArray and objArray checks for each object.
// Most objects are not arrays, so do one array check rather than
// checking for each array category for each object.
if (klass->is_array_klass()) {
if (klass->is_typeArray_klass()) {
if (klass->is_objArray_klass()) {
start_partial_objArray(dest_attr, old, obj);
} else {
// Nothing needs to be done for typeArrays. Body doesn't contain
// any oops to scan, and the type in the klass will already be handled
// by processing the built-in module.
return obj;
} else if (klass->is_objArray_klass()) {
// Do special handling for objArray.
return start_partial_objArray(dest_attr, old, obj);
assert(klass->is_typeArray_klass(), "invariant");
}
// Not a special array, so fall through to generic handling.
return obj;
}

if (G1StringDedup::is_enabled() && (klass == SystemDictionary::String_klass())) {
Expand Down Expand Up @@ -544,7 +530,9 @@ G1ParScanThreadState* G1ParScanThreadStateSet::state_for_worker(uint worker_id)
assert(worker_id < _n_workers, "out of bounds access");
if (_states[worker_id] == NULL) {
_states[worker_id] =
new G1ParScanThreadState(_g1h, _rdcqs, worker_id, _young_cset_length, _optional_cset_length);
new G1ParScanThreadState(_g1h, _rdcqs,
worker_id, _n_workers,
_young_cset_length, _optional_cset_length);
}
return _states[worker_id];
}
Expand Down
13 changes: 6 additions & 7 deletions src/hotspot/share/gc/g1/g1ParScanThreadState.hpp
Expand Up @@ -32,6 +32,7 @@
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/shared/ageTable.hpp"
#include "gc/shared/partialArrayTaskStepper.hpp"
#include "gc/shared/taskqueue.hpp"
#include "memory/allocation.hpp"
#include "oops/oop.hpp"
Expand Down Expand Up @@ -79,9 +80,9 @@ class G1ParScanThreadState : public CHeapObj<mtGC> {
// Indicates whether in the last generation (old) there is no more space
// available for allocation.
bool _old_gen_is_full;

int _objarray_scan_chunk_size;
int _objarray_length_offset_in_bytes;
// Size (in elements) of a partial objArray task chunk.
int _partial_objarray_chunk_size;
PartialArrayTaskStepper _partial_array_stepper;

G1RedirtyCardsQueue& redirty_cards_queue() { return _rdcq; }
G1CardTable* ct() { return _ct; }
Expand All @@ -108,6 +109,7 @@ class G1ParScanThreadState : public CHeapObj<mtGC> {
G1ParScanThreadState(G1CollectedHeap* g1h,
G1RedirtyCardsQueueSet* rdcqs,
uint worker_id,
uint n_workers,
size_t young_cset_length,
size_t optional_cset_length);
virtual ~G1ParScanThreadState();
Expand Down Expand Up @@ -160,7 +162,7 @@ class G1ParScanThreadState : public CHeapObj<mtGC> {

private:
inline void do_partial_array(PartialArrayScanTask task);
inline oop start_partial_objArray(G1HeapRegionAttr dest_dir, oop from, oop to);
inline void start_partial_objArray(G1HeapRegionAttr dest_dir, oop from, oop to);

HeapWord* allocate_copy_slow(G1HeapRegionAttr* dest_attr,
oop old,
Expand Down Expand Up @@ -253,9 +255,6 @@ class G1ParScanThreadStateSet : public StackObj {
G1ParScanThreadState* state_for_worker(uint worker_id);

const size_t* surviving_young_words() const;

private:
G1ParScanThreadState* new_par_scan_state(uint worker_id, size_t young_cset_length);
};

#endif // SHARE_GC_G1_G1PARSCANTHREADSTATE_HPP
53 changes: 53 additions & 0 deletions src/hotspot/share/gc/shared/partialArrayTaskStepper.cpp
@@ -0,0 +1,53 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#include "precompiled.hpp"
#include "gc/shared/partialArrayTaskStepper.hpp"
#include "oops/arrayOop.hpp"
#include "utilities/globalDefinitions.hpp"

// Returns the cap on the number of partial array tasks, derived from the
// number of workers.
static uint compute_task_limit(uint n_workers) {
  // More than n_workers tasks at a time is never needed, while permitting
  // exactly that many makes the most of the available parallelism.
  const uint limit = n_workers;
  return limit;
}

static uint compute_task_fannout(uint task_limit) {
assert(task_limit > 0, "precondition");
// There is a tradeoff between providing parallelism more quickly and
// number of enqueued tasks. A constant fannout may be too slow when
// parallelism (and so task_limit) is large. A constant fraction might
// be overly eager. Using log2 attempts to balance between those.
uint result = log2_uint(task_limit);
// result must be > 0. result should be > 1 if task_limit > 1, to
// provide some potentially parallel tasks. But don't just +1 to
// avoid otherwise increasing rate of task generation.
if (result < 2) ++result;
return result;
}

// Sets up the stepper for the given number of workers: the task limit is
// computed from n_workers, and the fan-out is computed from that limit.
// The fan-out initializer is expressed in terms of n_workers directly so
// it does not rely on _task_limit having been initialized first.
PartialArrayTaskStepper::PartialArrayTaskStepper(uint n_workers)
  : _task_limit(compute_task_limit(n_workers)),
    _task_fannout(compute_task_fannout(compute_task_limit(n_workers)))
{
}
72 changes: 72 additions & 0 deletions src/hotspot/share/gc/shared/partialArrayTaskStepper.hpp
@@ -0,0 +1,72 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#ifndef SHARE_GC_SHARED_PARTIALARRAYTASKSTEPPER_HPP
#define SHARE_GC_SHARED_PARTIALARRAYTASKSTEPPER_HPP

#include "oops/arrayOop.hpp"
#include "utilities/globalDefinitions.hpp"

// Helper for handling PartialArrayTasks.
//
// When an array is large, we want to split it up into chunks that can be
// processed in parallel. Each task (implicitly) represents such a chunk.
// We can enqueue multiple tasks at the same time. We want to enqueue
// enough tasks to benefit from the available parallelism, while not so many
// as to substantially expand the task queues.
//
// A task directly refers to the from-space array. The from-space array's
// forwarding pointer refers to the associated to-space array, and its
// length is the actual length. The to-space array's length field is used to
// indicate processing progress. It is the starting index of the next chunk
// to process, or equals the actual length when there are no more chunks to
// be processed.
class PartialArrayTaskStepper {
// Limit on the number of partial array tasks to create for a given array.
// Computed by the constructor from the number of workers.
uint _task_limit;
// Maximum number of new tasks to create when processing an existing task.
// Computed by the constructor from _task_limit, so _task_limit must stay
// declared before this member (members are initialized in declaration order).
uint _task_fannout;

public:
// Constructs a stepper whose task limit and fan-out are derived from
// n_workers.
// NOTE(review): the fan-out computation asserts that the derived task limit
// is > 0, so n_workers is presumably required to be non-zero -- confirm
// against callers.
PartialArrayTaskStepper(uint n_workers);

// Result of a start() or next() call, telling the caller which part of the
// array to process and how many further tasks to enqueue.
struct Step {
int _index; // Array index for the step.
uint _ncreate; // Number of new tasks to create.
};

// Set to's length to the end of the initial chunk, which is the start of
// the first partial task if the array is large enough to need splitting.
// Returns a Step with _index being that index and _ncreate being the
// initial number of partial tasks to enqueue.
// The G1 caller enqueues _ncreate tasks and then processes the element
// range [0, _index) itself.
// NOTE(review): the definition is not in this header; presumably it lives
// in partialArrayTaskStepper.inline.hpp, which users must include.
inline Step start(arrayOop from, arrayOop to, int chunk_size);

// Increment to's length by chunk_size to claim the next chunk. Returns a
// Step with _index being the starting index of the claimed chunk and
// _ncreate being the number of additional partial tasks to enqueue.
// precondition: chunk_size must be the same as used to start the task sequence.
// The G1 caller enqueues _ncreate tasks and then processes the element
// range [_index, _index + chunk_size).
inline Step next(arrayOop from, arrayOop to, int chunk_size);
};

#endif // SHARE_GC_SHARED_PARTIALARRAYTASKSTEPPER_HPP

0 comments on commit 9397514

Please sign in to comment.