JDK-8260332: ParallelGC: Cooperative pretouch for oldgen expansion #2976

Open · wants to merge 4 commits into base: master
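The diff below spans four areas: G1's old GC allocation-region slow path, PSOldGen::expand_for_allocate in ParallelGC, a new UseMultithreadedPretouchForOldGen product flag, and a shared PretouchTaskCoordinator in the pretouch task files. The idea is that the GC thread which wins the expansion lock publishes the pre-touch work as a shared task, and threads that lose the try_lock help pre-touch the newly committed pages instead of blocking on the lock. What follows is a minimal, self-contained sketch of that coordination protocol, written against standard C++ atomics with illustrative names (Coordinator, expand_and_pretouch, help_or_wait, run_chunks); it models the NotReady/Ready/Done state machine of the patch, not HotSpot's actual Atomic/WorkGang code.

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstring>

// Standalone model of the cooperative pretouch protocol; names are illustrative.
class Coordinator {
  enum Status { NotReady, Ready, Done };
  std::atomic<int>         _status{Done};
  std::atomic<unsigned>    _workers{0};
  std::atomic<std::size_t> _next_chunk{0};
  char*       _base  = nullptr;
  std::size_t _bytes = 0;
  std::size_t _chunk = 0;

  void run_chunks() {
    _workers.fetch_add(1);
    std::size_t offset;
    // Claim one chunk at a time until the whole range has been handed out.
    while ((offset = _next_chunk.fetch_add(1) * _chunk) < _bytes) {
      std::size_t len = std::min(_chunk, _bytes - offset);
      std::memset(_base + offset, 0, len);  // stand-in for touching one word per page
    }
    _status.store(Done, std::memory_order_release);  // first finisher marks the task done
    _workers.fetch_sub(1);
  }

 public:
  // Called by the thread that won the expansion lock.
  void expand_and_pretouch(char* base, std::size_t bytes, std::size_t chunk) {
    _status.store(NotReady, std::memory_order_release);  // helpers spin instead of retrying
    _base = base; _bytes = bytes; _chunk = chunk;
    _next_chunk.store(0);
    _status.store(Ready, std::memory_order_release);     // publish the task to helpers
    run_chunks();                                        // the owner works too
    while (_workers.load(std::memory_order_acquire) != 0) {
      // wait until every helper has left run_chunks()
    }
  }

  // Called by threads that failed to take the expansion lock.
  void help_or_wait() {
    while (_status.load(std::memory_order_acquire) != Done) {
      if (_status.load(std::memory_order_acquire) == Ready) {
        run_chunks();  // help pre-touch instead of idling on the lock
        return;
      }
      // otherwise spin until the owner publishes the task or finishes
    }
  }
};

In the patch itself, expand_and_pretouch corresponds to PretouchTaskCoordinator::coordinate_and_execute (invoked from PretouchTask::pretouch during expansion) and help_or_wait corresponds to worker_wait_for_task, called by threads whose try_lock on the expansion lock fails.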
@@ -34,6 +34,7 @@
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/heapRegionType.hpp"
#include "gc/shared/tlab_globals.hpp"
#include "gc/shared/pretouchTask.hpp"
#include "utilities/align.hpp"

G1Allocator::G1Allocator(G1CollectedHeap* heap) :
@@ -271,12 +272,35 @@ HeapWord* G1Allocator::old_attempt_allocation(size_t min_word_size,
desired_word_size,
actual_word_size);
if (result == NULL && !old_is_full()) {
MutexLocker x(FreeList_lock, Mutex::_no_safepoint_check_flag);
result = old_gc_alloc_region()->attempt_allocation_locked(min_word_size,
desired_word_size,
actual_word_size);
if (result == NULL) {
set_old_full();
if (UseMultithreadedPretouchForOldGen) {
bool is_locked = false;
PretouchTaskCoordinator *task_coordinator = PretouchTaskCoordinator::get_task_coordinator();
while (true) {
is_locked = FreeList_lock->try_lock();
if (is_locked) {
task_coordinator->release_set_task_notready();
result = old_gc_alloc_region()->attempt_allocation_locked(min_word_size,
desired_word_size,
actual_word_size);
task_coordinator->release_set_task_done();
if (result == NULL) {
set_old_full();
}
FreeList_lock->unlock();
break;
} else {
// Let's help the expanding thread pretouch the memory.
task_coordinator->worker_wait_for_task();
}
}
} else {
MutexLocker x(FreeList_lock, Mutex::_no_safepoint_check_flag);
result = old_gc_alloc_region()->attempt_allocation_locked(min_word_size,
desired_word_size,
actual_word_size);
if (result == NULL) {
set_old_full();
}
}
}
return result;
@@ -31,6 +31,7 @@
#include "gc/parallel/psOldGen.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/gcLocker.hpp"
#include "gc/shared/pretouchTask.hpp"
#include "gc/shared/spaceDecorator.inline.hpp"
#include "logging/log.hpp"
#include "oops/oop.inline.hpp"
@@ -182,17 +183,44 @@ bool PSOldGen::expand_for_allocate(size_t word_size) {
assert(word_size > 0, "allocating zero words?");
bool result = true;
{
MutexLocker x(ExpandHeap_lock);
// Avoid "expand storms" by rechecking available space after obtaining
// the lock, because another thread may have already made sufficient
// space available. If insufficient space available, that will remain
// true until we expand, since we have the lock. Other threads may take
// the space we need before we can allocate it, regardless of whether we
// expand. That's okay, we'll just try expanding again.
if (object_space()->needs_expand(word_size)) {
result = expand(word_size*HeapWordSize);
bool is_locked = false;
PretouchTaskCoordinator *task_coordinator = PretouchTaskCoordinator::get_task_coordinator();
while (true) {
if (UseMultithreadedPretouchForOldGen) {
is_locked = ExpandHeap_lock->try_lock();
} else {
ExpandHeap_lock->lock();
is_locked = true;
}
// Avoid "expand storms" by rechecking available space after obtaining
// the lock, because another thread may have already made sufficient
// space available. If insufficient space available, that will remain
// true until we expand, since we have the lock. Other threads may take
// the space we need before we can allocate it, regardless of whether we
// expand. That's okay, we'll just try expanding again.
//
// TODO:
// The thread that holds the lock could expand once on behalf of all the
// waiting threads, which would be a win for all of them.
if (is_locked) {
if (object_space()->needs_expand(word_size)) {
// Marking the task not-ready makes the other threads spin in their loop.
task_coordinator->release_set_task_notready();
result = expand(word_size*HeapWordSize);
task_coordinator->release_set_task_done();
}

assert(task_coordinator->is_task_done_acquire(), "Task should be done at this point");
ExpandHeap_lock->unlock();
break;

} else {
// Let's help the expanding thread pretouch the memory.
task_coordinator->worker_wait_for_task();
}
}
}

if (GCExpandToAllocateDelayMillis > 0) {
os::naked_sleep(GCExpandToAllocateDelayMillis);
}
@@ -196,6 +196,9 @@
"bigger than this") \
range(1, max_jint/3) \
\
product(bool, UseMultithreadedPretouchForOldGen, false, \
"Old gen expansion during promotion failure pre-touches pages " \
"with a single thread. This option makes the pre-touch " \
"multi-threaded.") \
\
product(bool, AlwaysPreTouch, false, \
"Force all freshly committed pages to be pre-touched") \
@@ -28,6 +28,11 @@
#include "runtime/atomic.hpp"
#include "runtime/globals.hpp"
#include "runtime/os.hpp"
#include "runtime/nonJavaThread.hpp"
#include "utilities/ticks.hpp"

PretouchTaskCoordinator* PretouchTaskCoordinator::_task_coordinator = NULL;
uint PretouchTaskCoordinator::_object_creation = 0;

PretouchTask::PretouchTask(const char* task_name,
char* start_address,
@@ -63,16 +68,23 @@ void PretouchTask::work(uint worker_id) {
}
}

void PretouchTask::pretouch(const char* task_name, char* start_address, char* end_address,
size_t page_size, WorkGang* pretouch_gang) {

void PretouchTask::setup_chunk_size_and_page_size(size_t& chunk_size, size_t& page_size)
{
// Chunk size should be at least the (unmodified) page size, as having multiple threads
// pretouch a single page can decrease performance.
size_t chunk_size = MAX2(PretouchTask::chunk_size(), page_size);
chunk_size = MAX2(PretouchTask::chunk_size(), page_size);
#ifdef LINUX
// When using THP we need to always pre-touch using small pages as the OS will
// initially always use small pages.
page_size = UseTransparentHugePages ? (size_t)os::vm_page_size() : page_size;
#endif
}

void PretouchTask::pretouch(const char* task_name, char* start_address, char* end_address,
size_t page_size, WorkGang* pretouch_gang) {
size_t chunk_size = 0;
setup_chunk_size_and_page_size(chunk_size, page_size);

PretouchTask task(task_name, start_address, end_address, page_size, chunk_size);
size_t total_bytes = pointer_delta(end_address, start_address, sizeof(char));
@@ -87,12 +99,112 @@ void PretouchTask::pretouch(const char* task_name, char* start_address, char* en
uint num_workers = (uint)MIN2(num_chunks, (size_t)pretouch_gang->total_workers());
log_debug(gc, heap)("Running %s with %u workers for " SIZE_FORMAT " work units pre-touching " SIZE_FORMAT "B.",
task.name(), num_workers, num_chunks, total_bytes);

pretouch_gang->run_task(&task, num_workers);
} else {
log_debug(gc, heap)("Running %s pre-touching " SIZE_FORMAT "B.",
task.name(), total_bytes);
task.work(0);

Ticks start = Ticks::now();
if (UseMultithreadedPretouchForOldGen) {
PretouchTaskCoordinator::coordinate_and_execute(task_name, start_address, end_address, page_size);
} else {
// The following log line is commented out for test purposes.
//log_debug(gc, heap)("Running %s pre-touching " SIZE_FORMAT "B.",
// task.name(), total_bytes);
task.work(0);
}
Ticks end = Ticks::now();
log_debug(gc, heap)("Running %s pre-touching " SIZE_FORMAT "B %.4lfms",
task.name(), total_bytes, (double)(end-start).milliseconds());

}
}

// Called to lazily initialize _task_coordinator; safe to call from multiple threads.
void PretouchTaskCoordinator::createObject() {
  uint my_id = Atomic::fetch_and_add(&_object_creation, 1u);
  if (my_id == 0) {
    // The first thread creates and publishes the object.
    Atomic::release_store(&_task_coordinator,
                          new PretouchTaskCoordinator("Pretouch during oldgen expansion", NULL, NULL));
  } else {
    // Other threads wait until _task_coordinator has been initialized.
    PretouchTaskCoordinator* is_initialized = NULL;
    do {
      SpinPause();
      is_initialized = Atomic::load_acquire(&_task_coordinator);
    } while (is_initialized == NULL);
  }
  Atomic::sub(&_object_creation, 1u);
}

PretouchTaskCoordinator::PretouchTaskCoordinator(const char* task_name, char* start_address,
                                                 char* end_address) :
  _n_threads(0),
  _task_status(Done),
  _pretouch_task(NULL) {
}


void PretouchTaskCoordinator::coordinate_and_execute(const char* task_name, char* start_address,
char* end_address, size_t page_size) {

size_t total_bytes = pointer_delta(end_address, start_address, sizeof(char));

if (total_bytes == 0) {
return;
}

PretouchTaskCoordinator *task_coordinator = get_task_coordinator();

size_t chunk_size = 0;
PretouchTask::setup_chunk_size_and_page_size(chunk_size, page_size);

size_t num_chunks = (total_bytes + chunk_size - 1) / chunk_size;

PretouchTask task(task_name, start_address, end_address, page_size, chunk_size);
task_coordinator->release_set_pretouch_task(&task);

// The following log line is commented out for test purposes.
//log_debug(gc, heap)("Running %s with " SIZE_FORMAT " work units pre-touching " SIZE_FORMAT "B.",
// task->name(), num_chunks, total_bytes);

// Mark the pretouch task ready here so that other threads waiting to expand the old gen
// can join the pretouch task.
task_coordinator->release_set_task_ready();

// Execute the task
task_coordinator->task_execute();

// Wait for other threads to finish.
do {
SpinPause();
} while (task_coordinator->wait_for_all_threads_acquire());

}


void PretouchTaskCoordinator::task_execute() {
  Atomic::add(&_n_threads, 1u);

  PretouchTask* task = const_cast<PretouchTask*>(pretouch_task_acquire());
  task->work(static_cast<AbstractGangWorker*>(Thread::current())->id());

  // The first thread to finish marks the task as completed.
  if (!is_task_done_acquire()) {
    release_set_task_done();
  }

  Atomic::sub(&_n_threads, 1u);
}

void PretouchTaskCoordinator::worker_wait_for_task() {
  while (!is_task_done_acquire()) {
    if (is_task_ready_acquire()) {
      task_execute();
      break;
    }
    SpinPause();
  }
}

@@ -44,6 +44,50 @@ class PretouchTask : public AbstractGangTask {
static void pretouch(const char* task_name, char* start_address, char* end_address,
size_t page_size, WorkGang* pretouch_gang);

static void setup_chunk_size_and_page_size(size_t& chunk_size, size_t& page_size);
};

class PretouchTaskCoordinator : public CHeapObj<mtGC> {
volatile uint _n_threads; // Number of threads participating in pretouch.

enum TaskStatus { NotReady, Ready, Done };
volatile size_t _task_status;

volatile PretouchTask* _pretouch_task;

PretouchTaskCoordinator(const char* task_name, char* start_address, char* end_address) ;

static uint _object_creation;
public:

static PretouchTaskCoordinator* _task_coordinator;

static PretouchTaskCoordinator* get_task_coordinator() {
if (_task_coordinator == NULL) {
createObject();
}
return _task_coordinator;
}

void release_set_task_status(TaskStatus status) { Atomic::release_store(&_task_status, (size_t)status); }
void release_set_task_done() { release_set_task_status(Done); }
void release_set_task_ready() { release_set_task_status(Ready); }
void release_set_task_notready() { release_set_task_status(NotReady); }
bool is_task_ready_acquire() { return Atomic::load_acquire(&_task_status) == Ready; }
bool is_task_done_acquire() { return Atomic::load_acquire(&_task_status) == Done; }
bool wait_for_all_threads_acquire() { return Atomic::load_acquire(&_n_threads) != 0; }

void release_set_pretouch_task(PretouchTask *task) { Atomic::release_store(&_pretouch_task, task); }
volatile PretouchTask* pretouch_task_acquire() { return Atomic::load_acquire(&_pretouch_task); }

void task_execute();

static void createObject();
// The expanding GC thread coordinates with other GC threads to execute the pretouch task.
static void coordinate_and_execute(const char* task_name, char* start_address, char* end_address, size_t page_size);

// GC threads that fail to acquire the lock during old gen expansion call this to help with the pretouch task.
void worker_wait_for_task();
};

#endif // SHARE_GC_SHARED_PRETOUCH_HPP