Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8264123: add ThreadsList.is_valid() support #3255

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter
Filter file types
Jump to
Jump to file
Failed to load files.

Always

Just for now

@@ -173,11 +173,12 @@ class Thread: public ThreadShadow {
friend class ScanHazardPtrPrintMatchingThreadsClosure; // for get_threads_hazard_ptr(), is_hazard_ptr_tagged() access
friend class ThreadsSMRSupport; // for _nested_threads_hazard_ptr_cnt, _threads_hazard_ptr, _threads_list_ptr access
friend class ThreadsListHandleTest; // for _nested_threads_hazard_ptr_cnt, _threads_hazard_ptr, _threads_list_ptr access
friend class ValidateHazardPtrsClosure; // for get_threads_hazard_ptr(), untag_hazard_ptr() access

ThreadsList* volatile _threads_hazard_ptr;
SafeThreadsListPtr* _threads_list_ptr;
ThreadsList* cmpxchg_threads_hazard_ptr(ThreadsList* exchange_value, ThreadsList* compare_value);
ThreadsList* get_threads_hazard_ptr();
ThreadsList* get_threads_hazard_ptr() const;
void set_threads_hazard_ptr(ThreadsList* new_list);
static bool is_hazard_ptr_tagged(ThreadsList* list) {
return (intptr_t(list) & intptr_t(1)) == intptr_t(1);
@@ -87,7 +87,7 @@ inline ThreadsList* Thread::cmpxchg_threads_hazard_ptr(ThreadsList* exchange_val
return (ThreadsList*)Atomic::cmpxchg(&_threads_hazard_ptr, compare_value, exchange_value);
}

inline ThreadsList* Thread::get_threads_hazard_ptr() {
inline ThreadsList* Thread::get_threads_hazard_ptr() const {
return (ThreadsList*)Atomic::load_acquire(&_threads_hazard_ptr);
}

@@ -282,6 +282,9 @@ class ScanHazardPtrGatherProtectedThreadsClosure : public ThreadClosure {
if (thread->cmpxchg_threads_hazard_ptr(NULL, current_list) == current_list) return;
}

guarantee(ThreadsList::is_valid(current_list), "current_list="

This comment has been minimized.

@dholmes-ora

dholmes-ora Mar 30, 2021
Member

Why a guarantee and not an assert? What is the cost of doing this check?

This comment has been minimized.

@dcubed-ojdk

dcubed-ojdk Mar 30, 2021
Author Member

This is a guarantee() instead of an assert() because this failure mode
is rarely caught with 'release' bits and very, very rarely caught with
'fastdebug' or 'slowdebug' bits. It's a very tight race.

My plan is to keep this as a guarantee() for April and switch it to an
assert in mid to late May.

I don't have data on what this correctness check costs, but it's just
a value check on the new _magic field value. Very much like other
"magic" field value checks that we have all over the VM...

This comment has been minimized.

@dholmes-ora

dholmes-ora Mar 30, 2021
Member

I should have edited this comment once I saw what the actual check was.

Keeping this as a guarantee initially then switching to assert seems a good strategy.

INTPTR_FORMAT " is not valid!", p2i(current_list));

// The current JavaThread has a hazard ptr (ThreadsList reference)
// which might be _java_thread_list or it might be an older
// ThreadsList that has been removed but not freed. In either case,
@@ -294,6 +297,10 @@ class ScanHazardPtrGatherProtectedThreadsClosure : public ThreadClosure {

// Closure to gather hazard ptrs (ThreadsList references) into a hash table.
//
// Since this closure gathers hazard ptrs that may be tagged, this hash
// table of hazard ptrs should only be used for value comparison and not
// traversal of the ThreadsList.
//
class ScanHazardPtrGatherThreadsListClosure : public ThreadClosure {
private:
ThreadScanHashtable *_table;
@@ -304,18 +311,23 @@ class ScanHazardPtrGatherThreadsListClosure : public ThreadClosure {
assert_locked_or_safepoint(Threads_lock);

if (thread == NULL) return;
ThreadsList *threads = thread->get_threads_hazard_ptr();
if (threads == NULL) {
return;
ThreadsList *hazard_ptr = thread->get_threads_hazard_ptr();
if (hazard_ptr == NULL) return;
if (!Thread::is_hazard_ptr_tagged(hazard_ptr)) {
// We only validate hazard_ptrs that are not tagged since a tagged
// hazard ptr can be deleted at any time.
guarantee(ThreadsList::is_valid(hazard_ptr), "hazard_ptr=" INTPTR_FORMAT

This comment has been minimized.

@dholmes-ora

dholmes-ora Mar 30, 2021
Member

Same query about guarantee versus assert

This comment has been minimized.

@dcubed-ojdk

dcubed-ojdk Mar 30, 2021
Author Member

Same answer and to reiterate the important part:

My plan is to keep this as a guarantee() for April and switch it to an
assert in mid to late May.

" for thread=" INTPTR_FORMAT " is not valid!", p2i(hazard_ptr),
p2i(thread));
}
// In this closure we always ignore the tag that might mark this
// hazard ptr as not yet verified. If we happen to catch an
// unverified hazard ptr that is subsequently discarded (not
// published), then the only side effect is that we might keep a
// to-be-deleted ThreadsList alive a little longer.
threads = Thread::untag_hazard_ptr(threads);
if (!_table->has_entry((void*)threads)) {
_table->add_entry((void*)threads);
hazard_ptr = Thread::untag_hazard_ptr(hazard_ptr);
if (!_table->has_entry((void*)hazard_ptr)) {
_table->add_entry((void*)hazard_ptr);
}
}
};
@@ -355,6 +367,27 @@ class ScanHazardPtrPrintMatchingThreadsClosure : public ThreadClosure {
}
};

// Closure to validate hazard ptrs.
//
class ValidateHazardPtrsClosure : public ThreadClosure {
public:
ValidateHazardPtrsClosure() {};

virtual void do_thread(Thread* thread) {
assert_locked_or_safepoint(Threads_lock);

if (thread == NULL) return;
ThreadsList *hazard_ptr = thread->get_threads_hazard_ptr();
if (hazard_ptr == NULL) return;
// If the hazard ptr is unverified, then ignore it since it could
// be deleted at any time now.
Comment on lines +382 to +383

This comment has been minimized.

@dholmes-ora

dholmes-ora Mar 30, 2021
Member

Why the difference in comment compared to line 317/318 when the subsequent checks are identical?

This comment has been minimized.

@dcubed-ojdk

dcubed-ojdk Mar 30, 2021
Author Member

The ValidateHazardPtrsClosure is called from places that will use
the collected hazard ptr for traversal. Deletion while being traversed
would lead to the crashes that we want to avoid.

The ScanHazardPtrGatherThreadsListClosure is used to gather
hazard ptrs where the only the address value of the hazard ptr is
used and no traversal takes place. This is why I added the comment
on L300-302 above.

if (Thread::is_hazard_ptr_tagged(hazard_ptr)) return;
guarantee(ThreadsList::is_valid(hazard_ptr), "hazard_ptr=" INTPTR_FORMAT
" for thread=" INTPTR_FORMAT " is not valid!", p2i(hazard_ptr),
p2i(thread));
}
};

// Closure to determine if the specified JavaThread is found by
// threads_do().
//
@@ -403,7 +436,7 @@ void SafeThreadsListPtr::acquire_stable_list_fast_path() {
ThreadsList* threads;

// Stable recording of a hazard ptr for SMR. This code does not use
// locks so its use of the _smr_java_thread_list & _threads_hazard_ptr
// locks so its use of the _java_thread_list & _threads_hazard_ptr
// fields is racy relative to code that uses those fields with locks.
// OrderAccess and Atomic functions are used to deal with those races.
//
@@ -419,7 +452,7 @@ void SafeThreadsListPtr::acquire_stable_list_fast_path() {
ThreadsList* unverified_threads = Thread::tag_hazard_ptr(threads);
_thread->set_threads_hazard_ptr(unverified_threads);

// If _smr_java_thread_list has changed, we have lost a race with
// If _java_thread_list has changed, we have lost a race with
// Threads::add() or Threads::remove() and have to try again.
if (ThreadsSMRSupport::get_java_thread_list() != threads) {
continue;
@@ -530,6 +563,10 @@ void SafeThreadsListPtr::release_stable_list() {
// An exiting thread might be waiting in smr_delete(); we need to
// check with delete_lock to be sure.
ThreadsSMRSupport::release_stable_list_wake_up(_has_ref_count);
guarantee(_previous == NULL || ThreadsList::is_valid(_previous->_list),
"_previous->_list=" INTPTR_FORMAT
" is not valid after calling release_stable_list_wake_up!",
p2i(_previous->_list));
}
}

@@ -601,6 +638,7 @@ static JavaThread* const* make_threads_list_data(int entries) {
}

ThreadsList::ThreadsList(int entries) :
_magic(THREADS_LIST_MAGIC),
_length(entries),
_next_list(NULL),
_threads(make_threads_list_data(entries)),
@@ -611,6 +649,7 @@ ThreadsList::~ThreadsList() {
if (_threads != empty_threads_list_data) {
FREE_C_HEAP_ARRAY(JavaThread*, _threads);
}
_magic = 0xDEADBEEF;
}

// Add a JavaThread to a ThreadsList. The returned ThreadsList is a
@@ -882,6 +921,9 @@ void ThreadsSMRSupport::free_list(ThreadsList* threads) {
log_debug(thread, smr)("tid=" UINTX_FORMAT ": ThreadsSMRSupport::free_list: threads=" INTPTR_FORMAT " is not freed.", os::current_thread_id(), p2i(threads));
}

ValidateHazardPtrsClosure validate_cl;
threads_do(&validate_cl);

delete scan_table;
}

@@ -1096,8 +1138,9 @@ void ThreadsSMRSupport::log_statistics() {

// Print SMR info for a thread to a given output stream.
void ThreadsSMRSupport::print_info_on(const Thread* thread, outputStream* st) {
if (thread->_threads_hazard_ptr != NULL) {
st->print(" _threads_hazard_ptr=" INTPTR_FORMAT, p2i(thread->_threads_hazard_ptr));
ThreadsList* hazard_ptr = thread->get_threads_hazard_ptr();
if (hazard_ptr != NULL) {
st->print(" _threads_hazard_ptr=" INTPTR_FORMAT, p2i(hazard_ptr));
}
if (EnableThreadSMRStatistics && thread->_threads_list_ptr != NULL) {
// The count is only interesting if we have a _threads_list_ptr.
@@ -162,11 +162,13 @@ class ThreadsSMRSupport : AllStatic {
// A fast list of JavaThreads.
//
class ThreadsList : public CHeapObj<mtThread> {
enum { THREADS_LIST_MAGIC = (int)(('T' << 24) | ('L' << 16) | ('S' << 8) | 'T') };
friend class VMStructs;
friend class SafeThreadsListPtr; // for {dec,inc}_nested_handle_cnt() access
friend class ThreadsSMRSupport; // for _nested_handle_cnt, {add,remove}_thread(), {,set_}next_list() access
friend class ThreadsListHandleTest; // for _nested_handle_cnt access

uint _magic;
const uint _length;
ThreadsList* _next_list;
JavaThread *const *const _threads;
@@ -203,6 +205,8 @@ class ThreadsList : public CHeapObj<mtThread> {
int find_index_of_JavaThread(JavaThread* target);
JavaThread* find_JavaThread_from_java_tid(jlong java_tid) const;
bool includes(const JavaThread * const p) const;

static bool is_valid(ThreadsList* list) { return list->_magic == THREADS_LIST_MAGIC; }

This comment has been minimized.

@dholmes-ora

dholmes-ora Mar 30, 2021
Member

This seems like a necessary, but not sufficient, check for validity. Unless we zap memory when these things are freed I don't see how this ensures validity.

This comment has been minimized.

@dcubed-ojdk

dcubed-ojdk Mar 30, 2021
Author Member

You missed L652 in the ThreadsList::~ThreadsList() above:

_magic = 0xDEADBEEF;

When the destructor runs, the magic value is stomped. I've captured
failures where the is_valid() check observes 0xDEADBEEF and also
failures where the ThreadsList has been recycled into something else
and the _magic value is some other value that doesn't match
THREADS_LIST_MAGIC.

This comment has been minimized.

@dholmes-ora

dholmes-ora Mar 30, 2021
Member

Sorry yes I did miss it - I misread the diff due to hidden lines.

};

// An abstract safe ptr to a ThreadsList comprising either a stable hazard ptr
ProTip! Use n and p to navigate between commits in a pull request.