Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 7 additions & 26 deletions src/hotspot/share/runtime/thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@
#include "runtime/objectMonitor.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/osThread.hpp"
#include "runtime/prefetch.inline.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/safepointMechanism.inline.hpp"
#include "runtime/safepointVerifiers.hpp"
Expand Down Expand Up @@ -2478,28 +2477,6 @@ size_t JavaThread::_stack_size_at_create = 0;
bool Threads::_vm_complete = false;
#endif

static inline void *prefetch_and_load_ptr(void **addr, intx prefetch_interval) {
Prefetch::read((void*)addr, prefetch_interval);
return *addr;
}

// Possibly the ugliest for loop the world has seen. C++ does not allow
// multiple types in the declaration section of the for loop. In this case
// we are only dealing with pointers and hence can cast them. It looks ugly
// but macros are ugly and therefore it's fine to make things absurdly ugly.
#define DO_JAVA_THREADS(LIST, X) \
for (JavaThread *MACRO_scan_interval = (JavaThread*)(uintptr_t)PrefetchScanIntervalInBytes, \
*MACRO_list = (JavaThread*)(LIST), \
**MACRO_end = ((JavaThread**)((ThreadsList*)MACRO_list)->threads()) + ((ThreadsList*)MACRO_list)->length(), \
**MACRO_current_p = (JavaThread**)((ThreadsList*)MACRO_list)->threads(), \
*X = (JavaThread*)prefetch_and_load_ptr((void**)MACRO_current_p, (intx)MACRO_scan_interval); \
MACRO_current_p != MACRO_end; \
MACRO_current_p++, \
X = (JavaThread*)prefetch_and_load_ptr((void**)MACRO_current_p, (intx)MACRO_scan_interval))

// All JavaThreads
#define ALL_JAVA_THREADS(X) DO_JAVA_THREADS(ThreadsSMRSupport::get_java_thread_list(), X)

// All NonJavaThreads (i.e., every non-JavaThread in the system).
void Threads::non_java_threads_do(ThreadClosure* tc) {
NoSafepointVerifier nsv;
Expand All @@ -2508,6 +2485,10 @@ void Threads::non_java_threads_do(ThreadClosure* tc) {
}
}

// All JavaThreads
#define ALL_JAVA_THREADS(X) \
for (JavaThread* X : *ThreadsSMRSupport::get_java_thread_list())

// All JavaThreads
void Threads::java_threads_do(ThreadClosure* tc) {
assert_locked_or_safepoint(Threads_lock);
Expand Down Expand Up @@ -3641,7 +3622,7 @@ GrowableArray<JavaThread*>* Threads::get_pending_threads(ThreadsList * t_list,
GrowableArray<JavaThread*>* result = new GrowableArray<JavaThread*>(count);

int i = 0;
DO_JAVA_THREADS(t_list, p) {
for (JavaThread* p : *t_list) {
if (!p->can_call_java()) continue;

// The first stage of async deflation does not affect any field
Expand All @@ -3662,7 +3643,7 @@ JavaThread *Threads::owning_thread_from_monitor_owner(ThreadsList * t_list,
// NULL owner means not locked so we can skip the search
if (owner == NULL) return NULL;

DO_JAVA_THREADS(t_list, p) {
for (JavaThread* p : *t_list) {
// first, see if owner is the address of a Java thread
if (owner == (address)p) return p;
}
Expand All @@ -3677,7 +3658,7 @@ JavaThread *Threads::owning_thread_from_monitor_owner(ThreadsList * t_list,
// Lock Word in the owning Java thread's stack.
//
JavaThread* the_owner = NULL;
DO_JAVA_THREADS(t_list, q) {
for (JavaThread* q : *t_list) {
if (q->is_lock_owned(owner)) {
the_owner = q;
break;
Expand Down
24 changes: 24 additions & 0 deletions src/hotspot/share/runtime/threadSMR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "services/threadIdTable.hpp"
#include "services/threadService.hpp"
#include "utilities/copy.hpp"
#include "utilities/debug.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/ostream.hpp"
#include "utilities/powerOfTwo.hpp"
Expand Down Expand Up @@ -629,6 +630,29 @@ static JavaThread* const* make_threads_list_data(int entries) {
return data;
}

#ifdef ASSERT

ThreadsList::Iterator::Iterator() : _thread_ptr(nullptr), _list(nullptr) {}

uint ThreadsList::Iterator::check_index(ThreadsList* list, uint i) {
assert(i <= list->length(), "invalid index %u", i);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't that just be a '<' check?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, less-eq is correct. The index being checked here can be 1-past-the-last-element, for an end-iterator. Including 0 for an empty sequence.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay - thanks for explaining.

return i;
}

void ThreadsList::Iterator::assert_not_singular() const {
assert(_list != nullptr, "singular iterator");
}

void ThreadsList::Iterator::assert_dereferenceable() const {
assert(_thread_ptr < (_list->threads() + _list->length()), "not dereferenceable");
}

void ThreadsList::Iterator::assert_same_list(Iterator i) const {
assert(_list == i._list, "iterators from different lists");
}

#endif // ASSERT

ThreadsList::ThreadsList(int entries) :
_magic(THREADS_LIST_MAGIC),
_length(entries),
Expand Down
28 changes: 28 additions & 0 deletions src/hotspot/share/runtime/threadSMR.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "runtime/mutexLocker.hpp"
#include "runtime/thread.hpp"
#include "runtime/timer.hpp"
#include "utilities/debug.hpp"

class JavaThread;
class Monitor;
Expand Down Expand Up @@ -192,6 +193,10 @@ class ThreadsList : public CHeapObj<mtThread> {
explicit ThreadsList(int entries);
~ThreadsList();

class Iterator;
inline Iterator begin();
inline Iterator end();

template <class T>
void threads_do(T *cl) const;

Expand All @@ -211,6 +216,29 @@ class ThreadsList : public CHeapObj<mtThread> {
#endif
};

class ThreadsList::Iterator {
JavaThread* const* _thread_ptr;
DEBUG_ONLY(ThreadsList* _list;)

static uint check_index(ThreadsList* list, uint i) NOT_DEBUG({ return i; });
void assert_not_singular() const NOT_DEBUG_RETURN;
void assert_dereferenceable() const NOT_DEBUG_RETURN;
void assert_same_list(Iterator i) const NOT_DEBUG_RETURN;

public:
Iterator() NOT_DEBUG(= default); // Singular iterator.
inline Iterator(ThreadsList* list, uint i);

inline bool operator==(Iterator other) const;
inline bool operator!=(Iterator other) const;

inline JavaThread* operator*() const;
inline JavaThread* operator->() const;

inline Iterator& operator++();
inline Iterator operator++(int);
};

// An abstract safe ptr to a ThreadsList comprising either a stable hazard ptr
// for leaves, or a retained reference count for nested uses. The user of this
// API does not need to know which mechanism is providing the safety.
Expand Down
51 changes: 51 additions & 0 deletions src/hotspot/share/runtime/threadSMR.inline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,57 @@
#include "memory/iterator.hpp"
#include "runtime/prefetch.inline.hpp"
#include "runtime/thread.inline.hpp"
#include "utilities/debug.hpp"
#include "utilities/macros.hpp"

ThreadsList::Iterator::Iterator(ThreadsList* list, uint i) :
_thread_ptr(list->threads() + check_index(list, i))
DEBUG_ONLY(COMMA _list(list))
{}

bool ThreadsList::Iterator::operator==(Iterator i) const {
assert_not_singular();
assert_same_list(i);
return _thread_ptr == i._thread_ptr;
}

bool ThreadsList::Iterator::operator!=(Iterator i) const {
return !operator==(i);
}

JavaThread* ThreadsList::Iterator::operator*() const {
assert_not_singular();
assert_dereferenceable();
Prefetch::read(const_cast<JavaThread**>(_thread_ptr), PrefetchScanIntervalInBytes);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This prefetch is included because the old code had it. But I'm dubious about it. This is just linear iteration through an array, which seems like the canonical best case for leaving it to the hardware, rather than doing cachelinesize/ptrsize software prefetches per cache line. I'm hoping the original authors can comment.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original author that added the prefetch is @fisk. He's reviewed an earlier
version of this fix and I'm hoping he's around to review this version.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that it should be okay without prefetch. I added it when I was running on a 4096 strand SPARC machine running over 8000 threads, and walking the list would approach 1 ms. While intuitively linear scan should be good enough, I did actually measure slight improvements with explicit prefetching, and the slight improvements did seem worth it at the time as the amount of time spent in there was non-trivial.
Now we don't support that kind of hardware any longer, so I don't think this trick would necessarily spark as much joy any longer.
I don't mind if we do or do not perform the prefetching. I'm okay either way.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm going to leave resolution of the prefetch question to a follow-up bug.
I think that discussion deserves a separate issue.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK.

return *_thread_ptr;
}

JavaThread* ThreadsList::Iterator::operator->() const {
return operator*();
}

ThreadsList::Iterator& ThreadsList::Iterator::operator++() {
assert_not_singular();
assert_dereferenceable();
++_thread_ptr;
return *this;
}

ThreadsList::Iterator ThreadsList::Iterator::operator++(int) {
assert_not_singular();
assert_dereferenceable();
Iterator result = *this;
++_thread_ptr;
return result;
}

ThreadsList::Iterator ThreadsList::begin() {
return Iterator(this, 0);
}

ThreadsList::Iterator ThreadsList::end() {
return Iterator(this, length());
}

// Devirtualize known thread closure types.
template <class T>
Expand Down