8274298: JFR Thread Sampler thread must not acquire malloc lock after suspending a thread because of possible deadlock #5977

Closed
wants to merge 7 commits
@@ -27,8 +27,10 @@
#include "jfr/recorder/jfrRecorder.hpp"
#include "jfr/periodic/sampling/jfrCallTrace.hpp"
#include "jfr/periodic/sampling/jfrThreadSampler.hpp"
#include "jfr/recorder/checkpoint/types/traceid/jfrTraceIdLoadBarrier.inline.hpp"
#include "jfr/recorder/service/jfrOptionSet.hpp"
#include "jfr/recorder/stacktrace/jfrStackTraceRepository.hpp"
#include "jfr/recorder/storage/jfrBuffer.hpp"
#include "jfr/support/jfrThreadId.hpp"
#include "jfr/support/jfrThreadLocal.hpp"
#include "jfr/utilities/jfrTime.hpp"
@@ -323,10 +325,14 @@ class JfrThreadSampler : public NonJavaThread {
JavaThread* _last_thread_native;
size_t _interval_java;
size_t _interval_native;
const size_t _min_valid_free_size_bytes; // for enqueue buffer monitoring
int _cur_index;
const u4 _max_frames;
volatile bool _disenrolled;

const JfrBuffer* get_enqueue_buffer();
const JfrBuffer* renew_if_full(const JfrBuffer* enqueue_buffer);

JavaThread* next_thread(ThreadsList* t_list, JavaThread* first_sampled, JavaThread* current);
void task_stacktrace(JfrSampleType type, JavaThread** last_thread);
JfrThreadSampler(size_t interval_java, size_t interval_native, u4 max_frames);
@@ -396,6 +402,7 @@ JfrThreadSampler::JfrThreadSampler(size_t interval_java, size_t interval_native,
_last_thread_native(NULL),
_interval_java(interval_java),
_interval_native(interval_native),
_min_valid_free_size_bytes(JfrOptionSet::stackdepth() * sizeof(intptr_t)),
_cur_index(-1),
_max_frames(max_frames),
_disenrolled(true) {
@@ -520,6 +527,15 @@ void JfrThreadSampler::post_run() {
delete this;
}

const JfrBuffer* JfrThreadSampler::get_enqueue_buffer() {
const JfrBuffer* buffer = JfrTraceIdLoadBarrier::get_enqueue_buffer(this);
return buffer != nullptr ? renew_if_full(buffer) : JfrTraceIdLoadBarrier::renew_enqueue_buffer(this);
}

const JfrBuffer* JfrThreadSampler::renew_if_full(const JfrBuffer* enqueue_buffer) {
assert(enqueue_buffer != nullptr, "invariant");
return enqueue_buffer->free_size() < _min_valid_free_size_bytes ? JfrTraceIdLoadBarrier::renew_enqueue_buffer(this) : enqueue_buffer;
}

void JfrThreadSampler::task_stacktrace(JfrSampleType type, JavaThread** last_thread) {
ResourceMark rm;
@@ -530,7 +546,6 @@ void JfrThreadSampler::task_stacktrace(JfrSampleType type, JavaThread** last_thr
const uint sample_limit = JAVA_SAMPLE == type ? MAX_NR_OF_JAVA_SAMPLES : MAX_NR_OF_NATIVE_SAMPLES;
uint num_samples = 0;
JavaThread* start = NULL;

{
elapsedTimer sample_time;
sample_time.start();
@@ -542,6 +557,13 @@ void JfrThreadSampler::task_stacktrace(JfrSampleType type, JavaThread** last_thr
_cur_index = tlh.list()->find_index_of_JavaThread(*last_thread);
JavaThread* current = _cur_index != -1 ? *last_thread : NULL;

// Explicitly monitor the available space of the thread-local buffer used for enqueuing klasses as part of tagging methods.
// We do this because if space becomes sparse, we cannot rely on the implicit allocation of a new buffer as part of the
// regular tag mechanism. If the free list is empty, a malloc could result, and the problem with that is that the thread
// we have suspended could be the holder of the malloc lock. Instead, the buffer is pre-emptively renewed before thread suspension.
const JfrBuffer* enqueue_buffer = get_enqueue_buffer();
assert(enqueue_buffer != nullptr, "invariant");

while (num_samples < sample_limit) {
current = next_thread(tlh.list(), start, current);
if (current == NULL) {
@@ -553,9 +575,11 @@ void JfrThreadSampler::task_stacktrace(JfrSampleType type, JavaThread** last_thr
if (current->is_Compiler_thread()) {
continue;
}
assert(enqueue_buffer->free_size() >= _min_valid_free_size_bytes, "invariant");
if (sample_task.do_sample_thread(current, _frames, _max_frames, type)) {
num_samples++;
}
enqueue_buffer = renew_if_full(enqueue_buffer);
}
*last_thread = current; // remember the thread we last attempted to sample
}
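
The comment in task_stacktrace() above captures the core of the fix: the sampler may only take a fresh enqueue buffer from the pre-built free list while no thread is suspended, because if that free list is empty the implicit allocation in the regular tag mechanism can end up in malloc, and the thread just suspended may itself hold the malloc lock. A minimal standalone sketch of that pattern follows; it is illustrative only (Buffer, Pool, sample_one and kMinFree are made-up names, and suspension is reduced to comments), not HotSpot code.

#include <cassert>
#include <cstddef>
#include <cstdio>
#include <vector>

struct Buffer {
  size_t capacity = 1024;
  size_t used = 0;
  size_t free_size() const { return capacity - used; }
};

// Stands in for the epoch-storage free list: hands out buffers that were set up
// in advance, so taking one here does not go through malloc.
struct Pool {
  std::vector<Buffer> slots = std::vector<Buffer>(8);
  size_t next = 0;
  Buffer* acquire() { return next < slots.size() ? &slots[next++] : nullptr; }
};

// Mirrors _min_valid_free_size_bytes: stackdepth (default 64) entries of pointer size.
constexpr size_t kMinFree = 64 * sizeof(void*);

// Models do_sample_thread(): consumes space in the current buffer but never allocates.
void sample_one(int thread_id, Buffer* buf) {
  buf->used += 300;  // pretend some klasses were tagged while the thread was suspended
  std::printf("sampled thread %d, %zu bytes left\n", thread_id, buf->free_size());
}

int main() {
  Pool pool;
  // Renew up front, while no thread is suspended: taking a buffer is still safe here.
  Buffer* buf = pool.acquire();
  for (int tid = 0; tid < 5 && buf != nullptr; ++tid) {
    assert(buf->free_size() >= kMinFree);  // never suspend a thread without reserved space
    // suspend(tid); walk its stack, tagging methods into buf; resume(tid);
    sample_one(tid, buf);
    // Renew between samples, after the target has been resumed, never while it is stopped.
    if (buf->free_size() < kMinFree) {
      buf = pool.acquire();
    }
  }
  return 0;
}
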
@@ -28,6 +28,7 @@
#include "jfr/utilities/jfrAllocation.hpp"

class JavaThread;
class JfrBuffer;
class JfrStackFrame;
class JfrThreadSampler;
class Thread;
@@ -247,6 +247,14 @@ void JfrTraceIdKlassQueue::enqueue(const Klass* klass) {
_queue->enqueue(klass);
}

JfrBuffer* JfrTraceIdKlassQueue::get_enqueue_buffer(Thread* thread) {
return _queue->thread_local_storage(thread);
}

JfrBuffer* JfrTraceIdKlassQueue::renew_enqueue_buffer(Thread* thread) {
return _queue->renew(thread);
}

void JfrTraceIdKlassQueue::iterate(klass_callback callback, bool previous_epoch) {
assert_locked_or_safepoint(ClassLoaderDataGraph_lock);
KlassFunctor functor(callback);
@@ -28,6 +28,7 @@
#include "jfr/utilities/jfrAllocation.hpp"
#include "jfr/utilities/jfrEpochQueue.hpp"

class JfrBuffer;
class Klass;
class Thread;

@@ -47,7 +48,7 @@ class KlassFunctor {
// It details how to store and process an enqueued Klass representation. See utilities/jfrEpochQueue.hpp.
//
template <typename Buffer>
class JfrEpochQueueKlassPolicy {
class JfrEpochQueueKlassPolicy : public JfrCHeapObj {
public:
typedef Buffer* BufferPtr;
typedef Klass Type;
@@ -64,8 +65,11 @@ class JfrEpochQueueKlassPolicy {
};

class JfrTraceIdKlassQueue : public JfrCHeapObj {
friend class JfrTraceIdLoadBarrier;
private:
JfrEpochQueue<JfrEpochQueueKlassPolicy>* _queue;
JfrBuffer* get_enqueue_buffer(Thread* thread);
JfrBuffer* renew_enqueue_buffer(Thread* thread);
public:
JfrTraceIdKlassQueue();
~JfrTraceIdKlassQueue();
@@ -68,3 +68,11 @@ void JfrTraceIdLoadBarrier::do_klasses(klass_callback callback, bool previous_ep
assert_locked_or_safepoint(ClassLoaderDataGraph_lock);
klass_queue().iterate(callback, previous_epoch);
}

JfrBuffer* JfrTraceIdLoadBarrier::get_enqueue_buffer(Thread* thread) {
return klass_queue().get_enqueue_buffer(thread);
}

JfrBuffer* JfrTraceIdLoadBarrier::renew_enqueue_buffer(Thread* thread) {
return klass_queue().renew_enqueue_buffer(thread);
}
@@ -29,6 +29,7 @@
#include "memory/allocation.hpp"

class ClassLoaderData;
class JfrBuffer;
class Klass;
class Method;
class ModuleEntry;
@@ -69,12 +70,16 @@ class PackageEntry;
class JfrTraceIdLoadBarrier : AllStatic {
friend class Jfr;
friend class JfrCheckpointManager;
friend class JfrStackTrace;
friend class JfrThreadSampler;
private:
static bool initialize();
static void clear();
static void destroy();
static void enqueue(const Klass* klass);
static void load_barrier(const Klass* klass);
static JfrBuffer* get_enqueue_buffer(Thread* thread);
static JfrBuffer* renew_enqueue_buffer(Thread* thread);
public:
static traceid load(const ClassLoaderData* cld);
static traceid load(const Klass* klass);
@@ -27,6 +27,7 @@
#include "jfr/recorder/checkpoint/types/traceid/jfrTraceId.inline.hpp"
#include "jfr/recorder/repository/jfrChunkWriter.hpp"
#include "jfr/recorder/stacktrace/jfrStackTrace.hpp"
#include "jfr/recorder/storage/jfrBuffer.hpp"
#include "jfr/support/jfrMethodLookup.hpp"
#include "memory/allocation.inline.hpp"
#include "oops/instanceKlass.inline.hpp"
@@ -175,19 +176,30 @@ void vframeStreamSamples::samples_next() {
} while (!fill_from_frame());
}

static const size_t min_valid_free_size_bytes = 16;

static inline bool is_full(const JfrBuffer* enqueue_buffer) {
return enqueue_buffer->free_size() < min_valid_free_size_bytes;
}

bool JfrStackTrace::record_thread(JavaThread& thread, frame& frame) {
// Explicitly monitor the available space of the thread-local buffer used for enqueuing klasses as part of tagging methods.
// We do this because if space becomes sparse, we cannot rely on the implicit allocation of a new buffer as part of the
// regular tag mechanism. If the free list is empty, a malloc could result, and the problem with that is that the thread
// we have suspended could be the holder of the malloc lock. If there is no more available space, the attempt is aborted.
const JfrBuffer* const enqueue_buffer = JfrTraceIdLoadBarrier::get_enqueue_buffer(Thread::current());
assert(enqueue_buffer != nullptr, "invariant");
vframeStreamSamples st(&thread, frame, false);
u4 count = 0;
_reached_root = true;

_hash = 1;
while (!st.at_end()) {
if (count >= _max_frames) {
_reached_root = false;
break;
}
const Method* method = st.method();
if (!Method::is_valid_method(method)) {
if (!Method::is_valid_method(method) || is_full(enqueue_buffer)) {
// we throw away everything we've gathered in this sample since
// none of it is safe
return false;
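
In record_thread() above the situation differs from the sampler loop: the walk happens while the target thread is already suspended, so the buffer cannot be renewed at that point, and running low on space means the whole sample is thrown away. A compact illustrative sketch of that abort path (Buffer, Frame and kMinFreePerFrame are made-up names, not HotSpot code):

#include <cstddef>

struct Buffer {
  size_t capacity;
  size_t used;
  size_t free_size() const { return capacity - used; }
};

struct Frame { bool valid; };

constexpr size_t kMinFreePerFrame = 16;  // mirrors min_valid_free_size_bytes above

// Returns false, and the caller discards the partial sample, rather than allocating:
// with the target suspended, a renewal could end up in malloc and deadlock.
bool record_frames(const Frame* frames, size_t count, Buffer* enqueue_buffer) {
  for (size_t i = 0; i < count; ++i) {
    if (!frames[i].valid || enqueue_buffer->free_size() < kMinFreePerFrame) {
      return false;  // unsafe to tag this frame: abort, do not renew, do not malloc
    }
    enqueue_buffer->used += 8;  // pretend the frame's klass id was enqueued
  }
  return true;
}

int main() {
  Frame frames[4] = {{true}, {true}, {true}, {true}};
  Buffer buf{30, 0};
  // Exits with 1: free space drops below 16 bytes on the third frame and the walk aborts.
  return record_frames(frames, 4, &buf) ? 0 : 1;
}
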
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,8 @@

#include "jfr/recorder/storage/jfrEpochStorage.hpp"

class Thread;

/*
* An ElementPolicy template template argument provides the implementation for how elements
* associated with the queue are encoded and managed by exposing the following members:
@@ -43,7 +45,7 @@
* size_t operator()(const u1* next_element, Callback& callback, bool previous_epoch = false);
*/
template <template <typename> class ElementPolicy>
class JfrEpochQueue : public JfrCHeapObj {
class JfrEpochQueue : public ElementPolicy<typename JfrEpochStorage::Buffer> {
public:
typedef JfrEpochStorage::Buffer Buffer;
typedef JfrEpochStorage::BufferPtr BufferPtr;
@@ -53,22 +55,20 @@ class JfrEpochQueue : public JfrCHeapObj {
~JfrEpochQueue();
bool initialize(size_t min_buffer_size, size_t free_list_cache_count_limit, size_t cache_prealloc_count);
void enqueue(TypePtr t);
BufferPtr renew(Thread* thread);
template <typename Callback>
void iterate(Callback& callback, bool previous_epoch = false);
private:
typedef ElementPolicy<Buffer> Policy;
Policy _policy;
JfrEpochStorage* _storage;
BufferPtr storage_for_element(TypePtr t, size_t element_size);

template <typename Callback>
class ElementDispatch {
private:
Callback& _callback;
Policy& _policy;
JfrEpochQueue& _queue;
public:
typedef Buffer Type;
ElementDispatch(Callback& callback, Policy& policy);
ElementDispatch(Callback& callback, JfrEpochQueue& queue);
size_t operator()(const u1* element, bool previous_epoch);
};
};
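
The structural change in jfrEpochQueue.hpp above swaps composition for inheritance: the ElementPolicy is no longer a private _policy member but a base class, which is what lets the new renew() (and, via the friend declarations, the thread sampler) reach the policy's per-thread buffer accessors; the JfrCHeapObj base correspondingly moves from the queue to the Klass policy. A generic sketch of the idea, with illustrative names rather than the real HotSpot types:

#include <cassert>
#include <cstddef>

struct Buf { size_t free_size() const { return 256; } };

template <typename Buffer>
struct KlassPolicy {                     // stands in for JfrEpochQueueKlassPolicy
  Buffer* thread_local_storage(void* /*thread*/) const { return tls_; }
  void set_thread_local_storage(Buffer* b, void* /*thread*/) { tls_ = b; }
 private:
  Buffer* tls_ = nullptr;                // per-thread in the real code; a single slot here
};

template <template <typename> class ElementPolicy>
class EpochQueue : public ElementPolicy<Buf> {   // was: ElementPolicy<Buf> _policy;
 public:
  // Hand back the current buffer and install a fresh one for this thread.
  // 'this->' is required because the base class depends on the template parameter.
  Buf* renew(void* thread, Buf* fresh_from_free_list) {
    this->set_thread_local_storage(fresh_from_free_list, thread);
    assert(this->thread_local_storage(thread) == fresh_from_free_list);
    return fresh_from_free_list;
  }
};

int main() {
  EpochQueue<KlassPolicy> queue;
  Buf fresh;
  return queue.renew(nullptr, &fresh) == &fresh ? 0 : 1;
}
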
@@ -26,13 +26,12 @@
#define SHARE_JFR_UTILITIES_JFREPOCHQUEUE_INLINE_HPP

#include "jfr/utilities/jfrEpochQueue.hpp"

#include "jfr/recorder/storage/jfrEpochStorage.inline.hpp"
#include "jfr/recorder/storage/jfrStorageUtils.inline.hpp"
#include "runtime/thread.inline.hpp"

template <template <typename> class ElementPolicy>
JfrEpochQueue<ElementPolicy>::JfrEpochQueue() : _policy(), _storage(NULL) {}
JfrEpochQueue<ElementPolicy>::JfrEpochQueue() : _storage(NULL) {}

template <template <typename> class ElementPolicy>
JfrEpochQueue<ElementPolicy>::~JfrEpochQueue() {
@@ -46,53 +45,67 @@ bool JfrEpochQueue<ElementPolicy>::initialize(size_t min_buffer_size, size_t fre
return _storage != NULL && _storage->initialize(min_buffer_size, free_list_cache_count_limit, cache_prealloc_count);
}

template <template <typename> class ElementPolicy>
inline typename JfrEpochQueue<ElementPolicy>::BufferPtr
JfrEpochQueue<ElementPolicy>::renew(Thread* thread) {
assert(thread != nullptr, "invariant");
BufferPtr buffer = this->thread_local_storage(thread);
if (buffer != nullptr) {
_storage->release(buffer);
}
buffer = _storage->acquire(0, thread);
this->set_thread_local_storage(buffer, thread);
assert(this->thread_local_storage(thread) == buffer, "invariant");
return buffer;
}

template <template <typename> class ElementPolicy>
inline typename JfrEpochQueue<ElementPolicy>::BufferPtr
JfrEpochQueue<ElementPolicy>::storage_for_element(JfrEpochQueue<ElementPolicy>::TypePtr t, size_t element_size) {
assert(_policy.element_size(t) == element_size, "invariant");
assert(this->element_size(t) == element_size, "invariant");
Thread* const thread = Thread::current();
BufferPtr buffer = _policy.thread_local_storage(thread);
if (buffer == NULL) {
BufferPtr buffer = this->thread_local_storage(thread);
if (buffer == nullptr) {
buffer = _storage->acquire(element_size, thread);
_policy.set_thread_local_storage(buffer, thread);
this->set_thread_local_storage(buffer, thread);
} else if (buffer->free_size() < element_size) {
_storage->release(buffer);
buffer = _storage->acquire(element_size, thread);
_policy.set_thread_local_storage(buffer, thread);
this->set_thread_local_storage(buffer, thread);
}
assert(buffer->free_size() >= element_size, "invariant");
assert(_policy.thread_local_storage(thread) == buffer, "invariant");
assert(this->thread_local_storage(thread) == buffer, "invariant");
return buffer;
}

template <template <typename> class ElementPolicy>
void JfrEpochQueue<ElementPolicy>::enqueue(JfrEpochQueue<ElementPolicy>::TypePtr t) {
assert(t != NULL, "invariant");
size_t element_size = _policy.element_size(t);
assert(t != nullptr, "invariant");
size_t element_size = this->element_size(t);
BufferPtr buffer = storage_for_element(t, element_size);
assert(buffer != NULL, "invariant");
_policy.store_element(t, buffer);
assert(buffer != nullptr, "invariant");
this->store_element(t, buffer);
buffer->set_pos(element_size);
}

template <template <typename> class ElementPolicy>
template <typename Callback>
JfrEpochQueue<ElementPolicy>::ElementDispatch<Callback>::ElementDispatch(Callback& callback, JfrEpochQueue<ElementPolicy>::Policy& policy) :
_callback(callback),_policy(policy) {}
JfrEpochQueue<ElementPolicy>::ElementDispatch<Callback>::ElementDispatch(Callback& callback, JfrEpochQueue<ElementPolicy>& queue) :
_callback(callback), _queue(queue) {}

template <template <typename> class ElementPolicy>
template <typename Callback>
size_t JfrEpochQueue<ElementPolicy>::ElementDispatch<Callback>::operator()(const u1* element, bool previous_epoch) {
assert(element != NULL, "invariant");
return _policy(element, _callback, previous_epoch);
assert(element != nullptr, "invariant");
return _queue(element, _callback, previous_epoch);
}

template <template <typename> class ElementPolicy>
template <typename Callback>
void JfrEpochQueue<ElementPolicy>::iterate(Callback& callback, bool previous_epoch) {
typedef ElementDispatch<Callback> ElementDispatcher;
typedef EpochDispatchOp<ElementDispatcher> QueueDispatcher;
ElementDispatcher element_dispatcher(callback, _policy);
ElementDispatcher element_dispatcher(callback, *this);
QueueDispatcher dispatch(element_dispatcher, previous_epoch);
_storage->iterate(dispatch, previous_epoch);
DEBUG_ONLY(_storage->verify_previous_empty();)