diff --git a/src/hotspot/share/jfr/periodic/sampling/jfrCPUTimeThreadSampler.cpp b/src/hotspot/share/jfr/periodic/sampling/jfrCPUTimeThreadSampler.cpp
index fd7226eab2d..a59503f50d8 100644
--- a/src/hotspot/share/jfr/periodic/sampling/jfrCPUTimeThreadSampler.cpp
+++ b/src/hotspot/share/jfr/periodic/sampling/jfrCPUTimeThreadSampler.cpp
@@ -25,7 +25,6 @@
 #include "jfr/periodic/sampling/jfrCPUTimeThreadSampler.hpp"
 #include "logging/log.hpp"
-
 #if defined(LINUX)
 #include "jfr/periodic/sampling/jfrThreadSampling.hpp"
 #include "jfr/support/jfrThreadLocal.hpp"
@@ -68,7 +67,7 @@ static JavaThread* get_java_thread_if_valid() {
 }
 
 JfrCPUTimeTraceQueue::JfrCPUTimeTraceQueue(u4 capacity) :
-  _data(nullptr), _capacity(capacity), _head(0), _lost_samples(0) {
+  _data(nullptr), _capacity(capacity), _head(0), _lost_samples(0), _lost_samples_due_to_queue_full(0) {
   if (capacity != 0) {
     _data = JfrCHeapObj::new_array<JfrCPUTimeSampleRequest>(capacity);
   }
@@ -111,10 +110,13 @@ void JfrCPUTimeTraceQueue::set_size(u4 size) {
 }
 
 u4 JfrCPUTimeTraceQueue::capacity() const {
-  return _capacity;
+  return Atomic::load_acquire(&_capacity);
 }
 
 void JfrCPUTimeTraceQueue::set_capacity(u4 capacity) {
+  if (capacity == Atomic::load(&_capacity)) {
+    return;
+  }
   _head = 0;
   if (_data != nullptr) {
     assert(_capacity != 0, "invariant");
@@ -125,7 +127,7 @@ void JfrCPUTimeTraceQueue::set_capacity(u4 capacity) {
   } else {
     _data = nullptr;
   }
-  _capacity = capacity;
+  Atomic::release_store(&_capacity, capacity);
 }
 
 bool JfrCPUTimeTraceQueue::is_empty() const {
@@ -141,28 +143,51 @@ void JfrCPUTimeTraceQueue::increment_lost_samples() {
   Atomic::inc(&_lost_samples);
 }
 
+void JfrCPUTimeTraceQueue::increment_lost_samples_due_to_queue_full() {
+  Atomic::inc(&_lost_samples_due_to_queue_full);
+}
+
 u4 JfrCPUTimeTraceQueue::get_and_reset_lost_samples() {
   return Atomic::xchg(&_lost_samples, (u4)0);
 }
 
-void JfrCPUTimeTraceQueue::resize(u4 capacity) {
-  if (capacity != _capacity) {
-    set_capacity(capacity);
-  }
+u4 JfrCPUTimeTraceQueue::get_and_reset_lost_samples_due_to_queue_full() {
+  return Atomic::xchg(&_lost_samples_due_to_queue_full, (u4)0);
 }
 
-void JfrCPUTimeTraceQueue::resize_for_period(u4 period_millis) {
-  u4 capacity = CPU_TIME_QUEUE_CAPACITY;
-  if (period_millis > 0 && period_millis < 10) {
-    capacity = (u4) ((double) capacity * 10 / period_millis);
-  }
-  resize(capacity);
+void JfrCPUTimeTraceQueue::init() {
+  set_capacity(JfrCPUTimeTraceQueue::CPU_TIME_QUEUE_INITIAL_CAPACITY);
 }
 
 void JfrCPUTimeTraceQueue::clear() {
   Atomic::release_store(&_head, (u4)0);
 }
 
+void JfrCPUTimeTraceQueue::resize_if_needed() {
+  u4 lost_samples_due_to_queue_full = get_and_reset_lost_samples_due_to_queue_full();
+  if (lost_samples_due_to_queue_full == 0) {
+    return;
+  }
+  u4 capacity = Atomic::load(&_capacity);
+  if (capacity < CPU_TIME_QUEUE_MAX_CAPACITY) {
+    float ratio = (float)lost_samples_due_to_queue_full / (float)capacity;
+    int factor = 1;
+    if (ratio > 8) { // idea is to quickly scale the queue in the worst case
+      factor = (int)ratio;
+    } else if (ratio > 2) {
+      factor = 8;
+    } else if (ratio > 0.5) {
+      factor = 4;
+    } else if (ratio > 0.01) {
+      factor = 2;
+    }
+    if (factor > 1) {
+      u4 new_capacity = MIN2(CPU_TIME_QUEUE_MAX_CAPACITY, capacity * factor);
+      set_capacity(new_capacity);
+    }
+  }
+}
+
 // A throttle is either a rate or a fixed period
 class JfrCPUSamplerThrottle {
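For reference, resize_if_needed() above maps the ratio of samples dropped because the queue was full to the current capacity onto a growth factor and caps the result at CPU_TIME_QUEUE_MAX_CAPACITY (2000 entries). The following standalone Java sketch reproduces that arithmetic with a few worked examples; the thresholds and the cap are taken from the hunk, while the class and method names are invented purely for illustration.

public class QueueGrowthPolicySketch {
    static final long MAX_CAPACITY = 2000; // mirrors CPU_TIME_QUEUE_MAX_CAPACITY

    // Returns the capacity the queue would have after one drain that observed
    // `lostDueToQueueFull` dropped samples at the given capacity.
    static long grownCapacity(long capacity, long lostDueToQueueFull) {
        if (lostDueToQueueFull == 0 || capacity >= MAX_CAPACITY) {
            return capacity;
        }
        double ratio = (double) lostDueToQueueFull / capacity;
        long factor = 1;
        if (ratio > 8) {
            factor = (long) ratio; // scale quickly in the worst case
        } else if (ratio > 2) {
            factor = 8;
        } else if (ratio > 0.5) {
            factor = 4;
        } else if (ratio > 0.01) {
            factor = 2;
        }
        return Math.min(MAX_CAPACITY, capacity * factor);
    }

    public static void main(String[] args) {
        System.out.println(grownCapacity(20, 300));   // ratio 15    -> factor 15 -> 300
        System.out.println(grownCapacity(300, 30));   // ratio 0.1   -> factor 2  -> 600
        System.out.println(grownCapacity(600, 3));    // ratio 0.005 -> unchanged, stays 600
        System.out.println(grownCapacity(2000, 500)); // already at the cap -> 2000
    }
}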
@@ -206,6 +231,8 @@ class JfrCPUSamplerThread : public NonJavaThread {
   volatile bool _is_async_processing_of_cpu_time_jfr_requests_triggered;
   volatile bool _warned_about_timer_creation_failure;
   volatile bool _signal_handler_installed;
+  DEBUG_ONLY(volatile bool _out_of_stack_walking_enabled;)
+  DEBUG_ONLY(volatile u8 _out_of_stack_walking_iterations;)
 
   static const u4 STOP_SIGNAL_BIT = 0x80000000;
 
@@ -252,6 +279,16 @@ class JfrCPUSamplerThread : public NonJavaThread {
   virtual void print_on(outputStream* st) const;
 
   void trigger_async_processing_of_cpu_time_jfr_requests();
+
+#ifdef ASSERT
+  void set_out_of_stack_walking_enabled(bool runnable) {
+    Atomic::release_store(&_out_of_stack_walking_enabled, runnable);
+  }
+
+  u8 out_of_stack_walking_iterations() const {
+    return Atomic::load(&_out_of_stack_walking_iterations);
+  }
+#endif
 };
 
 JfrCPUSamplerThread::JfrCPUSamplerThread(JfrCPUSamplerThrottle& throttle) :
@@ -277,7 +314,7 @@ void JfrCPUSamplerThread::on_javathread_create(JavaThread* thread) {
   }
   JfrThreadLocal* tl = thread->jfr_thread_local();
   assert(tl != nullptr, "invariant");
-  tl->cpu_time_jfr_queue().resize_for_period(_current_sampling_period_ns / 1000000);
+  tl->cpu_time_jfr_queue().init();
   timer_t timerid;
   if (create_timer_for_thread(thread, timerid)) {
     tl->set_cpu_timer(&timerid);
@@ -296,12 +333,14 @@ void JfrCPUSamplerThread::on_javathread_terminate(JavaThread* thread) {
   if (timer == nullptr) {
     return; // no timer was created for this thread
   }
+  tl->acquire_cpu_time_jfr_dequeue_lock();
   tl->unset_cpu_timer();
   tl->deallocate_cpu_time_jfr_queue();
   s4 lost_samples = tl->cpu_time_jfr_queue().lost_samples();
   if (lost_samples > 0) {
     JfrCPUTimeThreadSampling::send_lost_event(JfrTicks::now(), JfrThreadLocal::thread_id(thread), lost_samples);
   }
+  tl->release_cpu_time_jfr_queue_lock();
 }
 
 void JfrCPUSamplerThread::start_thread() {
@@ -354,10 +393,12 @@ void JfrCPUSamplerThread::run() {
       recompute_period_if_needed();
       last_recompute_check = os::javaTimeNanos();
     }
-
-    if (Atomic::cmpxchg(&_is_async_processing_of_cpu_time_jfr_requests_triggered, true, false)) {
-      stackwalk_threads_in_native();
-    }
+    DEBUG_ONLY(if (Atomic::load_acquire(&_out_of_stack_walking_enabled)) {)
+    if (Atomic::cmpxchg(&_is_async_processing_of_cpu_time_jfr_requests_triggered, true, false)) {
+      DEBUG_ONLY(Atomic::inc(&_out_of_stack_walking_iterations);)
+      stackwalk_threads_in_native();
+    }
+    DEBUG_ONLY(})
     os::naked_sleep(100);
   }
 }
@@ -547,6 +588,21 @@ void JfrCPUTimeThreadSampling::handle_timer_signal(siginfo_t* info, void* contex
   _sampler->decrement_signal_handler_count();
 }
 
+#ifdef ASSERT
+void JfrCPUTimeThreadSampling::set_out_of_stack_walking_enabled(bool runnable) {
+  if (_instance != nullptr && _instance->_sampler != nullptr) {
+    _instance->_sampler->set_out_of_stack_walking_enabled(runnable);
+  }
+}
+
+u8 JfrCPUTimeThreadSampling::out_of_stack_walking_iterations() {
+  if (_instance != nullptr && _instance->_sampler != nullptr) {
+    return _instance->_sampler->out_of_stack_walking_iterations();
+  }
+  return 0;
+}
+#endif
+
 void JfrCPUSamplerThread::sample_thread(JfrSampleRequest& request, void* ucontext, JavaThread* jt, JfrThreadLocal* tl, JfrTicks& now) {
   JfrSampleRequestBuilder::build_cpu_time_sample_request(request, ucontext, jt, jt->jfr_thread_local(), now);
 }
@@ -592,6 +648,7 @@ void JfrCPUSamplerThread::handle_timer_signal(siginfo_t* info, void* context) {
     }
   } else {
     queue.increment_lost_samples();
+    queue.increment_lost_samples_due_to_queue_full();
   }
 
   if (jt->thread_state() == _thread_in_native) {
@@ -815,4 +872,10 @@ void JfrCPUTimeThreadSampling::on_javathread_create(JavaThread* thread) {
 }
 
 void JfrCPUTimeThreadSampling::on_javathread_terminate(JavaThread* thread) {
 }
+
+#ifdef ASSERT
+void JfrCPUTimeThreadSampling::set_out_of_stack_walking_enabled(bool runnable) {
+  warn();
+}
+#endif
 
 #endif // defined(LINUX) && defined(INCLUDE_JFR)
diff --git a/src/hotspot/share/jfr/periodic/sampling/jfrCPUTimeThreadSampler.hpp b/src/hotspot/share/jfr/periodic/sampling/jfrCPUTimeThreadSampler.hpp
index dae0be5c3a7..e17e63fc3ed 100644
--- a/src/hotspot/share/jfr/periodic/sampling/jfrCPUTimeThreadSampler.hpp
+++ b/src/hotspot/share/jfr/periodic/sampling/jfrCPUTimeThreadSampler.hpp
@@ -50,12 +50,15 @@ class JfrCPUTimeTraceQueue {
   static const u4 CPU_TIME_QUEUE_CAPACITY = 500;
 
   JfrCPUTimeSampleRequest* _data;
-  u4 _capacity;
+  volatile u4 _capacity;
   // next unfilled index
   volatile u4 _head;
   volatile u4 _lost_samples;
+  volatile u4 _lost_samples_due_to_queue_full;
+
+  static const u4 CPU_TIME_QUEUE_INITIAL_CAPACITY = 20;
+  static const u4 CPU_TIME_QUEUE_MAX_CAPACITY = 2000;
 
 public:
   JfrCPUTimeTraceQueue(u4 capacity);
@@ -81,12 +84,17 @@ class JfrCPUTimeTraceQueue {
 
   void increment_lost_samples();
 
+  void increment_lost_samples_due_to_queue_full();
+
   // returns the previous lost samples count
   u4 get_and_reset_lost_samples();
 
-  void resize(u4 capacity);
+  u4 get_and_reset_lost_samples_due_to_queue_full();
+
+  void resize_if_needed();
 
-  void resize_for_period(u4 period_millis);
+  // initialize the queue with its initial capacity
+  void init();
 
   void clear();
@@ -130,6 +138,10 @@ class JfrCPUTimeThreadSampling : public JfrCHeapObj {
   static void send_lost_event(const JfrTicks& time, traceid tid, s4 lost_samples);
 
   static void trigger_async_processing_of_cpu_time_jfr_requests();
+
+  DEBUG_ONLY(static void set_out_of_stack_walking_enabled(bool runnable);)
+
+  DEBUG_ONLY(static u8 out_of_stack_walking_iterations();)
 };
 
 #else
@@ -150,6 +162,8 @@ class JfrCPUTimeThreadSampling : public JfrCHeapObj {
   static void on_javathread_create(JavaThread* thread);
   static void on_javathread_terminate(JavaThread* thread);
+  DEBUG_ONLY(static void set_out_of_stack_walking_enabled(bool runnable);)
+  DEBUG_ONLY(static u8 out_of_stack_walking_iterations();)
 };
 
 #endif // defined(LINUX)
diff --git a/src/hotspot/share/jfr/periodic/sampling/jfrThreadSampling.cpp b/src/hotspot/share/jfr/periodic/sampling/jfrThreadSampling.cpp
index ddc9d59b295..f7a725fce6d 100644
--- a/src/hotspot/share/jfr/periodic/sampling/jfrThreadSampling.cpp
+++ b/src/hotspot/share/jfr/periodic/sampling/jfrThreadSampling.cpp
@@ -368,6 +368,7 @@ static void drain_enqueued_cpu_time_requests(const JfrTicks& now, JfrThreadLocal
   tl->set_has_cpu_time_jfr_requests(false);
   if (queue.lost_samples() > 0) {
     JfrCPUTimeThreadSampling::send_lost_event(now, JfrThreadLocal::thread_id(jt), queue.get_and_reset_lost_samples());
+    queue.resize_if_needed();
   }
   if (lock) {
     tl->release_cpu_time_jfr_queue_lock();
diff --git a/src/hotspot/share/jfr/support/jfrThreadLocal.cpp b/src/hotspot/share/jfr/support/jfrThreadLocal.cpp
index 291169b9aa7..037faee1b9f 100644
--- a/src/hotspot/share/jfr/support/jfrThreadLocal.cpp
+++ b/src/hotspot/share/jfr/support/jfrThreadLocal.cpp
@@ -656,7 +656,7 @@ JfrCPUTimeTraceQueue& JfrThreadLocal::cpu_time_jfr_queue() {
 }
 
 void JfrThreadLocal::deallocate_cpu_time_jfr_queue() {
-  cpu_time_jfr_queue().resize(0);
+  cpu_time_jfr_queue().set_capacity(0);
 }
 
 void JfrThreadLocal::set_do_async_processing_of_cpu_time_jfr_requests(bool wants) {
"gc/shared/concurrentGCBreakpoints.hpp" #include "gc/shared/gcConfig.hpp" #include "gc/shared/genArguments.hpp" +#include "jfr/periodic/sampling/jfrCPUTimeThreadSampler.hpp" #include "jvm.h" #include "jvmtifiles/jvmtiEnv.hpp" #include "logging/log.hpp" @@ -2683,6 +2684,32 @@ WB_ENTRY(void, WB_WaitUnsafe(JNIEnv* env, jobject wb, jint time)) os::naked_short_sleep(time); WB_END +WB_ENTRY(void, WB_BusyWait(JNIEnv* env, jobject wb, jint time)) + ThreadToNativeFromVM ttn(thread); + u8 start = os::current_thread_cpu_time(); + u8 target_duration = time * (u8)1000000; + while (os::current_thread_cpu_time() - start < target_duration) { + for (volatile int i = 0; i < 1000000; i++); + } +WB_END + +WB_ENTRY(jboolean, WB_CPUSamplerSetOutOfStackWalking(JNIEnv* env, jobject wb, jboolean enable)) + #if defined(ASSERT) && INCLUDE_JFR && defined(LINUX) + JfrCPUTimeThreadSampling::set_out_of_stack_walking_enabled(enable == JNI_TRUE); + return JNI_TRUE; + #else + return JNI_FALSE; + #endif +WB_END + +WB_ENTRY(jlong, WB_CPUSamplerOutOfStackWalkingIterations(JNIEnv* env, jobject wb)) + #if defined(ASSERT) && INCLUDE_JFR && defined(LINUX) + return (jlong)JfrCPUTimeThreadSampling::out_of_stack_walking_iterations(); + #else + return 0; + #endif +WB_END + WB_ENTRY(jstring, WB_GetLibcName(JNIEnv* env, jobject o)) ThreadToNativeFromVM ttn(thread); jstring info_string = env->NewStringUTF(XSTR(LIBC)); @@ -3032,6 +3059,9 @@ static JNINativeMethod methods[] = { {CC"isJVMTIIncluded", CC"()Z", (void*)&WB_IsJVMTIIncluded}, {CC"waitUnsafe", CC"(I)V", (void*)&WB_WaitUnsafe}, + {CC"busyWait", CC"(I)V", (void*)&WB_BusyWait}, + {CC"cpuSamplerSetOutOfStackWalking", CC"(Z)Z", (void*)&WB_CPUSamplerSetOutOfStackWalking}, + {CC"cpuSamplerOutOfStackWalkingIterations", CC"()J",(void*)&WB_CPUSamplerOutOfStackWalkingIterations}, {CC"getLibcName", CC"()Ljava/lang/String;", (void*)&WB_GetLibcName}, {CC"pinObject", CC"(Ljava/lang/Object;)V", (void*)&WB_PinObject}, diff --git a/test/jdk/jdk/jfr/event/profiling/TestCPUTimeSampleQueueAutoSizes.java b/test/jdk/jdk/jfr/event/profiling/TestCPUTimeSampleQueueAutoSizes.java new file mode 100644 index 00000000000..1ea96e3bad3 --- /dev/null +++ b/test/jdk/jdk/jfr/event/profiling/TestCPUTimeSampleQueueAutoSizes.java @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2025 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
diff --git a/test/jdk/jdk/jfr/event/profiling/TestCPUTimeSampleQueueAutoSizes.java b/test/jdk/jdk/jfr/event/profiling/TestCPUTimeSampleQueueAutoSizes.java
new file mode 100644
index 00000000000..1ea96e3bad3
--- /dev/null
+++ b/test/jdk/jdk/jfr/event/profiling/TestCPUTimeSampleQueueAutoSizes.java
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2025 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package jdk.jfr.event.profiling;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.stream.Collectors;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicLong;
+
+import jdk.jfr.Recording;
+import jdk.jfr.consumer.RecordingStream;
+import jdk.jfr.consumer.RecordedEvent;
+import jdk.jfr.internal.JVM;
+import jdk.test.lib.Asserts;
+import jdk.test.lib.jfr.EventNames;
+import jdk.test.whitebox.WhiteBox;
+
+/*
+ * Tests that the sample queues increase in size as needed when loss is recorded.
+ * @test
+ * @requires vm.hasJFR & os.family == "linux" & vm.debug
+ * @library /test/lib
+ * @modules jdk.jfr/jdk.jfr.internal
+ * @build jdk.test.whitebox.WhiteBox
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
+ *      -Xbatch -XX:StartFlightRecording:dumponexit=true jdk.jfr.event.profiling.TestCPUTimeSampleQueueAutoSizes
+ */
+public class TestCPUTimeSampleQueueAutoSizes {
+
+    private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox();
+
+    private static final String BURST_THREAD_NAME = "Burst-Thread-1";
+
+    static volatile boolean alive = true;
+
+    record LossEvent(long relativeTimeMillis, long lostSamples) {}
+
+    /** A data collection for the CPUTimeSamplesLost events of the burst thread */
+    static class LossEventCollection {
+        private final List<LossEvent> events = new ArrayList<>();
+
+        public synchronized void addEvent(LossEvent event) {
+            events.add(event);
+        }
+
+        public synchronized List<LossEvent> getSortedEvents() {
+            return events.stream()
+                .sorted(Comparator.comparingLong(e -> e.relativeTimeMillis))
+                .collect(Collectors.toList());
+        }
+
+        public List<LossEvent> getEventsPerInterval(long widthMillis, long stopTimeMillis) {
+            List<LossEvent> ret = new ArrayList<>();
+            for (long start = 0; start < stopTimeMillis; start += widthMillis) {
+                long actualStart = Math.min(start, stopTimeMillis - widthMillis);
+                long lostSamples = events.stream()
+                    .filter(e -> e.relativeTimeMillis >= actualStart && e.relativeTimeMillis < actualStart + widthMillis)
+                    .mapToLong(e -> e.lostSamples)
+                    .sum();
+                ret.add(new LossEvent(actualStart, lostSamples));
+            }
+            return ret;
+        }
+
+    }
+
+    public static void main(String[] args) throws Exception {
+        try (RecordingStream rs = new RecordingStream()) {
+            // set up the recording
+            AtomicLong firstSampleTimeMillis = new AtomicLong(0);
+            AtomicLong lastSampleTimeMillis = new AtomicLong(0);
+            LossEventCollection lossEvents = new LossEventCollection();
+            rs.enable(EventNames.CPUTimeSample).with("throttle", "1ms");
+            rs.onEvent(EventNames.CPUTimeSample, e -> {
+                if (firstSampleTimeMillis.get() == 0 && e.getThread("eventThread").getJavaName().equals(BURST_THREAD_NAME)) {
+                    firstSampleTimeMillis.set(e.getStartTime().toEpochMilli());
+                }
+                if (e.getThread("eventThread").getJavaName().equals(BURST_THREAD_NAME)) {
+                    lastSampleTimeMillis.set(e.getStartTime().toEpochMilli());
+                }
+            });
+            rs.enable(EventNames.CPUTimeSamplesLost);
+            rs.onEvent(EventNames.CPUTimeSamplesLost, e -> {
+                if (e.getThread("eventThread").getJavaName().equals(BURST_THREAD_NAME)) {
+                    long eventTime = e.getStartTime().toEpochMilli();
+                    long relativeTime = firstSampleTimeMillis.get() > 0 ? (eventTime - firstSampleTimeMillis.get()) : eventTime;
+                    System.out.println("Lost samples: " + e.getLong("lostSamples") + " at " + relativeTime);
+                    lossEvents.addEvent(new LossEvent(relativeTime, e.getLong("lostSamples")));
+                }
+            });
+            WHITE_BOX.cpuSamplerSetOutOfStackWalking(false);
+            rs.startAsync();
+            // this thread runs for the whole duration of the test
+            Thread burstThread = new Thread(() -> WHITE_BOX.busyWait(11000));
+            burstThread.setName(BURST_THREAD_NAME);
+            burstThread.start();
+            // now we repeatedly keep out-of-stack-walking off for 700 ms and then turn it back on
+            // until the sampler thread has performed at least one more stack walking pass
+            for (int i = 0; i < 5; i++) {
+                boolean supported = WHITE_BOX.cpuSamplerSetOutOfStackWalking(false);
+                if (!supported) {
+                    Asserts.fail("Out-of-stack-walking toggle not supported, although this is a Linux debug build");
+                }
+                Thread.sleep(700);
+                long iterations = WHITE_BOX.cpuSamplerOutOfStackWalkingIterations();
+                WHITE_BOX.cpuSamplerSetOutOfStackWalking(true);
+                Thread.sleep(300);
+                while (WHITE_BOX.cpuSamplerOutOfStackWalkingIterations() == iterations) {
+                    Thread.sleep(50); // just to make sure the stack walking really ran
+                }
+            }
+            rs.close();
+            checkThatLossDecreased(lossEvents, lastSampleTimeMillis.get() - firstSampleTimeMillis.get());
+        }
+    }
+
+    static void checkThatLossDecreased(LossEventCollection lossEvents, long durationMillis) {
+        List<LossEvent> intervalLosses = lossEvents.getEventsPerInterval(1000, durationMillis);
+        for (LossEvent interval : intervalLosses) {
+            System.out.println("Lost samples in interval " + interval.relativeTimeMillis + ": " + interval.lostSamples);
+        }
+        // check that there are at least 3 intervals
+        Asserts.assertTrue(intervalLosses.size() > 2);
+        // check that the second to last interval has far fewer lost samples than the first
+        Asserts.assertTrue(intervalLosses.get(intervalLosses.size() - 2).lostSamples <
+            intervalLosses.get(0).lostSamples / 2);
+    }
+}
diff --git a/test/lib/jdk/test/whitebox/WhiteBox.java b/test/lib/jdk/test/whitebox/WhiteBox.java
index 2b96cbc3a94..350a6e11e50 100644
--- a/test/lib/jdk/test/whitebox/WhiteBox.java
+++ b/test/lib/jdk/test/whitebox/WhiteBox.java
@@ -845,6 +845,13 @@ public native int validateCgroup(boolean cgroupsV2Enabled,
 
   public native void waitUnsafe(int time_ms);
 
+  public native void busyWait(int cpuTimeMs);
+
+  // returns true if supported, false if not
+  public native boolean cpuSamplerSetOutOfStackWalking(boolean enable);
+
+  public native long cpuSamplerOutOfStackWalkingIterations();
+
   public native void pinObject(Object o);
 
   public native void unpinObject(Object o);
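To make the final assertion concrete, here is a small self-contained sketch of the interval check the test performs: bucket the lost-sample counts into 1-second windows and require that a late window shows less than half the loss of the first one. The event values below are made up; only the windowing and the comparison mirror the test.

import java.util.List;

public class LossDecreaseCheckSketch {
    record LossEvent(long relativeTimeMillis, long lostSamples) {}

    // Sum of lost samples whose relative time falls into [start, start + widthMillis).
    static long lossInWindow(List<LossEvent> events, long start, long widthMillis) {
        return events.stream()
                     .filter(e -> e.relativeTimeMillis() >= start && e.relativeTimeMillis() < start + widthMillis)
                     .mapToLong(LossEvent::lostSamples)
                     .sum();
    }

    public static void main(String[] args) {
        // Made-up loss events: heavy at the beginning, tapering off once the queues have grown.
        List<LossEvent> events = List.of(
            new LossEvent(100, 400), new LossEvent(900, 250),
            new LossEvent(1500, 120), new LossEvent(2600, 40));
        long firstWindow = lossInWindow(events, 0, 1000);   // 650
        long lateWindow = lossInWindow(events, 2000, 1000); // 40
        System.out.println(lateWindow < firstWindow / 2);   // true: the loss has decreased
    }
}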