-
-
Notifications
You must be signed in to change notification settings - Fork 2.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Initial work on runtime stats #4043
Changes from 11 commits
bf905c3
e2905b7
db15d5b
e075fb2
37ef131
14f7c89
816fad3
34b814f
d87027d
0f7d7ee
2d70494
213218b
062e18a
366a32c
5ac84f9
b6ec8fb
3e756d3
b80d764
cb60bb1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
//! This module contains a type that is essentially a (u16, Duration) pair, | ||
//! including an atomic container for this pair type. Supports durations up to | ||
//! 78 hours. | ||
//! | ||
//! The u16 counter can be used to determine if the duration has changed since | ||
//! the last time you looked at it. | ||
use crate::loom::sync::atomic::{AtomicU64, Ordering}; | ||
use std::fmt; | ||
use std::time::Duration; | ||
|
||
/// Largest number of nanoseconds that fits in the 48 duration bits.
const MAX_NANOS: u64 = (1u64 << 48) - 1;
/// Selects the low 48 bits, which hold the duration in nanoseconds.
const NANOS_MASK: u64 = MAX_NANOS;
/// Selects the high 16 bits, which hold the counter.
const COUNTER_MASK: u64 = !NANOS_MASK;
/// Packed representation of "counter = 1, duration = 0".
const COUNTER_ONE: u64 = 1u64 << 48;

/// A `(u16, Duration)` pair packed into a single `u64`.
///
/// The counter lives in the upper 16 bits and the duration, expressed in
/// nanoseconds, in the lower 48 bits. Durations longer than `MAX_NANOS`
/// nanoseconds are saturated to `MAX_NANOS` when stored.
#[derive(Copy, Clone, Default)]
pub(crate) struct CounterDuration {
    value: u64,
}

impl CounterDuration {
    /// Builds a pair from an explicit counter and duration.
    ///
    /// The duration is saturated to `MAX_NANOS` nanoseconds.
    #[cfg(test)]
    pub(crate) fn new(counter: u16, duration: Duration) -> Self {
        Self {
            value: (u64::from(counter) << 48) | Self::clamp_nanos(duration),
        }
    }

    /// Returns the counter stored in the upper 16 bits.
    pub(crate) fn counter(self) -> u16 {
        // After shifting out the 48 duration bits only 16 bits remain, so the
        // cast cannot lose information.
        (self.value >> 48) as u16
    }

    /// Returns the duration stored in the lower 48 bits.
    pub(crate) fn duration(self) -> Duration {
        Duration::from_nanos(self.value & NANOS_MASK)
    }

    /// Increment the counter by one and replace the duration with the supplied
    /// duration.
    ///
    /// The counter wraps around to zero after `u16::MAX`, and the duration is
    /// saturated to `MAX_NANOS` nanoseconds.
    pub(crate) fn set_next_duration(&mut self, dur: Duration) {
        // The counter occupies the topmost bits of the word, so a wrapping add
        // on the masked value wraps the counter without touching other bits.
        let counter_bits = (self.value & COUNTER_MASK).wrapping_add(COUNTER_ONE);

        self.value = counter_bits | Self::clamp_nanos(dur);
    }

    /// Splits the packed value into its `(counter, duration)` components.
    pub(crate) fn into_pair(self) -> (u16, Duration) {
        (self.counter(), self.duration())
    }

    /// Converts `dur` to nanoseconds, saturating at `MAX_NANOS` so the result
    /// always fits in the 48 duration bits.
    fn clamp_nanos(dur: Duration) -> u64 {
        // The value is at most `MAX_NANOS` after `min`, so the cast is lossless.
        std::cmp::min(dur.as_nanos(), u128::from(MAX_NANOS)) as u64
    }
}
|
||
#[derive(Default)] | ||
pub(crate) struct AtomicCounterDuration { | ||
value: AtomicU64, | ||
} | ||
|
||
impl AtomicCounterDuration { | ||
pub(crate) fn store(&self, new_value: CounterDuration, ordering: Ordering) { | ||
self.value.store(new_value.value, ordering); | ||
} | ||
|
||
pub(crate) fn load(&self, ordering: Ordering) -> CounterDuration { | ||
CounterDuration { | ||
value: self.value.load(ordering), | ||
} | ||
} | ||
} | ||
|
||
impl fmt::Debug for CounterDuration { | ||
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
fmt.debug_struct("CounterDuration") | ||
.field("counter", &self.counter()) | ||
.field("duration", &self.duration()) | ||
.finish() | ||
} | ||
} | ||
|
||
impl fmt::Debug for AtomicCounterDuration { | ||
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
let value = self.load(Ordering::Relaxed); | ||
fmt.debug_struct("AtomicCounterDuration") | ||
.field("counter", &value.counter()) | ||
.field("duration", &value.duration()) | ||
.finish() | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that `set_next_duration` bumps the counter by exactly one per
    /// call (wrapping at `u16::MAX`) and stores the supplied duration intact.
    #[test]
    fn counter_increment() {
        let mut dur = 1u64;
        // Start at `u16::MAX` so that the very first increment wraps to zero
        // and the counter then lines up with the loop index.
        let mut cd = CounterDuration::new(u16::MAX, Duration::from_nanos(dur));

        // 2^18 iterations make the 16-bit counter wrap around several times.
        for counter in 0..(1u32 << 18) {
            // Multiply by a prime number to get a sequence of mostly unrelated
            // durations. `dur` stays below 2^48 and the factor is below 2^15,
            // so the product cannot overflow a `u64`.
            dur = (dur * 32717) % (1 + MAX_NANOS);
            cd.set_next_duration(Duration::from_nanos(dur));

            // Note that `counter as u16` will truncate extra bits. This is
            // intended.
            assert_eq!(cd.counter(), counter as u16);
            assert_eq!(cd.duration().as_nanos(), u128::from(dur));
        }
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
//! This file contains the types necessary to collect various types of metrics. | ||
use crate::loom::sync::atomic::{AtomicU64, Ordering::Relaxed}; | ||
use crate::runtime::metrics::counter_duration::{AtomicCounterDuration, CounterDuration}; | ||
|
||
use std::time::{Duration, Instant}; | ||
|
||
/// This type contains methods to retrieve metrics from a Tokio runtime. | ||
#[derive(Debug)] | ||
pub struct RuntimeMetrics { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would consider naming this just There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would call this |
||
workers: Box<[WorkerMetrics]>, | ||
} | ||
|
||
/// This type contains methods to retrieve metrics from a worker thread on a Tokio runtime. | ||
#[derive(Debug)] | ||
pub struct WorkerMetrics { | ||
park_count: AtomicU64, | ||
steal_count: AtomicU64, | ||
poll_count: AtomicU64, | ||
park_to_park: AtomicCounterDuration, | ||
} | ||
|
||
impl RuntimeMetrics { | ||
pub(crate) fn new(worker_threads: usize) -> Self { | ||
let mut workers = Vec::with_capacity(worker_threads); | ||
for _ in 0..worker_threads { | ||
workers.push(WorkerMetrics { | ||
park_count: AtomicU64::new(0), | ||
steal_count: AtomicU64::new(0), | ||
poll_count: AtomicU64::new(0), | ||
park_to_park: AtomicCounterDuration::default(), | ||
}); | ||
} | ||
|
||
Self { | ||
workers: workers.into_boxed_slice(), | ||
Darksonn marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} | ||
|
||
/// Returns a slice containing the worker metrics for each worker thread. | ||
pub fn workers(&self) -> impl Iterator<Item = &WorkerMetrics> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we could store |
||
self.workers.iter() | ||
} | ||
} | ||
|
||
impl WorkerMetrics { | ||
/// Returns the total number of times this worker thread has parked. | ||
pub fn park_count(&self) -> u64 { | ||
self.park_count.load(Relaxed) | ||
} | ||
|
||
/// Returns the number of tasks this worker has stolen from other worker | ||
/// threads. | ||
pub fn steal_count(&self) -> u64 { | ||
self.steal_count.load(Relaxed) | ||
} | ||
|
||
/// Returns the number of times this worker has polled a task. | ||
pub fn poll_count(&self) -> u64 { | ||
self.poll_count.load(Relaxed) | ||
} | ||
|
||
/// Returns the amount of time the runtime spent working between the last | ||
/// two times it parked. | ||
/// | ||
/// The `u16` is a counter that is incremented by one each time the duration | ||
/// is changed. The counter will wrap around when it reaches `u16::MAX`. | ||
pub fn park_to_park(&self) -> (u16, Duration) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This name feels odd, how about |
||
self.park_to_park.load(Relaxed).into_pair() | ||
} | ||
} | ||
|
||
pub(crate) struct WorkerMetricsBatcher { | ||
my_index: usize, | ||
park_count: u64, | ||
steal_count: u64, | ||
poll_count: u64, | ||
last_park: Instant, | ||
park_to_park: CounterDuration, | ||
} | ||
|
||
impl WorkerMetricsBatcher { | ||
pub(crate) fn new(my_index: usize) -> Self { | ||
Self { | ||
my_index, | ||
park_count: 0, | ||
steal_count: 0, | ||
poll_count: 0, | ||
last_park: Instant::now(), | ||
park_to_park: CounterDuration::default(), | ||
} | ||
} | ||
pub(crate) fn submit(&mut self, to: &RuntimeMetrics) { | ||
let worker = &to.workers[self.my_index]; | ||
|
||
worker.park_count.store(self.park_count, Relaxed); | ||
worker.steal_count.store(self.steal_count, Relaxed); | ||
worker.poll_count.store(self.poll_count, Relaxed); | ||
worker.park_to_park.store(self.park_to_park, Relaxed); | ||
} | ||
|
||
pub(crate) fn about_to_park(&mut self) { | ||
self.park_count += 1; | ||
self.update_park_to_park(); | ||
} | ||
|
||
pub(crate) fn returned_from_park(&mut self) { | ||
self.last_park = Instant::now(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So my design originally tried to avoid any There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't notice this from your RFC, but I've punted this part of the feature for now. |
||
} | ||
|
||
#[cfg(feature = "rt-multi-thread")] | ||
pub(crate) fn incr_steal_count(&mut self, by: u16) { | ||
self.steal_count += u64::from(by); | ||
} | ||
|
||
pub(crate) fn incr_poll_count(&mut self) { | ||
self.poll_count += 1; | ||
} | ||
|
||
pub(crate) fn update_park_to_park(&mut self) { | ||
let now = Instant::now(); | ||
let diff = now - self.last_park; | ||
self.park_to_park.set_next_duration(diff); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if it is worth making this a feature flag at all (vs. always on).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's going to be pretty slow on platforms that don't have an `AtomicU64`, as we would then go through this mock.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think it is a big deal. Alternatively, we could use `AtomicUsize`, let it wrap, and leave it up to the receiver of the data to handle that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think there are any non-64-bit platforms left that are important for production.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can always add a feature flag later as well if someone would like it disabled.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adding later is tricky as it technically is a breaking change.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right, not sure what I was thinking about 😅