Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

Add support for sourced metrics. #6895

Merged
merged 8 commits into from
Aug 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 41 additions & 18 deletions client/network/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ use parking_lot::Mutex;
use prometheus_endpoint::{
register, Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramOpts, HistogramVec, Opts,
PrometheusError, Registry, U64,
SourcedCounter, MetricSource
};
use sc_peerset::PeersetHandle;
use sp_consensus::import_queue::{BlockImportError, BlockImportResult, ImportQueue, Link};
Expand Down Expand Up @@ -240,12 +241,6 @@ impl<B: BlockT + 'static, H: ExHashT> NetworkWorker<B, H> {
local_peer_id_legacy
);

// Initialize the metrics.
let metrics = match &params.metrics_registry {
Some(registry) => Some(Metrics::register(&registry)?),
None => None
};

let checker = params.on_demand.as_ref()
.map(|od| od.checker().clone())
.unwrap_or_else(|| Arc::new(AlwaysBadChecker));
Expand Down Expand Up @@ -353,6 +348,17 @@ impl<B: BlockT + 'static, H: ExHashT> NetworkWorker<B, H> {
(builder.build(), bandwidth)
};

// Initialize the metrics.
let metrics = match &params.metrics_registry {
Some(registry) => {
// Sourced metrics.
BandwidthCounters::register(registry, bandwidth.clone())?;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

// Other (i.e. new) metrics.
Some(Metrics::register(registry)?)
}
None => None
};

// Listen on multiaddresses.
for addr in &params.network_config.listen_addresses {
if let Err(err) = Swarm::<B, H>::listen_on(&mut swarm, addr.clone()) {
Expand Down Expand Up @@ -1152,9 +1158,6 @@ struct Metrics {
kbuckets_num_nodes: GaugeVec<U64>,
listeners_local_addresses: Gauge<U64>,
listeners_errors_total: Counter<U64>,
// Note: `network_bytes_total` is a monotonic gauge obtained by
// sampling an existing counter.
network_bytes_total: GaugeVec<U64>,
notifications_sizes: HistogramVec,
notifications_streams_closed_total: CounterVec<U64>,
notifications_streams_opened_total: CounterVec<U64>,
Expand All @@ -1168,6 +1171,35 @@ struct Metrics {
requests_out_started_total: CounterVec<U64>,
}

/// The source for bandwidth metrics.
///
/// Newtype around the shared bandwidth sinks. Its `MetricSource`
/// implementation reads the sinks' `total_inbound()` and `total_outbound()`
/// values whenever the metric is collected.
#[derive(Clone)]
struct BandwidthCounters(Arc<transport::BandwidthSinks>);

impl BandwidthCounters {
    /// Registers the `sub_libp2p_network_bytes_total` sourced counter with
    /// `registry`, backed by the given bandwidth `sinks`.
    ///
    /// The counter carries a single variable label, `direction`.
    ///
    /// # Errors
    ///
    /// Returns an error if the counter cannot be created from its options or
    /// if registering it with `registry` fails.
    fn register(
        registry: &Registry,
        sinks: Arc<transport::BandwidthSinks>,
    ) -> Result<(), PrometheusError> {
        let opts = Opts::new("sub_libp2p_network_bytes_total", "Total bandwidth usage")
            .variable_label("direction");
        let counter = SourcedCounter::new(&opts, BandwidthCounters(sinks))?;

        // The registered handle is not needed afterwards: the values are
        // pulled from the sinks on every collection.
        register(counter, registry)?;
        Ok(())
    }
}

impl MetricSource for BandwidthCounters {
    type N = u64;

    /// Reports the total inbound and outbound byte counts, one value per
    /// `direction` label value (`"in"`, then `"out"`).
    fn collect(&self, mut set: impl FnMut(&[&str], Self::N)) {
        let sinks = &self.0;
        set(&["in"], sinks.total_inbound());
        set(&["out"], sinks.total_outbound());
    }
}

impl Metrics {
fn register(registry: &Registry) -> Result<Self, PrometheusError> {
Ok(Self {
Expand Down Expand Up @@ -1271,13 +1303,6 @@ impl Metrics {
"sub_libp2p_listeners_errors_total",
"Total number of non-fatal errors reported by a listener"
)?, registry)?,
network_bytes_total: register(GaugeVec::new(
Opts::new(
"sub_libp2p_network_bytes_total",
"Total bandwidth usage"
),
&["direction"]
)?, registry)?,
notifications_sizes: register(HistogramVec::new(
HistogramOpts {
common_opts: Opts::new(
Expand Down Expand Up @@ -1725,8 +1750,6 @@ impl<B: BlockT + 'static, H: ExHashT> Future for NetworkWorker<B, H> {
this.is_major_syncing.store(is_major_syncing, Ordering::Relaxed);

if let Some(metrics) = this.metrics.as_ref() {
metrics.network_bytes_total.with_label_values(&["in"]).set(this.service.bandwidth.total_inbound());
metrics.network_bytes_total.with_label_values(&["out"]).set(this.service.bandwidth.total_outbound());
metrics.is_major_syncing.set(is_major_syncing as u64);
for (proto, num_entries) in this.network_service.num_kbuckets_entries() {
let proto = maybe_utf8_bytes_to_string(proto.as_bytes());
Expand Down
3 changes: 3 additions & 0 deletions utils/prometheus/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ use std::net::SocketAddr;

#[cfg(not(target_os = "unknown"))]
mod networking;
mod sourced;

pub use sourced::{SourcedCounter, SourcedGauge, MetricSource};

#[cfg(target_os = "unknown")]
pub use unknown_os::init_prometheus;
Expand Down
143 changes: 143 additions & 0 deletions utils/prometheus/src/sourced.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
// Copyright 2020 Parity Technologies (UK) Ltd.
// This file is part of Substrate.

// Substrate is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Substrate is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.

//! Metrics that are collected from existing sources.

use prometheus::core::{Collector, Desc, Describer, Number, Opts};
use prometheus::proto;
use std::{cmp::Ordering, marker::PhantomData};

/// A counter whose values are obtained from an existing source.
///
/// > **Note**: The counter values provided by the source `S`
/// > must be monotonically increasing. Otherwise use a
/// > [`SourcedGauge`] instead.
pub type SourcedCounter<S> = SourcedMetric<Counter, S>;

/// A gauge whose values are obtained from an existing source.
pub type SourcedGauge<S> = SourcedMetric<Gauge, S>;

/// The type of a sourced counter.
mxinden marked this conversation as resolved.
Show resolved Hide resolved
// `Debug` is derived so that the `#[derive(Debug)]` on `SourcedMetric<T, S>`
// (which requires `T: Debug`) actually produces an implementation for
// `SourcedMetric<Counter, S>`. The enum is uninhabited and is only used as a
// type-level marker.
#[derive(Debug, Copy, Clone)]
pub enum Counter {}

/// The type of a sourced gauge.
///
/// This enum is uninhabited and is only used as a type-level marker.
/// `Debug` is derived so that the `#[derive(Debug)]` on `SourcedMetric<T, S>`
/// (which requires `T: Debug`) actually produces an implementation for
/// `SourcedMetric<Gauge, S>`.
#[derive(Debug, Copy, Clone)]
pub enum Gauge {}

/// A metric whose values are obtained from an existing source,
/// instead of being independently recorded.
///
/// `T` is a marker selecting the exported metric type (see [`SourcedType`]),
/// `S` is the [`MetricSource`] that is queried on every collection.
#[derive(Debug, Clone)]
pub struct SourcedMetric<T, S> {
// The source that is asked for the current values on each collection.
source: S,
// The descriptor (name, help, labels) built from the `Opts` passed to `new`.
desc: Desc,
// Zero-sized marker tying the metric to its type `T`; no `T` is stored.
_type: PhantomData<T>,
}

/// A source of values for a [`SourcedMetric`].
///
/// Implementations must be `Sync + Send + Clone`.
pub trait MetricSource: Sync + Send + Clone {
/// The type of the collected values.
type N: Number;
/// Collects the current values of the metrics from the source.
///
/// The implementation calls `set` once per value it wants to report,
/// passing the values of the metric's variable labels and the current
/// value. The label values are paired positionally with the variable
/// label names of the metric, so their number and order should match.
fn collect(&self, set: impl FnMut(&[&str], Self::N));
}

impl<T: SourcedType, S: MetricSource> SourcedMetric<T, S> {
    /// Creates a new metric that obtains its values from the given source.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `opts.describe()` if no descriptor can
    /// be built from `opts`.
    pub fn new(opts: &Opts, source: S) -> prometheus::Result<Self> {
        opts.describe().map(|desc| Self {
            source,
            desc,
            _type: PhantomData,
        })
    }
}

// Collector implementation: every `collect` call queries the source afresh
// and converts the reported values into a single Prometheus metric family.
impl<T: SourcedType, S: MetricSource> Collector for SourcedMetric<T, S> {
/// Returns the single descriptor of this metric.
fn desc(&self) -> Vec<&Desc> {
vec![&self.desc]
}

/// Queries the source and returns one `MetricFamily` containing one
/// `Metric` per invocation of the `set` callback by the source.
fn collect(&self) -> Vec<proto::MetricFamily> {
let mut counters = Vec::new();

self.source.collect(|label_values, value| {
let mut m = proto::Metric::default();

// The marker type `T` decides whether the value is exported as a
// counter or as a gauge.
match T::proto() {
proto::MetricType::COUNTER => {
let mut c = proto::Counter::default();
c.set_value(value.into_f64());
m.set_counter(c);
}
proto::MetricType::GAUGE => {
let mut g = proto::Gauge::default();
g.set_value(value.into_f64());
m.set_gauge(g);
}
// Any other metric type is only logged; `m` is still pushed below
// without a value having been set.
t => {
log::error!("Unsupported sourced metric type: {:?}", t);
}
}

// With debug assertions enabled a label-count mismatch panics here;
// otherwise execution continues and the mismatch is only logged below.
debug_assert_eq!(self.desc.variable_labels.len(), label_values.len());
match self.desc.variable_labels.len().cmp(&label_values.len()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All rust-prometheus methods panic when providing unequal label keys and values. While not the greates UX, I would do the same (at least in debug mode) to stay consistent.

E.g. https://docs.rs/prometheus/0.7.0/prometheus/core/struct.MetricVec.html#method.with_label_values.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in 45b734c.

Ordering::Greater =>
log::warn!("Missing label values for sourced metric {}", self.desc.fq_name),
Ordering::Less =>
log::warn!("Too many label values for sourced metric {}", self.desc.fq_name),
Ordering::Equal => {}
}

// Variable label names are paired positionally with the reported values;
// `zip` stops at the shorter side, so surplus names or values are dropped.
// The constant label pairs of the descriptor are appended afterwards.
m.set_label(self.desc.variable_labels.iter().zip(label_values)
.map(|(l_name, l_value)| {
let mut l = proto::LabelPair::default();
l.set_name(l_name.to_string());
l.set_value(l_value.to_string());
l
})
.chain(self.desc.const_label_pairs.iter().cloned())
.collect::<Vec<_>>());

counters.push(m);
});

// Wrap all collected metrics in one family carrying the descriptor's
// name and help text and the metric type selected by `T`.
let mut m = proto::MetricFamily::default();
m.set_name(self.desc.fq_name.clone());
m.set_help(self.desc.help.clone());
m.set_field_type(T::proto());
m.set_metric(counters);

vec![m]
}
}

/// Types of metrics that can obtain their values from an existing source.
///
/// The trait is sealed via the `private::Sealed` supertrait, so it cannot
/// be implemented outside of this module.
pub trait SourcedType: private::Sealed + Sync + Send {
/// Returns the Prometheus metric type under which values of this
/// sourced type are exported.
#[doc(hidden)]
fn proto() -> proto::MetricType;
}

impl SourcedType for Counter {
    /// Sourced counters are exported with the `COUNTER` metric type.
    fn proto() -> proto::MetricType {
        proto::MetricType::COUNTER
    }
}

impl SourcedType for Gauge {
    /// Sourced gauges are exported with the `GAUGE` metric type.
    fn proto() -> proto::MetricType {
        proto::MetricType::GAUGE
    }
}

mod private {
pub trait Sealed {}
impl Sealed for super::Counter {}
impl Sealed for super::Gauge {}
}