Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions lighthouse/tests/validator_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -476,3 +476,28 @@ fn disable_run_on_all() {
assert!(config.disable_run_on_all);
});
}

#[test]
fn latency_measurement_service() {
CommandLineTest::new().run().with_config(|config| {
assert!(config.enable_latency_measurement_service);
});
CommandLineTest::new()
.flag("latency-measurement-service", None)
.run()
.with_config(|config| {
assert!(config.enable_latency_measurement_service);
});
CommandLineTest::new()
.flag("latency-measurement-service", Some("true"))
.run()
.with_config(|config| {
assert!(config.enable_latency_measurement_service);
});
CommandLineTest::new()
.flag("latency-measurement-service", Some("false"))
.run()
.with_config(|config| {
assert!(!config.enable_latency_measurement_service);
});
}
51 changes: 50 additions & 1 deletion validator_client/src/beacon_node_fallback.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use std::fmt::Debug;
use std::future::Future;
use std::marker::PhantomData;
use std::sync::Arc;
use std::time::Duration;
use std::time::{Duration, Instant};
use tokio::{sync::RwLock, time::sleep};
use types::{ChainSpec, Config, EthSpec};

Expand All @@ -27,6 +27,14 @@ use types::{ChainSpec, Config, EthSpec};
/// having the correct nodes up and running prior to the start of the slot.
const SLOT_LOOKAHEAD: Duration = Duration::from_secs(1);

/// Indicates a measurement of latency between the VC and a BN.
pub struct LatencyMeasurement {
/// An identifier for the beacon node (e.g. the URL).
pub beacon_node_id: String,
/// The round-trip latency, if the BN responded successfully.
pub latency: Option<Duration>,
}

/// Starts a service that will routinely try and update the status of the provided `beacon_nodes`.
///
/// See `SLOT_LOOKAHEAD` for information about when this should run.
Expand Down Expand Up @@ -394,6 +402,47 @@ impl<T: SlotClock, E: EthSpec> BeaconNodeFallback<T, E> {
let _ = future::join_all(futures).await;
}

/// Concurrently send a request to all candidates (regardless of
/// offline/online) status and attempt to collect a rough reading on the
/// latency between the VC and candidate.
pub async fn measure_latency(&self) -> Vec<LatencyMeasurement> {
let futures: Vec<_> = self
.candidates
.iter()
.map(|candidate| async {
let beacon_node_id = candidate.beacon_node.to_string();
// The `node/version` endpoint is used since I imagine it would
// require the least processing in the BN and therefore measure
// the connection moreso than the BNs processing speed.
//
// I imagine all clients have the version string availble as a
// pre-computed string.
let response_instant = candidate
.beacon_node
.get_node_version()
.await
.ok()
.map(|_| Instant::now());
(beacon_node_id, response_instant)
})
.collect();

let request_instant = Instant::now();

// Send the request to all BNs at the same time. This might involve some
// queueing on the sending host, however I hope it will avoid bias
// caused by sending requests at different times.
future::join_all(futures)
.await
.into_iter()
.map(|(beacon_node_id, response_instant)| LatencyMeasurement {
beacon_node_id,
latency: response_instant
.and_then(|response| response.checked_duration_since(request_instant)),
})
.collect()
}

/// Run `func` against each candidate in `self`, returning immediately if a result is found.
/// Otherwise, return all the errors encountered along the way.
///
Expand Down
9 changes: 9 additions & 0 deletions validator_client/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,15 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> {
set here moves too far from the previous block's gas limit. [default: 30,000,000]")
.requires("builder-proposals"),
)
.arg(
Arg::with_name("latency-measurement-service")
.long("latency-measurement-service")
.value_name("BOOLEAN")
.help("Set to 'true' to enable a service that periodically attempts to measure latency to BNs. \
Set to 'false' to disable.")
.default_value("true")
.takes_value(true),
)
/*
* Experimental/development options.
*/
Expand Down
6 changes: 6 additions & 0 deletions validator_client/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ pub struct Config {
pub block_delay: Option<Duration>,
/// Disables publishing http api requests to all beacon nodes for select api calls.
pub disable_run_on_all: bool,
/// Enables a service which attempts to measure latency between the VC and BNs.
pub enable_latency_measurement_service: bool,
}

impl Default for Config {
Expand Down Expand Up @@ -111,6 +113,7 @@ impl Default for Config {
builder_registration_timestamp_override: None,
gas_limit: None,
disable_run_on_all: false,
enable_latency_measurement_service: true,
}
}
}
Expand Down Expand Up @@ -357,6 +360,9 @@ impl Config {
);
}

config.enable_latency_measurement_service =
parse_optional(cli_args, "latency-measurement-service")?.unwrap_or(true);

/*
* Experimental
*/
Expand Down
8 changes: 8 additions & 0 deletions validator_client/src/http_metrics/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,14 @@ lazy_static::lazy_static! {
"Attestation duty slot for all managed validators",
&["validator"]
);
/*
* BN latency
*/
pub static ref VC_BEACON_NODE_LATENCY: Result<HistogramVec> = try_create_histogram_vec(
"vc_beacon_node_latency",
"Round-trip latency for a simple API endpoint on each BN",
&["endpoint"]
);
}

pub fn gather_prometheus_metrics<T: EthSpec>(
Expand Down
58 changes: 58 additions & 0 deletions validator_client/src/latency.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
use crate::{http_metrics::metrics, BeaconNodeFallback};
use environment::RuntimeContext;
use slog::debug;
use slot_clock::SlotClock;
use std::sync::Arc;
use tokio::time::sleep;
use types::EthSpec;

/// The latency service will run 11/12ths of the way through the slot.
pub const SLOT_DELAY_MULTIPLIER: u32 = 11;
pub const SLOT_DELAY_DENOMINATOR: u32 = 12;

/// Starts a service that periodically checks the latency between the VC and the
/// candidate BNs.
pub fn start_latency_service<T: SlotClock + 'static, E: EthSpec>(
context: RuntimeContext<E>,
slot_clock: T,
beacon_nodes: Arc<BeaconNodeFallback<T, E>>,
) {
let log = context.log().clone();

let future = async move {
loop {
let sleep_time = slot_clock
.duration_to_next_slot()
.map(|next_slot| {
// This is 11/12ths through the next slot. On mainnet this
// will happen in the 11th second of each slot, one second
// before the next slot.
next_slot + (next_slot / SLOT_DELAY_DENOMINATOR) * SLOT_DELAY_MULTIPLIER
})
// If we can't read the slot clock, just wait one slot. Running
// the measurement at a non-exact time is not a big issue.
.unwrap_or_else(|| slot_clock.slot_duration());

// Sleep until it's time to perform the measurement.
sleep(sleep_time).await;

for measurement in beacon_nodes.measure_latency().await {
if let Some(latency) = measurement.latency {
debug!(
log,
"Measured BN latency";
"node" => &measurement.beacon_node_id,
"latency" => latency.as_millis(),
);
metrics::observe_timer_vec(
&metrics::VC_BEACON_NODE_LATENCY,
&[&measurement.beacon_node_id],
latency,
)
}
}
}
};

context.executor.spawn(future, "latency");
}
9 changes: 9 additions & 0 deletions validator_client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ mod duties_service;
mod graffiti_file;
mod http_metrics;
mod key_cache;
mod latency;
mod notifier;
mod preparation_service;
mod signing_method;
Expand Down Expand Up @@ -563,6 +564,14 @@ impl<T: EthSpec> ProductionValidatorClient<T> {
None
};

if self.config.enable_latency_measurement_service {
latency::start_latency_service(
self.context.clone(),
self.duties_service.slot_clock.clone(),
self.duties_service.beacon_nodes.clone(),
);
}

Ok(())
}
}
Expand Down