Skip to content

Commit

Permalink
Metrics and DEBG log for late gossip blocks (#2533)
Browse files Browse the repository at this point in the history
## Issue Addressed

Which issue # does this PR address?

## Proposed Changes

- Add a counter metric to log when a block is received late from gossip.
- Also push a `DEBG` log for the above condition.
- Use Debug (`?`) instead of Display (`%`) for a bunch of logs in the beacon processor, so we don't have to deal with concatenated block roots.
- Add new ERRO and CRIT to HTTP API to alert users when they're publishing late blocks.

## Additional Info

NA
  • Loading branch information
paulhauner committed Aug 23, 2021
1 parent 12fe72b commit f2a8c62
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 29 deletions.
6 changes: 4 additions & 2 deletions beacon_node/beacon_chain/src/beacon_chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2932,6 +2932,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
.beacon_state
.previous_epoch()
.start_slot(T::EthSpec::slots_per_epoch());
let head_proposer_index = new_head.beacon_block.message().proposer_index();

// Update the snapshot that stores the head of the chain at the time it received the
// block.
Expand Down Expand Up @@ -2962,9 +2963,10 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
debug!(
self.log,
"Delayed head block";
"delay" => ?block_delay,
"root" => ?beacon_block_root,
"block_root" => ?beacon_block_root,
"proposer_index" => head_proposer_index,
"slot" => head_slot,
"block_delay" => ?block_delay,
);
}

Expand Down
27 changes: 15 additions & 12 deletions beacon_node/http_api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -895,7 +895,10 @@ pub fn serve<T: BeaconChainTypes>(
info!(
log,
"Valid block from HTTP API";
"root" => format!("{}", root)
"block_delay" => ?delay,
"root" => format!("{}", root),
"proposer_index" => block.message().proposer_index(),
"slot" => block.slot(),
);

// Notify the validator monitor.
Expand All @@ -917,25 +920,25 @@ pub fn serve<T: BeaconChainTypes>(
//
// Check to see the thresholds are non-zero to avoid logging errors with small
// slot times (e.g., during testing)
let crit_threshold = chain.spec.seconds_per_slot / 3;
let warn_threshold = chain.spec.seconds_per_slot / 6;
if crit_threshold > 0 && delay.as_secs() > crit_threshold {
let crit_threshold = chain.slot_clock.unagg_attestation_production_delay();
let error_threshold = crit_threshold / 2;
if delay >= crit_threshold {
crit!(
log,
"Block was broadcast too late";
"root" => ?root,
"slot" => block.slot(),
"delay_ms" => delay.as_millis(),
"msg" => "system may be overloaded, block likely to be orphaned",
"delay_ms" => delay.as_millis(),
"slot" => block.slot(),
"root" => ?root,
)
} else if warn_threshold > 0 && delay.as_secs() > warn_threshold {
warn!(
} else if delay >= error_threshold {
error!(
log,
"Block broadcast was delayed";
"root" => ?root,
"slot" => block.slot(),
"delay_ms" => delay.as_millis(),
"msg" => "system may be overloaded, block may be orphaned",
"delay_ms" => delay.as_millis(),
"slot" => block.slot(),
"root" => ?root,
)
}

Expand Down
8 changes: 8 additions & 0 deletions beacon_node/http_api/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,12 @@ lazy_static::lazy_static! {
"http_api_block_broadcast_delay_times",
"Time between start of the slot and when the block was broadcast"
);
pub static ref HTTP_API_BLOCK_PUBLISHED_LATE_TOTAL: Result<IntCounter> = try_create_int_counter(
"http_api_block_published_late_total",
"The count of times a block was published beyond more than half way to the attestation deadline"
);
pub static ref HTTP_API_BLOCK_PUBLISHED_VERY_LATE_TOTAL: Result<IntCounter> = try_create_int_counter(
"http_api_block_published_very_late_total",
"The count of times a block was published beyond the attestation deadline"
);
}
44 changes: 29 additions & 15 deletions beacon_node/network/src/beacon_processor/worker/gossip_methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -291,19 +291,33 @@ impl<T: BeaconChainTypes> Worker<T> {
reprocess_tx: mpsc::Sender<ReprocessQueueMessage<T>>,
seen_duration: Duration,
) {
let block_delay =
get_block_delay_ms(seen_duration, block.message(), &self.chain.slot_clock);
// Log metrics to track delay from other nodes on the network.
metrics::observe_duration(
&metrics::BEACON_BLOCK_GOSSIP_SLOT_START_DELAY_TIME,
get_block_delay_ms(seen_duration, block.message(), &self.chain.slot_clock),
block_delay,
);

let verified_block = match self.chain.verify_block_for_gossip(block) {
Ok(verified_block) => {
if block_delay >= self.chain.slot_clock.unagg_attestation_production_delay() {
metrics::inc_counter(&metrics::BEACON_BLOCK_GOSSIP_ARRIVED_LATE_TOTAL);
debug!(
self.log,
"Gossip block arrived late";
"block_root" => ?verified_block.block_root,
"proposer_index" => verified_block.block.message().proposer_index(),
"slot" => verified_block.block.slot(),
"block_delay" => ?block_delay,
);
}

info!(
self.log,
"New block received";
"slot" => verified_block.block.slot(),
"hash" => %verified_block.block_root
"hash" => ?verified_block.block_root
);
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Accept);

Expand All @@ -325,7 +339,7 @@ impl<T: BeaconChainTypes> Worker<T> {
debug!(
self.log,
"Unknown parent for gossip block";
"root" => %block.canonical_root()
"root" => ?block.canonical_root()
);
self.send_sync_message(SyncMessage::UnknownBlock(peer_id, block));
return;
Expand Down Expand Up @@ -392,7 +406,7 @@ impl<T: BeaconChainTypes> Worker<T> {
self.log,
"Block arrived early";
"block_slot" => %block_slot,
"block_root" => %block_root,
"block_root" => ?block_root,
"msg" => "if this happens consistently, check system clock"
);

Expand Down Expand Up @@ -423,7 +437,7 @@ impl<T: BeaconChainTypes> Worker<T> {
self.log,
"Failed to defer block import";
"block_slot" => %block_slot,
"block_root" => %block_root,
"block_root" => ?block_root,
"location" => "block gossip"
)
}
Expand All @@ -440,7 +454,7 @@ impl<T: BeaconChainTypes> Worker<T> {
"Failed to defer block import";
"error" => ?e,
"block_slot" => %block_slot,
"block_root" => %block_root,
"block_root" => ?block_root,
"location" => "block gossip"
)
}
Expand Down Expand Up @@ -472,7 +486,7 @@ impl<T: BeaconChainTypes> Worker<T> {
self.log,
"Failed to inform block import";
"source" => "gossip",
"block_root" => %block_root,
"block_root" => ?block_root,
)
};

Expand Down Expand Up @@ -511,7 +525,7 @@ impl<T: BeaconChainTypes> Worker<T> {
self.log,
"Invalid gossip beacon block";
"outcome" => ?other,
"block root" => %block.canonical_root(),
"block root" => ?block.canonical_root(),
"block slot" => block.slot()
);
self.gossip_penalize_peer(peer_id, PeerAction::MidToleranceError);
Expand Down Expand Up @@ -843,7 +857,7 @@ impl<T: BeaconChainTypes> Worker<T> {
self.log,
"Attestation is not within the last ATTESTATION_PROPAGATION_SLOT_RANGE slots";
"peer_id" => %peer_id,
"block" => %beacon_block_root,
"block" => ?beacon_block_root,
"type" => ?attestation_type,
);

Expand Down Expand Up @@ -918,7 +932,7 @@ impl<T: BeaconChainTypes> Worker<T> {
self.log,
"Attestation already known";
"peer_id" => %peer_id,
"block" => %beacon_block_root,
"block" => ?beacon_block_root,
"type" => ?attestation_type,
);
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
Expand All @@ -935,7 +949,7 @@ impl<T: BeaconChainTypes> Worker<T> {
self.log,
"Aggregator already known";
"peer_id" => %peer_id,
"block" => %beacon_block_root,
"block" => ?beacon_block_root,
"type" => ?attestation_type,
);
// This is an allowed behaviour.
Expand All @@ -956,7 +970,7 @@ impl<T: BeaconChainTypes> Worker<T> {
self.log,
"Prior attestation known";
"peer_id" => %peer_id,
"block" => %beacon_block_root,
"block" => ?beacon_block_root,
"epoch" => %epoch,
"validator_index" => validator_index,
"type" => ?attestation_type,
Expand All @@ -980,7 +994,7 @@ impl<T: BeaconChainTypes> Worker<T> {
self.log,
"Validation Index too high";
"peer_id" => %peer_id,
"block" => %beacon_block_root,
"block" => ?beacon_block_root,
"type" => ?attestation_type,
);
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject);
Expand All @@ -991,7 +1005,7 @@ impl<T: BeaconChainTypes> Worker<T> {
self.log,
"Attestation for unknown block";
"peer_id" => %peer_id,
"block" => %beacon_block_root
"block" => ?beacon_block_root
);
if let Some(sender) = reprocess_tx {
// We don't know the block, get the sync manager to handle the block lookup, and
Expand Down Expand Up @@ -1200,7 +1214,7 @@ impl<T: BeaconChainTypes> Worker<T> {
self.log,
"Invalid attestation from network";
"reason" => ?error,
"block" => %beacon_block_root,
"block" => ?beacon_block_root,
"peer_id" => %peer_id,
"type" => ?attestation_type,
);
Expand Down
4 changes: 4 additions & 0 deletions beacon_node/network/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,10 @@ lazy_static! {
"beacon_block_gossip_slot_start_delay_time",
"Duration between when the block is received and the start of the slot it belongs to.",
);
pub static ref BEACON_BLOCK_GOSSIP_ARRIVED_LATE_TOTAL: Result<IntCounter> = try_create_int_counter(
"beacon_block_gossip_arrived_late_total",
"Count of times when a gossip block arrived from the network later than the attestation deadline.",
);

/*
* Attestation reprocessing queue metrics.
Expand Down

0 comments on commit f2a8c62

Please sign in to comment.