Remove grandparents from snapshot cache (#2917)
## Issue Addressed

NA

## Proposed Changes

In #2832 we made some changes to the `SnapshotCache` to help deal with the one-block re-orgs seen on mainnet (and testnets).

I believe the change in #2832 is good and we should keep it, but I think that in its present form it is causing the `SnapshotCache` to hold onto states that it doesn't need anymore. For example, a skip slot will result in one more `BeaconSnapshot` being stored in the cache.

This PR adds a new type of pruning that happens after a block is inserted into the cache: we remove any snapshot from the cache that is the *grandparent* of the block being imported. Since we know the grandparent has two valid blocks built atop it, it is not at risk from a one-block re-org.
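
To illustrate the idea, here is a minimal, self-contained sketch of grandparent pruning on insert. The `Snapshot`, `SimpleSnapshotCache` and `parents` lookup map are hypothetical stand-ins, not Lighthouse types; the real `SnapshotCache::insert` finds the grandparent by walking the new state's block-root history (`rev_iter_block_roots`) and likewise always retains the head snapshot, as shown in the diff below.

```rust
use std::collections::HashMap;

// Stand-in for a 32-byte block root.
type Hash256 = [u8; 32];

/// A simplified snapshot: just a block root and its parent root.
struct Snapshot {
    block_root: Hash256,
    parent_root: Hash256,
}

/// A simplified cache holding one snapshot per recent block.
struct SimpleSnapshotCache {
    head_block_root: Hash256,
    snapshots: Vec<Snapshot>,
}

impl SimpleSnapshotCache {
    /// Insert a snapshot, first evicting the grandparent of the new block.
    ///
    /// The grandparent already has two valid descendants (the parent and the
    /// new block), so it cannot be orphaned by a one-block re-org and its
    /// state no longer needs to be cached.
    fn insert(&mut self, snapshot: Snapshot, parents: &HashMap<Hash256, Hash256>) {
        // Walk one step up the chain: parent -> grandparent.
        if let Some(grandparent_root) = parents.get(&snapshot.parent_root).copied() {
            let head = self.head_block_root;
            // Drop the grandparent, but never evict the current head snapshot.
            self.snapshots
                .retain(|s| s.block_root == head || s.block_root != grandparent_root);
        }
        self.snapshots.push(snapshot);
    }
}

fn main() {
    // Chain: A (grandparent) <- B (parent) <- C (new block).
    let (a, b, c) = ([1u8; 32], [2u8; 32], [3u8; 32]);
    let parents: HashMap<Hash256, Hash256> = HashMap::from([(b, a), (c, b)]);

    let mut cache = SimpleSnapshotCache {
        head_block_root: b,
        snapshots: vec![
            Snapshot { block_root: a, parent_root: [0u8; 32] },
            Snapshot { block_root: b, parent_root: a },
        ],
    };

    // Inserting C evicts A (its grandparent) but keeps the head snapshot B.
    cache.insert(Snapshot { block_root: c, parent_root: b }, &parents);
    assert_eq!(cache.snapshots.len(), 2);
}
```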

## Additional Info

NA
paulhauner committed Jan 14, 2022
1 parent ceeab02 commit c11253a
Showing 3 changed files with 65 additions and 4 deletions.
7 changes: 7 additions & 0 deletions beacon_node/beacon_chain/src/beacon_chain.rs
@@ -2796,6 +2796,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
beacon_block_root: block_root,
},
None,
&self.spec,
)
})
.unwrap_or_else(|e| {
@@ -3740,6 +3741,12 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
.try_write_for(BLOCK_PROCESSING_CACHE_LOCK_TIMEOUT)
.map(|mut snapshot_cache| {
snapshot_cache.prune(new_finalized_checkpoint.epoch);
debug!(
self.log,
"Snapshot cache pruned";
"new_len" => snapshot_cache.len(),
"remaining_roots" => ?snapshot_cache.beacon_block_roots(),
);
})
.unwrap_or_else(|| {
error!(
18 changes: 18 additions & 0 deletions beacon_node/beacon_chain/src/metrics.rs
@@ -4,8 +4,12 @@ use crate::{BeaconChain, BeaconChainError, BeaconChainTypes};
use lazy_static::lazy_static;
pub use lighthouse_metrics::*;
use slot_clock::SlotClock;
use std::time::Duration;
use types::{BeaconState, Epoch, EthSpec, Hash256, Slot};

/// The maximum time to wait for the snapshot cache lock during a metrics scrape.
const SNAPSHOT_CACHE_TIMEOUT: Duration = Duration::from_millis(100);

lazy_static! {
/*
* Block Processing
@@ -18,6 +22,10 @@ lazy_static! {
"beacon_block_processing_successes_total",
"Count of blocks processed without error"
);
pub static ref BLOCK_PROCESSING_SNAPSHOT_CACHE_SIZE: Result<IntGauge> = try_create_int_gauge(
"beacon_block_processing_snapshot_cache_size",
"Count snapshots in the snapshot cache"
);
pub static ref BLOCK_PROCESSING_SNAPSHOT_CACHE_MISSES: Result<IntCounter> = try_create_int_counter(
"beacon_block_processing_snapshot_cache_misses",
"Count of snapshot cache misses"
@@ -913,6 +921,16 @@ pub fn scrape_for_metrics<T: BeaconChainTypes>(beacon_chain: &BeaconChain<T>) {

let attestation_stats = beacon_chain.op_pool.attestation_stats();

if let Some(snapshot_cache) = beacon_chain
.snapshot_cache
.try_write_for(SNAPSHOT_CACHE_TIMEOUT)
{
set_gauge(
&BLOCK_PROCESSING_SNAPSHOT_CACHE_SIZE,
snapshot_cache.len() as i64,
)
}

set_gauge_by_usize(
&OP_POOL_NUM_ATTESTATIONS,
attestation_stats.num_attestations,
44 changes: 40 additions & 4 deletions beacon_node/beacon_chain/src/snapshot_cache.rs
@@ -1,4 +1,5 @@
use crate::BeaconSnapshot;
use itertools::process_results;
use std::cmp;
use std::time::Duration;
use types::{
@@ -164,16 +165,51 @@ impl<T: EthSpec> SnapshotCache<T> {
}
}

/// The block roots of all snapshots contained in `self`.
pub fn beacon_block_roots(&self) -> Vec<Hash256> {
self.snapshots.iter().map(|s| s.beacon_block_root).collect()
}

/// The number of snapshots contained in `self`.
pub fn len(&self) -> usize {
self.snapshots.len()
}

/// Insert a snapshot, potentially removing an existing snapshot if `self` is at capacity (see
/// struct-level documentation for more info).
pub fn insert(&mut self, snapshot: BeaconSnapshot<T>, pre_state: Option<BeaconState<T>>) {
pub fn insert(
&mut self,
snapshot: BeaconSnapshot<T>,
pre_state: Option<BeaconState<T>>,
spec: &ChainSpec,
) {
let parent_root = snapshot.beacon_block.message().parent_root();
let item = CacheItem {
beacon_block: snapshot.beacon_block,
beacon_block_root: snapshot.beacon_block_root,
beacon_state: snapshot.beacon_state,
pre_state,
};

// Remove the grandparent of the block that was just inserted.
//
// Assuming it's unlikely to see re-orgs deeper than one block, this method helps keep the
// cache small by removing any states that already have more than one descendant.
//
// Remove the grandparent first to free up room in the cache.
let grandparent_result =
process_results(item.beacon_state.rev_iter_block_roots(spec), |iter| {
iter.map(|(_slot, root)| root)
.find(|root| *root != item.beacon_block_root && *root != parent_root)
});
if let Ok(Some(grandparent_root)) = grandparent_result {
let head_block_root = self.head_block_root;
self.snapshots.retain(|snapshot| {
let root = snapshot.beacon_block_root;
root == head_block_root || root != grandparent_root
});
}

if self.snapshots.len() < self.max_len {
self.snapshots.push(item);
} else {
@@ -384,7 +420,7 @@ mod test {
*snapshot.beacon_state.slot_mut() =
Slot::from(i * MainnetEthSpec::slots_per_epoch() + 1);

cache.insert(snapshot, None);
cache.insert(snapshot, None, &spec);

assert_eq!(
cache.snapshots.len(),
@@ -402,7 +438,7 @@
// 2 2
// 3 3
assert_eq!(cache.snapshots.len(), CACHE_SIZE);
cache.insert(get_snapshot(42), None);
cache.insert(get_snapshot(42), None, &spec);
assert_eq!(cache.snapshots.len(), CACHE_SIZE);

assert!(
@@ -462,7 +498,7 @@

// Over-fill the cache so it needs to eject some old values on insert.
for i in 0..CACHE_SIZE as u64 {
cache.insert(get_snapshot(u64::max_value() - i), None);
cache.insert(get_snapshot(u64::max_value() - i), None, &spec);
}

// Ensure that the new head value was not removed from the cache.