From fdbfcb1622ee1eccd380e1930ec5401c52b73567 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Fri, 2 Feb 2024 12:25:44 +0200 Subject: [PATCH] feat(merkle-tree): Do not wait for tree initialization when starting node (#992) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What ❔ Makes tree initialization async in metadata calculator (i.e., moves it from the constructor to the `run()` method). ## Why ❔ It currently takes ~1m to initialize Merkle tree RocksDB for ENs. ## Checklist - [x] PR title corresponds to the body of PR (we generate changelog entries from PRs). - [x] Tests for the changes have been added / updated. - [x] Documentation comments have been added / updated. - [x] Code has been formatted via `zk fmt` and `zk lint`. - [x] Spellcheck has been run via `zk spellcheck`. - [x] Linkcheck has been run via `zk linkcheck`. --- .../src/metadata_calculator/helpers.rs | 22 ++++++- .../src/metadata_calculator/mod.rs | 58 +++++++++++++------ .../src/metadata_calculator/recovery/mod.rs | 20 ++----- .../src/metadata_calculator/recovery/tests.rs | 2 +- .../src/metadata_calculator/tests.rs | 10 ++-- 5 files changed, 73 insertions(+), 39 deletions(-) diff --git a/core/lib/zksync_core/src/metadata_calculator/helpers.rs b/core/lib/zksync_core/src/metadata_calculator/helpers.rs index cdb44b67824..c9396976948 100644 --- a/core/lib/zksync_core/src/metadata_calculator/helpers.rs +++ b/core/lib/zksync_core/src/metadata_calculator/helpers.rs @@ -33,9 +33,27 @@ pub(crate) struct MerkleTreeInfo { pub leaf_count: u64, } +/// Health details for a Merkle tree. 
+#[derive(Debug, Serialize)] +#[serde(tag = "stage", rename_all = "snake_case")] +pub(super) enum MerkleTreeHealth { + Initialization, + Recovery { + chunk_count: u64, + recovered_chunk_count: u64, + }, + MainLoop(MerkleTreeInfo), +} + +impl From<MerkleTreeHealth> for Health { + fn from(details: MerkleTreeHealth) -> Self { + Self::from(HealthStatus::Ready).with_details(details) + } +} + impl From<MerkleTreeInfo> for Health { - fn from(tree_info: MerkleTreeInfo) -> Self { - Self::from(HealthStatus::Ready).with_details(tree_info) + fn from(info: MerkleTreeInfo) -> Self { + Self::from(HealthStatus::Ready).with_details(MerkleTreeHealth::MainLoop(info)) } } diff --git a/core/lib/zksync_core/src/metadata_calculator/mod.rs b/core/lib/zksync_core/src/metadata_calculator/mod.rs index b9eeada5dd3..846b73c1004 100644 --- a/core/lib/zksync_core/src/metadata_calculator/mod.rs +++ b/core/lib/zksync_core/src/metadata_calculator/mod.rs @@ -4,9 +4,10 @@ use std::{ future::{self, Future}, sync::Arc, - time::Duration, + time::{Duration, Instant}, }; +use anyhow::Context as _; use tokio::sync::watch; use zksync_config::configs::{ chain::OperationsManagerConfig, @@ -24,7 +25,7 @@ use zksync_types::{ pub(crate) use self::helpers::{AsyncTreeReader, L1BatchWithLogs, MerkleTreeInfo}; use self::{ - helpers::{create_db, Delayer, GenericAsyncTree}, + helpers::{create_db, Delayer, GenericAsyncTree, MerkleTreeHealth}, metrics::{TreeUpdateStage, METRICS}, updater::TreeUpdater, }; @@ -80,7 +81,7 @@ impl MetadataCalculatorConfig { #[derive(Debug)] pub struct MetadataCalculator { - tree: GenericAsyncTree, + config: MetadataCalculatorConfig, tree_reader: watch::Sender<Option<AsyncTreeReader>>, object_store: Option<Arc<dyn ObjectStore>>, delayer: Delayer, @@ -99,24 +100,14 @@ impl MetadataCalculator { "Maximum L1 batches per iteration is misconfigured to be 0; please update it to positive value" ); - let db = create_db( - config.db_path.clone().into(), - config.block_cache_capacity, - config.memtable_capacity, - config.stalled_writes_timeout, - config.multi_get_chunk_size, - )
- .await?; - let tree = GenericAsyncTree::new(db, config.mode).await; - let (_, health_updater) = ReactiveHealthCheck::new("tree"); Ok(Self { - tree, tree_reader: watch::channel(None).0, object_store, delayer: Delayer::new(config.delay_interval), health_updater, max_l1_batches_per_iter: config.max_l1_batches_per_iter, + config, }) } @@ -141,19 +132,52 @@ impl MetadataCalculator { } } + async fn create_tree(&self) -> anyhow::Result<GenericAsyncTree> { + self.health_updater + .update(MerkleTreeHealth::Initialization.into()); + + let started_at = Instant::now(); + let db = create_db( + self.config.db_path.clone().into(), + self.config.block_cache_capacity, + self.config.memtable_capacity, + self.config.stalled_writes_timeout, + self.config.multi_get_chunk_size, + ) + .await + .with_context(|| { + format!( + "failed opening Merkle tree RocksDB with configuration {:?}", + self.config + ) + })?; + tracing::info!( + "Opened Merkle tree RocksDB with configuration {:?} in {:?}", + self.config, + started_at.elapsed() + ); + + Ok(GenericAsyncTree::new(db, self.config.mode).await) + } + pub async fn run( self, pool: ConnectionPool, stop_receiver: watch::Receiver<bool>, ) -> anyhow::Result<()> { - let tree = self - .tree + let tree = self.create_tree().await?; + let tree = tree .ensure_ready(&pool, &stop_receiver, &self.health_updater) .await?; let Some(tree) = tree else { return Ok(()); // recovery was aborted because a stop signal was received }; - self.tree_reader.send_replace(Some(tree.reader())); + let tree_reader = tree.reader(); + tracing::info!( + "Merkle tree is initialized and ready to process L1 batches: {:?}", + tree_reader.clone().info().await + ); + self.tree_reader.send_replace(Some(tree_reader)); let updater = TreeUpdater::new(tree, self.max_l1_batches_per_iter, self.object_store); updater diff --git a/core/lib/zksync_core/src/metadata_calculator/recovery/mod.rs b/core/lib/zksync_core/src/metadata_calculator/recovery/mod.rs index a0e50c1d2ca..f6b6f74fb2b 100644 ---
a/core/lib/zksync_core/src/metadata_calculator/recovery/mod.rs +++ b/core/lib/zksync_core/src/metadata_calculator/recovery/mod.rs @@ -33,10 +33,9 @@ use std::{ use anyhow::Context as _; use async_trait::async_trait; use futures::future; -use serde::{Deserialize, Serialize}; use tokio::sync::{watch, Mutex, Semaphore}; use zksync_dal::{ConnectionPool, StorageProcessor}; -use zksync_health_check::{Health, HealthStatus, HealthUpdater}; +use zksync_health_check::HealthUpdater; use zksync_merkle_tree::TreeEntry; use zksync_types::{ snapshots::{uniform_hashed_keys_chunk, SnapshotRecoveryStatus}, @@ -44,7 +43,7 @@ use zksync_types::{ }; use super::{ - helpers::{AsyncTree, AsyncTreeRecovery, GenericAsyncTree}, + helpers::{AsyncTree, AsyncTreeRecovery, GenericAsyncTree, MerkleTreeHealth}, metrics::{ChunkRecoveryStage, RecoveryStage, RECOVERY_METRICS}, }; @@ -68,14 +67,6 @@ trait HandleRecoveryEvent: fmt::Debug + Send + Sync { } } -/// Information about a Merkle tree during its snapshot recovery. -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] -struct RecoveryMerkleTreeInfo { - mode: &'static str, // always set to "recovery" to distinguish from `MerkleTreeInfo` - chunk_count: u64, - recovered_chunk_count: u64, -} - /// [`HealthUpdater`]-based [`HandleRecoveryEvent`] implementation. 
#[derive(Debug)] struct RecoveryHealthUpdater<'a> { @@ -109,12 +100,11 @@ impl HandleRecoveryEvent for RecoveryHealthUpdater<'_> { RECOVERY_METRICS .recovered_chunk_count .set(recovered_chunk_count); - let health = Health::from(HealthStatus::Ready).with_details(RecoveryMerkleTreeInfo { - mode: "recovery", + let health = MerkleTreeHealth::Recovery { chunk_count: self.chunk_count, recovered_chunk_count, - }); - self.inner.update(health); + }; + self.inner.update(health.into()); } } diff --git a/core/lib/zksync_core/src/metadata_calculator/recovery/tests.rs b/core/lib/zksync_core/src/metadata_calculator/recovery/tests.rs index d3eff132a86..5d1d37deeab 100644 --- a/core/lib/zksync_core/src/metadata_calculator/recovery/tests.rs +++ b/core/lib/zksync_core/src/metadata_calculator/recovery/tests.rs @@ -10,7 +10,7 @@ use zksync_config::configs::{ chain::OperationsManagerConfig, database::{MerkleTreeConfig, MerkleTreeMode}, }; -use zksync_health_check::{CheckHealth, ReactiveHealthCheck}; +use zksync_health_check::{CheckHealth, HealthStatus, ReactiveHealthCheck}; use zksync_merkle_tree::{domain::ZkSyncTree, TreeInstruction}; use zksync_types::{L1BatchNumber, L2ChainId, StorageLog}; diff --git a/core/lib/zksync_core/src/metadata_calculator/tests.rs b/core/lib/zksync_core/src/metadata_calculator/tests.rs index 4f49a052aa3..c5c99db624c 100644 --- a/core/lib/zksync_core/src/metadata_calculator/tests.rs +++ b/core/lib/zksync_core/src/metadata_calculator/tests.rs @@ -49,8 +49,9 @@ async fn genesis_creation() { run_calculator(calculator, pool.clone()).await; let (calculator, _) = setup_calculator(temp_dir.path(), &pool).await; - let GenericAsyncTree::Ready(tree) = &calculator.tree else { - panic!("Unexpected tree state: {:?}", calculator.tree); + let tree = calculator.create_tree().await.unwrap(); + let GenericAsyncTree::Ready(tree) = tree else { + panic!("Unexpected tree state: {tree:?}"); }; assert_eq!(tree.next_l1_batch_number(), L1BatchNumber(1)); } @@ -77,8 +78,9 @@ async fn 
basic_workflow() { assert!(merkle_paths.iter().all(|log| log.is_write)); let (calculator, _) = setup_calculator(temp_dir.path(), &pool).await; - let GenericAsyncTree::Ready(tree) = &calculator.tree else { - panic!("Unexpected tree state: {:?}", calculator.tree); + let tree = calculator.create_tree().await.unwrap(); + let GenericAsyncTree::Ready(tree) = tree else { + panic!("Unexpected tree state: {tree:?}"); }; assert_eq!(tree.next_l1_batch_number(), L1BatchNumber(2)); }