Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add metrics for transaction execution result in state keeper #2021

Merged
merged 19 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions core/node/node_sync/src/external_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use zksync_state_keeper::{
L1BatchParams, L2BlockParams, PendingBatchData, StateKeeperIO,
},
metrics::KEEPER_METRICS,
seal_criteria::IoSealCriteria,
seal_criteria::{IoSealCriteria, UnexecutableReason},
updates::UpdatesManager,
};
use zksync_types::{
Expand Down Expand Up @@ -304,10 +304,10 @@ impl StateKeeperIO for ExternalIO {
anyhow::bail!("Rollback requested. Transaction hash: {:?}", tx.hash());
}

async fn reject(&mut self, tx: &Transaction, error: &str) -> anyhow::Result<()> {
async fn reject(&mut self, tx: &Transaction, reason: UnexecutableReason) -> anyhow::Result<()> {
// We are replaying the already executed transactions so no rejections are expected to occur.
anyhow::bail!(
"Requested rejection of transaction {:?} because of the following error: {error}. \
"Requested rejection of transaction {:?} because of the following error: {reason}. \
This is not supported on external node",
tx.hash()
);
Expand Down
23 changes: 16 additions & 7 deletions core/node/state_keeper/src/io/mempool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ use crate::{
},
mempool_actor::l2_tx_filter,
metrics::KEEPER_METRICS,
seal_criteria::{IoSealCriteria, L2BlockMaxPayloadSizeSealer, TimeoutSealer},
seal_criteria::{
IoSealCriteria, L2BlockMaxPayloadSizeSealer, TimeoutSealer, UnexecutableReason,
},
updates::UpdatesManager,
MempoolGuard,
};
Expand Down Expand Up @@ -245,7 +247,8 @@ impl StateKeeperIO for MempoolIO {
tx.hash(),
tx.gas_limit()
);
self.reject(&tx, &Halt::TooBigGasLimit.to_string()).await?;
self.reject(&tx, UnexecutableReason::Halt(Halt::TooBigGasLimit))
.await?;
continue;
}
return Ok(Some(tx));
Expand All @@ -265,25 +268,31 @@ impl StateKeeperIO for MempoolIO {
Ok(())
}

async fn reject(&mut self, rejected: &Transaction, error: &str) -> anyhow::Result<()> {
async fn reject(
&mut self,
rejected: &Transaction,
reason: UnexecutableReason,
) -> anyhow::Result<()> {
anyhow::ensure!(
!rejected.is_l1(),
"L1 transactions should not be rejected: {error}"
"L1 transactions should not be rejected: {reason}"
);

// Reset the nonces in the mempool, but don't insert the transaction back.
self.mempool.rollback(rejected);

// Mark tx as rejected in the storage.
let mut storage = self.pool.connection_tagged("state_keeper").await?;
KEEPER_METRICS.rejected_transactions.inc();

KEEPER_METRICS.inc_rejected_txs(reason.as_metric_label());

tracing::warn!(
"Transaction {} is rejected with error: {error}",
"Transaction {} is rejected with error: {reason}",
rejected.hash()
);
storage
.transactions_dal()
.mark_tx_as_rejected(rejected.hash(), &format!("rejected: {error}"))
.mark_tx_as_rejected(rejected.hash(), &format!("rejected: {reason}"))
.await?;
Ok(())
}
Expand Down
4 changes: 2 additions & 2 deletions core/node/state_keeper/src/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ pub use self::{
output_handler::{OutputHandler, StateKeeperOutputHandler},
persistence::{L2BlockSealerTask, StateKeeperPersistence, TreeWritesPersistence},
};
use super::seal_criteria::IoSealCriteria;
use super::seal_criteria::{IoSealCriteria, UnexecutableReason};

pub mod common;
pub(crate) mod mempool;
Expand Down Expand Up @@ -136,7 +136,7 @@ pub trait StateKeeperIO: 'static + Send + Sync + fmt::Debug + IoSealCriteria {
/// Marks the transaction as "not executed", so it can be retrieved from the IO again.
async fn rollback(&mut self, tx: Transaction) -> anyhow::Result<()>;
/// Marks the transaction as "rejected", e.g. one that is not correct and can't be executed.
async fn reject(&mut self, tx: &Transaction, error: &str) -> anyhow::Result<()>;
async fn reject(&mut self, tx: &Transaction, reason: UnexecutableReason) -> anyhow::Result<()>;

/// Loads base system contracts with the specified version.
async fn load_base_system_contracts(
Expand Down
21 changes: 14 additions & 7 deletions core/node/state_keeper/src/keeper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use super::{
updates::UpdatesManager,
utils::gas_count_from_writes,
};
use crate::seal_criteria::UnexecutableReason;
AnastasiiaVashchuk marked this conversation as resolved.
Show resolved Hide resolved

/// Amount of time to block on waiting for some resource. The exact value is not really important,
/// we only need it to not block on waiting indefinitely and be able to process cancellation requests.
Expand Down Expand Up @@ -581,7 +582,7 @@ impl ZkSyncStateKeeper {
format!("failed rolling back transaction {tx_hash:?} in batch executor")
})?;
self.io
.reject(&tx, reason)
.reject(&tx, reason.clone())
.await
.with_context(|| format!("cannot reject transaction {tx_hash:?}"))?;
}
Expand Down Expand Up @@ -690,23 +691,29 @@ impl ZkSyncStateKeeper {
| TxExecutionResult::RejectedByVm {
reason: Halt::NotEnoughGasProvided,
} => {
let error_message = match &exec_result {
TxExecutionResult::BootloaderOutOfGasForTx => "bootloader_tx_out_of_gas",
let (reason, criterion) = match &exec_result {
TxExecutionResult::BootloaderOutOfGasForTx => (
UnexecutableReason::BootloaderOutOfGas,
"bootloader_tx_out_of_gas",
),
TxExecutionResult::RejectedByVm {
reason: Halt::NotEnoughGasProvided,
} => "not_enough_gas_provided_to_start_tx",
} => (
UnexecutableReason::NotEnoughGasProvided,
"not_enough_gas_provided_to_start_tx",
),
_ => unreachable!(),
};
let resolution = if is_first_tx {
SealResolution::Unexecutable(error_message.to_string())
SealResolution::Unexecutable(reason)
} else {
SealResolution::ExcludeAndSeal
};
AGGREGATION_METRICS.inc(error_message, &resolution);
AGGREGATION_METRICS.inc(criterion, &resolution);
AnastasiiaVashchuk marked this conversation as resolved.
Show resolved Hide resolved
resolution
}
TxExecutionResult::RejectedByVm { reason } => {
SealResolution::Unexecutable(reason.to_string())
UnexecutableReason::Halt(reason.clone()).into()
}
TxExecutionResult::Success {
tx_result,
Expand Down
58 changes: 55 additions & 3 deletions core/node/state_keeper/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::{
time::Duration,
};

use multivm::interface::VmExecutionResultAndLogs;
use multivm::interface::{VmExecutionResultAndLogs, VmRevertReason};
use vise::{
Buckets, Counter, EncodeLabelSet, EncodeLabelValue, Family, Gauge, Histogram, LatencyObserver,
Metrics,
Expand All @@ -30,6 +30,20 @@ pub enum TxExecutionType {
L2,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, EncodeLabelValue)]
#[metrics(rename_all = "snake_case")]
pub enum TxExecutionStatus {
Success,
Rejected,
Reverted,
}

#[derive(Debug, Clone, PartialEq, Eq, Hash, EncodeLabelSet)]
pub struct TxExecutionResult {
status: TxExecutionStatus,
reason: Option<&'static str>,
}

impl TxExecutionType {
pub fn from_is_l1(is_l1: bool) -> TxExecutionType {
match is_l1 {
Expand Down Expand Up @@ -57,8 +71,8 @@ pub struct StateKeeperMetrics {
/// Latency of the state keeper getting a transaction from the mempool.
#[metrics(buckets = Buckets::LATENCIES)]
pub get_tx_from_mempool: Histogram<Duration>,
/// Number of transactions rejected by the state keeper.
pub rejected_transactions: Counter,
/// Number of transactions completed with a specific result.
pub tx_execution_result: Family<TxExecutionResult, Counter>,
/// Time spent waiting for the hash of a previous L1 batch.
#[metrics(buckets = Buckets::LATENCIES)]
pub wait_for_prev_hash_time: Histogram<Duration>,
Expand All @@ -77,6 +91,44 @@ pub struct StateKeeperMetrics {
pub blob_base_fee_too_high: Counter,
}

fn vm_revert_reason_as_metric_label(reason: &VmRevertReason) -> &'static str {
match reason {
VmRevertReason::General { .. } => "General",
VmRevertReason::InnerTxError => "InnerTxError",
VmRevertReason::VmError => "VmError",
VmRevertReason::Unknown { .. } => "Unknown",
}
}

impl StateKeeperMetrics {
pub fn inc_rejected_txs(&self, reason: &'static str) {
let result = TxExecutionResult {
status: TxExecutionStatus::Rejected,
reason: Some(reason),
};

self.tx_execution_result[&result].inc();
}

pub fn inc_succeeded_txs(&self) {
let result = TxExecutionResult {
status: TxExecutionStatus::Success,
reason: None,
};

self.tx_execution_result[&result].inc();
}

pub fn inc_reverted_txs(&self, reason: &VmRevertReason) {
let result = TxExecutionResult {
status: TxExecutionStatus::Reverted,
reason: Some(vm_revert_reason_as_metric_label(reason)),
};

self.tx_execution_result[&result].inc();
}
}

#[vise::register]
pub static KEEPER_METRICS: vise::Global<StateKeeperMetrics> = vise::Global::new();

Expand Down
8 changes: 5 additions & 3 deletions core/node/state_keeper/src/seal_criteria/criteria/gas.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use zksync_types::ProtocolVersionId;

use crate::{
seal_criteria::{SealCriterion, SealData, SealResolution, StateKeeperConfig},
seal_criteria::{
SealCriterion, SealData, SealResolution, StateKeeperConfig, UnexecutableReason,
},
utils::new_block_gas_count,
};

Expand Down Expand Up @@ -30,7 +32,7 @@ impl SealCriterion for GasCriterion {
(config.max_single_tx_gas as f64 * config.close_block_at_gas_percentage).round() as u32;

if (tx_data.gas_count + new_block_gas_count()).any_field_greater_than(tx_bound) {
SealResolution::Unexecutable("Transaction requires too much gas".into())
UnexecutableReason::TooMuchGas.into()
} else if block_data
.gas_count
.any_field_greater_than(config.max_single_tx_gas)
Expand Down Expand Up @@ -103,7 +105,7 @@ mod tests {
);
assert_eq!(
huge_transaction_resolution,
SealResolution::Unexecutable("Transaction requires too much gas".into())
UnexecutableReason::TooMuchGas.into()
);

// Check criterion workflow
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use multivm::utils::gas_bootloader_batch_tip_overhead;
use zksync_types::ProtocolVersionId;

use crate::seal_criteria::{SealCriterion, SealData, SealResolution, StateKeeperConfig};
use crate::seal_criteria::{
SealCriterion, SealData, SealResolution, StateKeeperConfig, UnexecutableReason,
};

/// Checks whether we should exclude the transaction because we don't have enough gas for batch tip.
#[derive(Debug)]
Expand All @@ -22,7 +24,7 @@ impl SealCriterion for GasForBatchTipCriterion {

if tx_data.gas_remaining < batch_tip_overhead {
if is_tx_first {
SealResolution::Unexecutable("not_enough_gas_for_batch_tip".to_string())
UnexecutableReason::OutOfGasForBatchTip.into()
} else {
SealResolution::ExcludeAndSeal
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use zksync_config::configs::chain::StateKeeperConfig;
use zksync_types::ProtocolVersionId;

// Local uses
use crate::seal_criteria::{SealCriterion, SealData, SealResolution};
use crate::seal_criteria::{SealCriterion, SealData, SealResolution, UnexecutableReason};

// Collected vm execution metrics should fit into geometry limits.
// Otherwise witness generation will fail and proof won't be generated.
Expand Down Expand Up @@ -52,7 +52,7 @@ impl SealCriterion for CircuitsCriterion {
let used_circuits_batch = block_data.execution_metrics.circuit_statistic.total();

if used_circuits_tx + batch_tip_circuit_overhead >= reject_bound {
SealResolution::Unexecutable("ZK proof cannot be generated for a transaction".into())
UnexecutableReason::ProofWillFail.into()
} else if used_circuits_batch + batch_tip_circuit_overhead >= config.max_circuits_per_batch
{
SealResolution::ExcludeAndSeal
Expand Down Expand Up @@ -162,10 +162,7 @@ mod tests {
protocol_version,
);

assert_eq!(
block_resolution,
SealResolution::Unexecutable("ZK proof cannot be generated for a transaction".into())
);
assert_eq!(block_resolution, UnexecutableReason::ProofWillFail.into());
}

#[test]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use multivm::utils::execution_metrics_bootloader_batch_tip_overhead;
use zksync_types::ProtocolVersionId;

use crate::seal_criteria::{SealCriterion, SealData, SealResolution, StateKeeperConfig};
use crate::seal_criteria::{
SealCriterion, SealData, SealResolution, StateKeeperConfig, UnexecutableReason,
};

#[derive(Debug)]
pub struct PubDataBytesCriterion {
Expand Down Expand Up @@ -41,8 +43,7 @@ impl SealCriterion for PubDataBytesCriterion {
if tx_size + execution_metrics_bootloader_batch_tip_overhead(protocol_version.into())
> reject_bound as usize
{
let message = "Transaction cannot be sent to L1 due to pubdata limits";
SealResolution::Unexecutable(message.into())
UnexecutableReason::PubdataLimit.into()
} else if block_size
+ execution_metrics_bootloader_batch_tip_overhead(protocol_version.into())
> max_pubdata_per_l1_batch
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use multivm::utils::get_bootloader_encoding_space;
use zksync_types::ProtocolVersionId;

use crate::seal_criteria::{SealCriterion, SealData, SealResolution, StateKeeperConfig};
use crate::seal_criteria::{
SealCriterion, SealData, SealResolution, StateKeeperConfig, UnexecutableReason,
};

#[derive(Debug)]
pub struct TxEncodingSizeCriterion;
Expand All @@ -26,8 +28,7 @@ impl SealCriterion for TxEncodingSizeCriterion {
.round();

if tx_data.cumulative_size > reject_bound as usize {
let message = "Transaction cannot be included due to large encoding size";
SealResolution::Unexecutable(message.into())
UnexecutableReason::LargeEncodingSize.into()
} else if block_data.cumulative_size > bootloader_tx_encoding_space as usize {
SealResolution::ExcludeAndSeal
} else if block_data.cumulative_size > include_and_seal_bound as usize {
Expand Down Expand Up @@ -83,9 +84,7 @@ mod tests {
);
assert_eq!(
unexecutable_resolution,
SealResolution::Unexecutable(
"Transaction cannot be included due to large encoding size".into()
)
UnexecutableReason::LargeEncodingSize.into()
);

let exclude_and_seal_resolution = criterion.should_seal(
Expand Down
Loading
Loading