diff --git a/core/lib/dal/.sqlx/query-3c1d5f985be7e378211aa339c2c6387f2f3eda07a630503324bd6576dbdf8231.json b/core/lib/dal/.sqlx/query-3c1d5f985be7e378211aa339c2c6387f2f3eda07a630503324bd6576dbdf8231.json deleted file mode 100644 index ad5c726ea13..00000000000 --- a/core/lib/dal/.sqlx/query-3c1d5f985be7e378211aa339c2c6387f2f3eda07a630503324bd6576dbdf8231.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "\n SELECT\n trace\n FROM\n transaction_traces\n WHERE\n tx_hash = $1\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "trace", - "type_info": "Jsonb" - } - ], - "parameters": { - "Left": [ - "Bytea" - ] - }, - "nullable": [ - false - ] - }, - "hash": "3c1d5f985be7e378211aa339c2c6387f2f3eda07a630503324bd6576dbdf8231" -} diff --git a/core/lib/dal/.sqlx/query-f922c0718c9dda2f285f09cbabad425bac8ed3d2780c60c9b63afbcea131f9a0.json b/core/lib/dal/.sqlx/query-f922c0718c9dda2f285f09cbabad425bac8ed3d2780c60c9b63afbcea131f9a0.json deleted file mode 100644 index c10268b7332..00000000000 --- a/core/lib/dal/.sqlx/query-f922c0718c9dda2f285f09cbabad425bac8ed3d2780c60c9b63afbcea131f9a0.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "\n INSERT INTO\n transaction_traces (tx_hash, trace, created_at, updated_at)\n VALUES\n ($1, $2, NOW(), NOW())\n ", - "describe": { - "columns": [], - "parameters": { - "Left": [ - "Bytea", - "Jsonb" - ] - }, - "nullable": [] - }, - "hash": "f922c0718c9dda2f285f09cbabad425bac8ed3d2780c60c9b63afbcea131f9a0" -} diff --git a/core/lib/dal/README.md b/core/lib/dal/README.md index d86aa8c655e..61bd43489b5 100644 --- a/core/lib/dal/README.md +++ b/core/lib/dal/README.md @@ -3,52 +3,118 @@ This crate provides read and write access to the main database (which is Postgres), that acts as a primary source of truth. -Current schema is managed by `diesel` - that applies all the schema changes from `migrations` directory. - -## Schema - -### Storage tables - -| Table name | Description | Usage | -| ------------ | --------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | -| storage | Main storage column: mapping from hashed StorageKey (account + key) to the value. | We also store additional columns there (like transaction hash or creation time). | -| storage_logs | Stores all the storage access logs for all the transactions. | Main source of truth - other columns (like `storage`) are created by compacting this column. Its primary index is (storage key, mini_block, operation_id) | - -### Prover queue tables - -The tables below are used by different parts of witness generation. 
-
-| Table name                    | Description                        |
-| ----------------------------- | ---------------------------------- |
-| witness_inputs                | TODO                               |
-| leaf_aggregation_witness_jobs | Queue of jobs for leaf aggregation |
-| node_aggregation_witness_jobs | Queue of jobs for node aggregation |
-| scheduler_witness_jobs        | TODO                               |
-
-### TODO
-
-| Table name                            |
-| ------------------------------------- |
-| \_sqlx_migrations                     |
-| aggregated_proof                      |
-| contract_verification_requests        |
-| contract_verification_solc_versions   |
-| contract_verification_zksolc_versions |
-| contracts_verification_info           |
-| eth_txs                               |
-| eth_txs_history                       |
-| events                                |
-| factory_deps                          |
-| gpu_prover_queue                      |
-| initial_writes                        |
-| l1_batches                            |
-| l2_to_l1_logs                         |
-| miniblocks                            |
-| proof                                 |
-| protective_reads                      |
-| prover_jobs                           |
-| static_artifact_storage               |
-| storage_logs_dedup                    |
-| tokens                                |
-| transaction_traces                    |
-| transactions                          |
+The current schema is managed by `sqlx`. Schema changes are stored in the [`migrations`](migrations) directory.
+
+## Schema overview
+
+_This overview skips prover-related and Ethereum sender-related tables, which are specific to the main node._
+
+### Miniblocks and L1 batches
+
+- `miniblocks`. Stores miniblock headers.
+
+- `miniblocks_consensus`. Stores miniblock data related to the consensus algorithm used by the decentralized sequencer.
+  Tied one-to-one to miniblocks (the consensus side of the relation is optional).
+
+- `l1_batches`. Stores L1 batch headers.
+
+- `commitments`. Stores part of the L1 batch commitment data (event queue and bootloader memory commitments). In the
+  future, other commitment-related columns will be moved here from `l1_batches`.
+
+### Transactions
+
+- `transactions`. Stores all transactions received by the node, both L2 and L1 ones. Transactions in this table are not
+  necessarily included in a miniblock; i.e., the table is also used as a persistent mempool.
+
+### VM storage
+
+See the [`zksync_state`] crate for more context.
+
+- `storage_logs`. Stores all VM storage write logs for all transactions, as well as non-transaction writes generated by
+  the bootloader. This is the source of truth for the VM storage; all other VM storage implementations (see the
+  [`zksync_state`] crate) are based on it (e.g., by adding persistent or in-memory caching). Used by multiple
+  components, including the Metadata calculator, Commitment generator, and API server (both for reading one-off values
+  like account balances and as a part of the VM sandbox).
+
+- `initial_writes`. Stores initial write information for each L1 batch, i.e., the enumeration index assigned to each
+  key. Used when generating L1 batch metadata in the Metadata calculator and Commitment generator components, and in
+  the API server VM sandbox for fee estimation.
+
+- `protective_reads`. Stores protective read information for each L1 batch, i.e., keys that influenced VM execution for
+  the batch but were not modified. Used when generating L1 batch metadata in the Commitment generator.
+
+- `factory_deps`. Stores bytecodes of all deployed L2 contracts.
+
+- `storage`. **Obsolete, going to be removed; must not be used in new code.**
+
+### Other VM artifacts
+
+- `events`. Stores all events (aka logs) emitted by smart contracts during VM execution.
+
+- `l2_to_l1_logs`. Stores L2-to-L1 logs emitted by smart contracts during VM execution.
+
+- `call_traces`. Stores call traces emitted for transactions during VM execution. (Unlike in L1 node implementations,
+  in Era call traces are currently generated proactively for all transactions.)
+
+- `tokens`. Stores all ERC-20 tokens registered in the L1–L2 bridge.
+
+- `transaction_traces`. **Obsolete, going to be removed; must not be used in new code.**
+
+### Snapshot generation and recovery
+
+See the [`snapshots_creator`] and [`snapshots_applier`] crates for an overview of application-level node snapshots.
+
+- `snapshots`. Stores metadata for all snapshots generated by `snapshots_creator`, such as the L1 batch of the snapshot.
+
+- `snapshot_recovery`. Stores metadata for the snapshot used during node recovery, if any. Currently, this table is
+  expected to have no more than one row.
+
+## Logical invariants
+
+In addition to foreign key constraints and other constraints manifested directly in the DB schema, the following
+invariants are expected to be upheld:
+
+- If a header is present in the `miniblocks` table, the DB is expected to contain all artifacts associated with the
+  miniblock execution, such as `events`, `l2_to_l1_logs`, `call_traces`, `tokens`, etc. (See the State keeper I/O logic
+  for the exact definition of these artifacts.)
+- Likewise, if a header is present in the `l1_batches` table, all artifacts associated with the L1 batch execution are
+  also expected in the DB, e.g., `initial_writes` and `protective_reads`. (See the State keeper I/O logic for the exact
+  definition of these artifacts.)
+- Miniblocks and L1 batches present in the DB form a continuous range of numbers. If a DB is recovered from a node
+  snapshot, the first miniblock / L1 batch is **the next one** after the snapshot miniblock / L1 batch mentioned in the
+  `snapshot_recovery` table. Otherwise, miniblocks / L1 batches must start from number 0 (aka genesis).
+
+## Contributing to DAL
+
+Some tips and tricks to make contributing to DAL easier:
+
+- If you want to add a new DB query, search the DAL code and the [`.sqlx`](.sqlx) directory for identical / equivalent
+  queries first. Reuse is almost always better than duplication.
+- It usually makes sense to instrument your queries using the [`instrument`](src/instrument.rs) tooling; a minimal
+  usage sketch is shown after this list. See the `instrument` module docs for details.
+- It's best to cover added queries with unit tests to ensure they work and don't break in the future. `sqlx` has
+  compile-time schema checking, but it's not a panacea.
+- If there are doubts about query performance, run the query with the [`EXPLAIN`] / `EXPLAIN ANALYZE` prefixes against
+  a production-size database.
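+
+The following sketch shows the intended instrumentation flow. The query, the argument names, and the exact shape of
+the method chain are illustrative assumptions rather than copied from the code; see the `instrument` module docs for
+the authoritative API.
+
+```rust
+use crate::instrument::InstrumentExt;
+
+// `hash` is an `H256` defined elsewhere; the query itself is made up for illustration.
+let number = sqlx::query!("SELECT number FROM miniblocks WHERE hash = $1", hash.as_bytes())
+    .instrument("get_miniblock_number") // the name under which the query is reported
+    .with_arg("hash", &hash) // logged together with slow / failing query reports
+    .report_latency() // additionally report query latency as a metric
+    // Assumption: `Instrumented` exposes sqlx-like fetch methods.
+    .fetch_optional(self.storage.conn())
+    .await
+    .unwrap();
+```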
+
+### Backward compatibility
+
+All DB schema changes are expected to be backward-compatible. That is, _old_ code must be able to function with the
+_new_ schema. For example, dropping / renaming columns is not allowed. Instead, a 2-phase migration should be used:
+
+1. The column should be marked as obsolete, with its mentions replaced in all queries. If the column should be renamed,
+   a new column should be created and data (if any) should be copied from the old column (see also:
+   [_Programmatic migrations_](#programmatic-migrations)).
+2. After a significant delay (on the order of months), the old column may be removed in a separate migration.
+
+### Programmatic migrations
+
+We cannot afford a non-trivial amount of downtime caused by a data migration. That is, if a migration may cause such
+downtime (e.g., because it copies a non-trivial amount of data), it must be organized as a programmatic migration and
+run in the node background, perhaps splitting the work into chunks with a delay between them so that the migration
+doesn't hog all DB resources.
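+
+As a sketch of this approach (with a hypothetical table, columns, and tuning parameters; real migrations live in
+dedicated components), a chunked background copy could look as follows:
+
+```rust
+use std::time::Duration;
+
+/// Hypothetical background migration copying `old_col` into `new_col` in chunks.
+async fn copy_column_in_background(pool: &sqlx::PgPool) -> sqlx::Result<()> {
+    const CHUNK_SIZE: i64 = 10_000;
+    const DELAY_BETWEEN_CHUNKS: Duration = Duration::from_secs(1);
+
+    loop {
+        let rows_affected = sqlx::query(
+            "UPDATE some_table SET new_col = old_col WHERE id IN \
+             (SELECT id FROM some_table WHERE new_col IS NULL LIMIT $1)",
+        )
+        .bind(CHUNK_SIZE)
+        .execute(pool)
+        .await?
+        .rows_affected();
+
+        if rows_affected == 0 {
+            return Ok(()); // all rows have been migrated
+        }
+        // Sleep between chunks so that the migration doesn't hog DB resources.
+        tokio::time::sleep(DELAY_BETWEEN_CHUNKS).await;
+    }
+}
+```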
+
+[`zksync_state`]: ../state
+[`snapshots_creator`]: ../../bin/snapshots_creator
+[`snapshots_applier`]: ../snapshots_applier
+[`EXPLAIN`]: https://www.postgresql.org/docs/14/sql-explain.html
diff --git a/core/lib/dal/src/instrument.rs b/core/lib/dal/src/instrument.rs
index ad59d837554..74742309805 100644
--- a/core/lib/dal/src/instrument.rs
+++ b/core/lib/dal/src/instrument.rs
@@ -1,4 +1,15 @@
 //! DAL query instrumentation.
+//!
+//! Query instrumentation makes it possible to:
+//!
+//! - Report query latency as a metric
+//! - Report slow and failing queries as metrics
+//! - Log slow and failing queries together with their arguments, making them easier to debug
+//!
+//! The entry point for instrumentation is the [`InstrumentExt`] trait. Once it is imported into scope,
+//! its `instrument()` method can be called on the output of `query*` functions or macros. You can then call
+//! [`Instrumented`] methods on the returned struct, e.g. to [report query latency](Instrumented::report_latency())
+//! and/or [add logged args](Instrumented::with_arg()) for a query.
 
 use std::{fmt, future::Future, panic::Location};
 
diff --git a/core/lib/dal/src/transactions_dal.rs b/core/lib/dal/src/transactions_dal.rs
index 364b6e13e30..a37eb72da89 100644
--- a/core/lib/dal/src/transactions_dal.rs
+++ b/core/lib/dal/src/transactions_dal.rs
@@ -11,7 +11,7 @@ use zksync_types::{
     l2::L2Tx,
     protocol_version::ProtocolUpgradeTx,
     tx::{tx_execution_info::TxExecutionStatus, TransactionExecutionResult},
-    vm_trace::{Call, VmExecutionTrace},
+    vm_trace::Call,
     Address, ExecuteTransactionCommon, L1BatchNumber, L1BlockNumber, MiniblockNumber, PriorityOpId,
     Transaction, H256, PROTOCOL_UPGRADE_TX_TYPE, U256,
 };
@@ -1074,48 +1074,6 @@ impl TransactionsDal<'_, '_> {
         }
     }
 
-    pub async fn insert_trace(&mut self, hash: H256, trace: VmExecutionTrace) {
-        {
-            sqlx::query!(
-                r#"
-                INSERT INTO
-                    transaction_traces (tx_hash, trace, created_at, updated_at)
-                VALUES
-                    ($1, $2, NOW(), NOW())
-                "#,
-                hash.as_bytes(),
-                serde_json::to_value(trace).unwrap()
-            )
-            .execute(self.storage.conn())
-            .await
-            .unwrap();
-        }
-    }
-
-    pub async fn get_trace(&mut self, hash: H256) -> Option<VmExecutionTrace> {
-        {
-            let trace = sqlx::query!(
-                r#"
-                SELECT
-                    trace
-                FROM
-                    transaction_traces
-                WHERE
-                    tx_hash = $1
-                "#,
-                hash.as_bytes()
-            )
-            .fetch_optional(self.storage.conn())
-            .await
-            .unwrap()
-            .map(|record| record.trace);
-            trace.map(|trace| {
-                serde_json::from_value(trace)
-                    .unwrap_or_else(|_| panic!("invalid trace json in database for {:?}", hash))
-            })
-        }
-    }
-
     /// Returns miniblocks with their transactions that state_keeper needs to re-execute on restart.
     /// These are the transactions that are included to some miniblock,
     /// but not included to L1 batch. The order of the transactions is the same as it was