Skip to content

Commit

Permalink
feat: add dynamic growth to lmdb (#6231)
Browse files Browse the repository at this point in the history
Description
---
Added dynamic growth to LMDB: when a resize is needed, the environment map
size now grows by the configured grow size plus an optional shortfall, i.e.
the size of the value that could not be written. This is especially relevant
during block sync of many full blocks, where the output SMT has grown very large.

Motivation and Context
---
Block sync failed when syncing many full blocks; see the log excerpt below.
```text
2024-03-25 07:27:14.304496100 [c::cs::lmdb_db::lmdb_db] INFO  Database resize required (resized 5 time(s) in this transaction)
2024-03-25 07:27:14.317099900 [lmdb] DEBUG (\node_05\esmeralda\data/base_node\db) LMDB MB, mapsize was grown from 1616 MB to 1632 MB, increased by 16 MB
2024-03-25 07:27:14.319843700 [c::bn::block_sync] WARN  Chain storage error: DB transaction was too large (3 operations)
2024-03-25 07:27:14.319864400 [c::bn::block_sync] WARN  Block sync failed: No more sync peers available: Block sync failed
2024-03-25 07:27:14.319969400 [c::cs::database] INFO  Rewinding headers from height 6325 to 1627
```

How Has This Been Tested?
---
System-level archival sync-from-scratch test on esmeralda after a
coin-split and transaction stress test.

Before the fix, blocks #1544 to #1584: multiple resizes were required for
the same set of write operations, with the final one resulting in block
sync failure.
```text
2024-03-25 07:19:35.346281600 [c::cs::lmdb_db::lmdb_db] INFO  Database resize required (resized 1 time(s) in this transaction)
2024-03-25 07:19:35.654103900 [c::cs::lmdb_db::lmdb_db] INFO  Database resize required (resized 2 time(s) in this transaction)
2024-03-25 07:19:35.952783600 [c::cs::lmdb_db::lmdb_db] INFO  Database resize required (resized 3 time(s) in this transaction)
2024-03-25 07:19:41.198100900 [c::cs::lmdb_db::lmdb_db] INFO  Database resize required (resized 1 time(s) in this transaction)
2024-03-25 07:19:41.519953900 [c::cs::lmdb_db::lmdb_db] INFO  Database resize required (resized 2 time(s) in this transaction)
2024-03-25 07:19:41.827079500 [c::cs::lmdb_db::lmdb_db] INFO  Database resize required (resized 3 time(s) in this transaction)
2024-03-25 07:19:42.136522700 [c::cs::lmdb_db::lmdb_db] INFO  Database resize required (resized 4 time(s) in this transaction)
2024-03-25 07:20:29.331297000 [c::cs::lmdb_db::lmdb_db] INFO  Database resize required (resized 1 time(s) in this transaction)
2024-03-25 07:20:29.755442600 [c::cs::lmdb_db::lmdb_db] INFO  Database resize required (resized 2 time(s) in this transaction)
2024-03-25 07:20:30.119457000 [c::cs::lmdb_db::lmdb_db] INFO  Database resize required (resized 3 time(s) in this transaction)
2024-03-25 07:20:30.491588200 [c::cs::lmdb_db::lmdb_db] INFO  Database resize required (resized 4 time(s) in this transaction)
2024-03-25 07:20:30.868365300 [c::cs::lmdb_db::lmdb_db] INFO  Database resize required (resized 5 time(s) in this transaction)
```
After the fix, blocks #1544 to #1584: only a single resize was required for
each set of write operations.
```text
2024-03-25 16:40:28.814566400 [c::cs::lmdb_db::lmdb_db] INFO  Database resize required (resized 1 time(s) in this transaction)
2024-03-25 16:42:05.167759000 [c::cs::lmdb_db::lmdb_db] INFO  Database resize required (resized 1 time(s) in this transaction)
```

What process can a PR reviewer use to test or verify this change?
---
Code review

<!-- Checklist -->
<!-- 1. Is the title of your PR in the form that would make nice release
notes? The title, excluding the conventional commit
tag, will be included exactly as is in the CHANGELOG, so please think
about it carefully. -->


Breaking Changes
---

- [x] None
- [ ] Requires data directory on base node to be deleted
- [ ] Requires hard fork
- [ ] Other - Please specify

<!-- Does this include a breaking change? If so, include this line as a
footer -->
<!-- BREAKING CHANGE: Description what the user should do, e.g. delete a
database, resync the chain -->
  • Loading branch information
hansieodendaal committed Mar 26, 2024
1 parent ccc00f2 commit f842c76
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 25 deletions.
6 changes: 3 additions & 3 deletions base_layer/core/src/chain_storage/error.rs
Expand Up @@ -118,7 +118,7 @@ pub enum ChainStorageError {
#[error("Key {key} in {table_name} already exists")]
KeyExists { table_name: &'static str, key: String },
#[error("Database resize required")]
DbResizeRequired,
DbResizeRequired(Option<usize>),
#[error("DB transaction was too large ({0} operations)")]
DbTransactionTooLarge(usize),
#[error("DB needs to be resynced: {0}")]
Expand Down Expand Up @@ -183,7 +183,7 @@ impl ChainStorageError {
_err @ ChainStorageError::IoError(_) |
_err @ ChainStorageError::CannotCalculateNonTipMmr(_) |
_err @ ChainStorageError::KeyExists { .. } |
_err @ ChainStorageError::DbResizeRequired |
_err @ ChainStorageError::DbResizeRequired(_) |
_err @ ChainStorageError::DbTransactionTooLarge(_) |
_err @ ChainStorageError::DatabaseResyncRequired(_) |
_err @ ChainStorageError::BlockError(_) |
Expand Down Expand Up @@ -213,7 +213,7 @@ impl From<lmdb_zero::Error> for ChainStorageError {
field: "<unknown>",
value: "<unknown>".to_string(),
},
Code(error::MAP_FULL) => ChainStorageError::DbResizeRequired,
Code(error::MAP_FULL) => ChainStorageError::DbResizeRequired(None),
_ => ChainStorageError::AccessError(err.to_string()),
}
}
Expand Down
6 changes: 3 additions & 3 deletions base_layer/core/src/chain_storage/lmdb_db/lmdb.rs
Expand Up @@ -86,7 +86,7 @@ where
target: LOG_TARGET, "Could not insert {} bytes with key '{}' into '{}' ({:?})",
val_buf.len(), to_hex(key.as_lmdb_bytes()), table_name, err
);
Err(ChainStorageError::DbResizeRequired)
Err(ChainStorageError::DbResizeRequired(Some(val_buf.len())))
},
Err(e) => {
error!(
Expand Down Expand Up @@ -116,7 +116,7 @@ where
txn.access().put(db, key, &val_buf, put::Flags::empty()).map_err(|e| {
if let lmdb_zero::Error::Code(code) = &e {
if *code == lmdb_zero::error::MAP_FULL {
return ChainStorageError::DbResizeRequired;
return ChainStorageError::DbResizeRequired(Some(val_buf.len()));
}
}
error!(
Expand All @@ -137,7 +137,7 @@ where
txn.access().put(db, key, &val_buf, put::Flags::empty()).map_err(|e| {
if let lmdb_zero::Error::Code(code) = &e {
if *code == lmdb_zero::error::MAP_FULL {
return ChainStorageError::DbResizeRequired;
return ChainStorageError::DbResizeRequired(Some(val_buf.len()));
}
}
error!(
Expand Down
54 changes: 50 additions & 4 deletions base_layer/core/src/chain_storage/lmdb_db/lmdb_db.rs
Expand Up @@ -23,7 +23,15 @@
use std::{convert::TryFrom, fmt, fs, fs::File, ops::Deref, path::Path, sync::Arc, time::Instant};

use fs2::FileExt;
use lmdb_zero::{open, ConstTransaction, Database, Environment, ReadTransaction, WriteTransaction};
use lmdb_zero::{
open,
traits::AsLmdbBytes,
ConstTransaction,
Database,
Environment,
ReadTransaction,
WriteTransaction,
};
use log::*;
use primitive_types::U256;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -55,6 +63,7 @@ use crate::{
error::{ChainStorageError, OrNotFound},
lmdb_db::{
composite_key::{CompositeKey, InputKey, OutputKey},
helpers::serialize,
lmdb::{
fetch_db_entry_sizes,
lmdb_clear,
Expand Down Expand Up @@ -321,6 +330,21 @@ impl LMDBDatabase {
fn apply_db_transaction(&mut self, txn: &DbTransaction) -> Result<(), ChainStorageError> {
#[allow(clippy::enum_glob_use)]
use WriteOperation::*;

// Ensure there will be enough space in the database to insert the block before it is attempted; this is more
// efficient than relying on an error if the LMDB environment map size was reached with each component's insert
// operation, with cleanup, resize and re-try. This will also prevent block sync from stalling due to LMDB
// environment map size being reached.
if txn.operations().iter().any(|op| {
matches!(op, InsertOrphanBlock { .. }) ||
matches!(op, InsertTipBlockBody { .. }) ||
matches!(op, InsertChainOrphanBlock { .. })
}) {
unsafe {
LMDBStore::resize_if_required(&self.env, &self.env_config)?;
}
}

let write_txn = self.write_transaction()?;
for op in txn.operations() {
trace!(target: LOG_TARGET, "[apply_db_transaction] WriteOperation: {}", op);
Expand Down Expand Up @@ -1397,7 +1421,29 @@ impl LMDBDatabase {

fn insert_tip_smt(&self, txn: &WriteTransaction<'_>, smt: &OutputSmt) -> Result<(), ChainStorageError> {
let k = MetadataKey::TipSmt;
lmdb_replace(txn, &self.tip_utxo_smt, &k.as_u32(), smt)

match lmdb_replace(txn, &self.tip_utxo_smt, &k.as_u32(), smt) {
Ok(_) => {
trace!(
"Inserted {} bytes with key '{}' into 'tip_utxo_smt' (size {})",
serialize(smt).unwrap_or_default().len(),
to_hex(k.as_u32().as_lmdb_bytes()),
smt.size()
);
Ok(())
},
Err(e) => {
if let ChainStorageError::DbResizeRequired(Some(val)) = e {
trace!(
"Could NOT insert {} bytes with key '{}' into 'tip_utxo_smt' (size {})",
val,
to_hex(k.as_u32().as_lmdb_bytes()),
smt.size()
);
}
Err(e)
},
}
}

fn update_block_accumulated_data(
Expand Down Expand Up @@ -1761,7 +1807,7 @@ impl BlockchainBackend for LMDBDatabase {

return Ok(());
},
Err(ChainStorageError::DbResizeRequired) => {
Err(ChainStorageError::DbResizeRequired(shortfall)) => {
info!(
target: LOG_TARGET,
"Database resize required (resized {} time(s) in this transaction)",
Expand All @@ -1772,7 +1818,7 @@ impl BlockchainBackend for LMDBDatabase {
// BlockchainDatabase, so we know there are no other threads taking out LMDB transactions when this
// is called.
unsafe {
LMDBStore::resize(&self.env, &self.env_config)?;
LMDBStore::resize(&self.env, &self.env_config, shortfall)?;
}
},
Err(e) => {
Expand Down
22 changes: 7 additions & 15 deletions infrastructure/storage/src/lmdb_store/store.rs
Expand Up @@ -92,7 +92,7 @@ impl LMDBConfig {

impl Default for LMDBConfig {
fn default() -> Self {
Self::new_from_mb(16, 16, 4)
Self::new_from_mb(16, 16, 8)
}
}

Expand Down Expand Up @@ -426,21 +426,13 @@ impl LMDBStore {
let stat = env.stat()?;
let size_used_bytes = stat.psize as usize * env_info.last_pgno;
let size_left_bytes = env_info.mapsize - size_used_bytes;
debug!(
target: LOG_TARGET,
"Resize check: Used bytes: {}, Remaining bytes: {}", size_used_bytes, size_left_bytes
);

if size_left_bytes <= config.resize_threshold_bytes {
Self::resize(env, config)?;
debug!(
target: LOG_TARGET,
"({}) LMDB size used {:?} MB, environment space left {:?} MB, increased by {:?} MB",
env.path()?.to_str()?,
size_used_bytes / BYTES_PER_MB,
size_left_bytes / BYTES_PER_MB,
config.grow_size_bytes / BYTES_PER_MB,
"Resize required: Used bytes: {}, Remaining bytes: {}", size_used_bytes, size_left_bytes
);
Self::resize(env, config, None)?;
}
Ok(())
}
Expand All @@ -452,10 +444,10 @@ impl LMDBStore {
/// not check for this condition, the caller must ensure it explicitly.
///
/// <http://www.lmdb.tech/doc/group__mdb.html#gaa2506ec8dab3d969b0e609cd82e619e5>
pub unsafe fn resize(env: &Environment, config: &LMDBConfig) -> Result<(), LMDBError> {
pub unsafe fn resize(env: &Environment, config: &LMDBConfig, shortfall: Option<usize>) -> Result<(), LMDBError> {
let env_info = env.info()?;
let current_mapsize = env_info.mapsize;
env.set_mapsize(current_mapsize + config.grow_size_bytes)?;
env.set_mapsize(current_mapsize + config.grow_size_bytes + shortfall.unwrap_or_default())?;
let env_info = env.info()?;
let new_mapsize = env_info.mapsize;
debug!(
Expand All @@ -464,7 +456,7 @@ impl LMDBStore {
env.path()?.to_str()?,
current_mapsize / BYTES_PER_MB,
new_mapsize / BYTES_PER_MB,
config.grow_size_bytes / BYTES_PER_MB,
(config.grow_size_bytes + shortfall.unwrap_or_default()) / BYTES_PER_MB,
);

Ok(())
Expand Down Expand Up @@ -498,7 +490,7 @@ impl LMDBDatabase {
"Failed to obtain write transaction because the database needs to be resized"
);
unsafe {
LMDBStore::resize(&self.env, &self.env_config)?;
LMDBStore::resize(&self.env, &self.env_config, Some(value.len()))?;
}
},
Err(e) => return Err(e.into()),
Expand Down

0 comments on commit f842c76

Please sign in to comment.