From cb01f9a070337c130f5bc1b53eb85abb77d77101 Mon Sep 17 00:00:00 2001 From: Josh Wilson Date: Thu, 25 Jan 2024 16:06:54 +0100 Subject: [PATCH] feat(nodes): encrypt all records before disk, decrypt on get fixes #1158 + also icnrases benchmark limit to account for extra work done here --- .github/workflows/benchmark-prs.yml | 20 +++--- Cargo.lock | 16 +++++ sn_networking/Cargo.toml | 1 + sn_networking/src/record_store.rs | 108 +++++++++++++++++++++------- 4 files changed, 108 insertions(+), 37 deletions(-) diff --git a/.github/workflows/benchmark-prs.yml b/.github/workflows/benchmark-prs.yml index d21d8a8bca..ba3ba08da1 100644 --- a/.github/workflows/benchmark-prs.yml +++ b/.github/workflows/benchmark-prs.yml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - + - uses: dtolnay/rust-toolchain@stable with: components: rustfmt, clippy @@ -214,7 +214,7 @@ jobs: ######################### ### Stop Network ### ######################### - + - name: Stop the local network if: always() uses: maidsafe/sn-local-testnet-action@main @@ -242,12 +242,12 @@ jobs: run: | node_peak_mem_limit_mb="250" # mb peak_mem_usage=$( - rg '"memory_used_mb":[^,]*' $NODE_DATA_PATH/*/logs/* -o --no-line-number --no-filename | - awk -F':' '/"memory_used_mb":/{print $2}' | - sort -n | + rg '"memory_used_mb":[^,]*' $NODE_DATA_PATH/*/logs/* -o --no-line-number --no-filename | + awk -F':' '/"memory_used_mb":/{print $2}' | + sort -n | tail -n 1 ) - + echo "Memory usage: $peak_mem_usage MB" if (( $(echo "$peak_mem_usage > $node_peak_mem_limit_mb" | bc -l) )); then echo "Node memory usage exceeded threshold: $peak_mem_usage MB" @@ -327,7 +327,7 @@ jobs: echo "Total swarm_driver long handling times is: $total_num_of_times" echo "Total swarm_driver long handling duration is: $total_long_handling ms" echo "Total average swarm_driver long handling duration is: $average_handling_ms ms" - total_num_of_times_limit_hits="10000" # hits + total_num_of_times_limit_hits="20000" # hits total_long_handling_limit_ms="170000" # ms average_handling_limit_ms="20" # ms if (( $(echo "$total_num_of_times > $total_num_of_times_limit_hits" | bc -l) )); then @@ -394,7 +394,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - + - uses: dtolnay/rust-toolchain@stable with: components: rustfmt, clippy @@ -410,7 +410,7 @@ jobs: - name: install ripgrep run: sudo apt-get -y install ripgrep - + ######################## ### Benchmark ### @@ -421,4 +421,4 @@ jobs: # passes to tee which displays it in the terminal and writes to output.txt run: | cargo criterion --message-format=json 2>&1 -p sn_transfers | tee -a output.txt - cat output.txt + cat output.txt diff --git a/Cargo.lock b/Cargo.lock index 22c4e11e96..58f8e6d959 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -52,6 +52,21 @@ dependencies = [ "subtle", ] +[[package]] +name = "aes-gcm-siv" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae0784134ba9375416d469ec31e7c5f9fa94405049cf08c5ce5b4698be673e0d" +dependencies = [ + "aead", + "aes", + "cipher", + "ctr", + "polyval", + "subtle", + "zeroize", +] + [[package]] name = "ahash" version = "0.8.7" @@ -5192,6 +5207,7 @@ dependencies = [ name = "sn_networking" version = "0.12.40" dependencies = [ + "aes-gcm-siv", "async-trait", "backoff", "blsttc", diff --git a/sn_networking/Cargo.toml b/sn_networking/Cargo.toml index 0e91d3e03a..972b3557c3 100644 --- a/sn_networking/Cargo.toml +++ b/sn_networking/Cargo.toml @@ -39,6 +39,7 @@ tokio = { version = "1.32.0", features = ["io-util", "macros", "rt", "sync", "ti tracing = { version = "~0.1.26" } xor_name = "5.0.0" backoff = { version = "0.4.0", features = ["tokio"] } +aes-gcm-siv = "0.11.1" [dev-dependencies] bls = { package = "blsttc", version = "8.0.1" } diff --git a/sn_networking/src/record_store.rs b/sn_networking/src/record_store.rs index 0360d4bc2b..5785eaa2d4 100644 --- a/sn_networking/src/record_store.rs +++ b/sn_networking/src/record_store.rs @@ -9,6 +9,10 @@ use crate::target_arch::{spawn, Instant}; use crate::{cmd::SwarmCmd, event::NetworkEvent, send_swarm_cmd}; +use aes_gcm_siv::{ + aead::{Aead, KeyInit, OsRng}, + Aes256GcmSiv, Nonce, +}; use libp2p::{ identity::PeerId, kad::{ @@ -18,6 +22,7 @@ use libp2p::{ }; #[cfg(feature = "open-metrics")] use prometheus_client::metrics::gauge::Gauge; +use rand::RngCore; use sn_protocol::{ storage::{RecordHeader, RecordKind, RecordType}, NetworkAddress, PrettyPrintRecordKey, @@ -56,6 +61,9 @@ pub struct NodeRecordStore { record_count_metric: Option, /// Counting how many times got paid received_payment_count: usize, + /// Encyption cipher for the records, randomly generated at node startup + /// Plus a 4 byte nonce starter + encryption_details: (Aes256GcmSiv, [u8; 4]), } /// Configuration for a `DiskBackedRecordStore`. @@ -79,6 +87,16 @@ impl Default for NodeRecordStoreConfig { } } +/// Generate an encryption nonce for a given record key and nonce_starter bytes. +fn generate_nonce_for_record(nonce_starter: &[u8; 4], key: &Key) -> Nonce { + let mut nonce_bytes = nonce_starter.to_vec(); + nonce_bytes.extend_from_slice(key.as_ref()); + // Ensure the final nonce is exactly 96 bits long by padding or truncating as necessary + // https://crypto.stackexchange.com/questions/26790/how-bad-it-is-using-the-same-iv-twice-with-aes-gcm + nonce_bytes.resize(12, 0); // 12 (u8) * 8 = 96 bits + Nonce::from_iter(nonce_bytes) +} + impl NodeRecordStore { /// Creates a new `DiskBackedStore` with the given configuration. pub fn with_config( @@ -87,6 +105,10 @@ impl NodeRecordStore { network_event_sender: mpsc::Sender, swarm_cmd_sender: mpsc::Sender, ) -> Self { + let key = Aes256GcmSiv::generate_key(&mut OsRng); + let cipher = Aes256GcmSiv::new(&key); + let mut nonce_starter = [0u8; 4]; + OsRng.fill_bytes(&mut nonce_starter); NodeRecordStore { local_key: KBucketKey::from(local_id), config, @@ -97,6 +119,7 @@ impl NodeRecordStore { #[cfg(feature = "open-metrics")] record_count_metric: None, received_payment_count: 0, + encryption_details: (cipher, nonce_starter), } } @@ -117,26 +140,42 @@ impl NodeRecordStore { hex_string } - fn read_from_disk<'a>(key: &Key, storage_dir: &Path) -> Option> { + fn read_from_disk<'a>( + encryption_details: &(Aes256GcmSiv, [u8; 4]), + key: &Key, + storage_dir: &Path, + ) -> Option> { + let (cipher, nonce_starter) = encryption_details; + let start = Instant::now(); let filename = Self::key_to_hex(key); let file_path = storage_dir.join(&filename); + let nonce = generate_nonce_for_record(nonce_starter, key); // we should only be reading if we know the record is written to disk properly match fs::read(file_path) { - Ok(value) => { + Ok(ciphertext) => { // vdash metric (if modified please notify at https://github.com/happybeing/vdash/issues): info!( "Retrieved record from disk! filename: {filename} after {:?}", start.elapsed() ); - let record = Record { - key: key.clone(), - value, - publisher: None, - expires: None, - }; - Some(Cow::Owned(record)) + + match cipher.decrypt(&nonce, ciphertext.as_ref()) { + Ok(value) => { + let record = Record { + key: key.clone(), + value, + publisher: None, + expires: None, + }; + return Some(Cow::Owned(record)); + } + Err(error) => { + error!("Error while decrypting record. filename: {filename}: {error:?}"); + None + } + } } Err(err) => { error!("Error while reading file. filename: {filename}, error: {err:?}"); @@ -252,27 +291,38 @@ impl NodeRecordStore { let _ = metric.set(self.records.len() as i64); } + let (cipher, nonce_starter) = self.encryption_details.clone(); + let nonce = generate_nonce_for_record(&nonce_starter, &r.key); let cloned_cmd_sender = self.swarm_cmd_sender.clone(); spawn(async move { - let cmd = match fs::write(&file_path, r.value) { - Ok(_) => { - // vdash metric (if modified please notify at https://github.com/happybeing/vdash/issues): - info!("Wrote record {record_key:?} to disk! filename: {filename}"); + match cipher.encrypt(&nonce, r.value.as_ref()) { + Ok(value) => { + let cmd = match fs::write(&file_path, value) { + Ok(_) => { + // vdash metric (if modified please notify at https://github.com/happybeing/vdash/issues): + info!("Wrote record {record_key:?} to disk! filename: {filename}"); + + SwarmCmd::AddLocalRecordAsStored { + key: r.key, + record_type, + } + } + Err(err) => { + error!( + "Error writing record {record_key:?} filename: {filename}, error: {err:?}" + ); + SwarmCmd::RemoveFailedLocalRecord { key: r.key } + } + }; - SwarmCmd::AddLocalRecordAsStored { - key: r.key, - record_type, - } + send_swarm_cmd(cloned_cmd_sender, cmd); } - Err(err) => { - error!( - "Error writing record {record_key:?} filename: {filename}, error: {err:?}" + Err(error) => { + warn!( + "Failed to encrypt record {record_key:?} filename: {filename} : {error:?}" ); - SwarmCmd::RemoveFailedLocalRecord { key: r.key } } - }; - - send_swarm_cmd(cloned_cmd_sender, cmd); + } }); Ok(()) @@ -341,7 +391,7 @@ impl RecordStore for NodeRecordStore { debug!("GET request for Record key: {key}"); - Self::read_from_disk(k, &self.config.storage_dir) + Self::read_from_disk(&self.encryption_details, k, &self.config.storage_dir) } fn put(&mut self, record: Record) -> Result<()> { @@ -732,8 +782,12 @@ mod tests { // Confirm the pruned_key got removed, looping to allow async disk ops to complete. let mut iteration = 0; while iteration < max_iterations { - if NodeRecordStore::read_from_disk(&pruned_key, &store_config.storage_dir) - .is_none() + if NodeRecordStore::read_from_disk( + &store.encryption_details, + &pruned_key, + &store_config.storage_dir, + ) + .is_none() { break; }