From e635e686439f4074236f3950cf38d4108891bc8d Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Fri, 26 Aug 2022 06:26:30 +0000 Subject: [PATCH 01/28] wip --- Cargo.lock | 10 +++++ Cargo.toml | 1 + bootstore/Cargo.toml | 12 ++++++ bootstore/src/lib.rs | 94 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 117 insertions(+) create mode 100644 bootstore/Cargo.toml create mode 100644 bootstore/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 23d5929b014..6c8aab3ce07 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -338,6 +338,16 @@ dependencies = [ "byte-tools", ] +[[package]] +name = "bootstore" +version = "0.1.0" +dependencies = [ + "sprockets-common", + "sprockets-host", + "thiserror", + "tokio", +] + [[package]] name = "bstr" version = "0.2.17" diff --git a/Cargo.toml b/Cargo.toml index 70d3e33ca42..a95b95c9c64 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,6 @@ [workspace] members = [ + "bootstore", "common", "ddm-admin-client", "deploy", diff --git a/bootstore/Cargo.toml b/bootstore/Cargo.toml new file mode 100644 index 00000000000..a6c43109cfc --- /dev/null +++ b/bootstore/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "bootstore" +description = "A replicated datastore read before CockroachDb is available" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" } +sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" } +tokio = { version = "1.20", features = [ "full" ] } +thiserror = "1.0" diff --git a/bootstore/src/lib.rs b/bootstore/src/lib.rs new file mode 100644 index 00000000000..aee7e9fa0ea --- /dev/null +++ b/bootstore/src/lib.rs @@ -0,0 +1,94 @@ +//! The two communication paths for the bootstore: +//! +//! RSS -> Sled Agent -> Coordinator -> Storage Nodes +//! Nexus -> Steno -> Sled Agent -> Coordinator -> Storage Nodes +//! 
+//! +//! Since some trust quorum membership information that is input via RSS must +//! make its way into CockroachDb so that reconfiguration works, we will load +//! that information from the trust quorum database, parse it, and write +//! it to CockroachDB when we start it up. + +use std::collections::BTreeMap; +use std::net::Ipv6Addr; +use std::sync::Arc; + +use sprockets_common::Sha3_256Digest; +use sprockets_host::Identity; +use thiserror::Error; +use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; +use tokio::net::TcpStream; + +pub const PORT: u16 = 2121; + +/// The current state of a replica for a given transaction +pub enum TransactionState { + Init, + Prepared, + Committed, + Aborted, +} + +/// The database used store blobs. +/// +/// We separate them because they are encrypted and accessed differently. +pub enum Db { + /// Used pre-rack unlock: Contains key shares and membership data + TrustQuorum, + + /// Used post-rack unlock: Contains information necessary for setting + /// up NTP. + NetworkConfig, +} + +/// A two phase commit (2PC) transaction identifier. Transactions either commit or abort. +/// +/// Users submit transactions to a coordinator and retry indefinitely for them +/// to commit or tell them to abort. By giving users explicit control over +/// abort we move the policy decision out of the protocol and keep it simple. +/// +/// It is expected that transactions will be driven by Steno, with their IDs +/// stored in CockroachDB. Before issuing transactions, users should record +/// them in CockroachDb so they can be restarted in the case of failure +/// or aborted. +pub struct TransactionId { + // Database structures are specifically adapted to append-only log style + // blog storage. + db: Db, + + // The name of the item in the database + // The monotonically increasing generation number of the data value + gen: u64, +} + +/// A sprockets server endpoint. 
We don't know the identity of the endpoint +/// until the session is established. +pub struct StorageNode { + addr: Ipv6Addr, + session: sprockets_host::Session, + id: Identity, +} + +/// The coordinator of the 2PC protocol. It establishes connections +/// to the storage nodes and transfers data via 2PC. +pub struct Coordinator { + // We don't know which address maps to which identity, but we know which + // identity maps to which data. We use an Arc for data, because in some + // cases the data is shared and can be somewhat large (a few MiB) and we want to + // eliminate copies. + // + // The data is serialized before we get it. The handlers + // on the nodes know how to deserialize and interpret it. + data: BTreeMap>>, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn it_works() { + let result = 2 + 2; + assert_eq!(result, 4); + } +} From 96b709204755661d07931c030d3a9eac7986c6aa Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Fri, 26 Aug 2022 06:29:56 +0000 Subject: [PATCH 02/28] wip --- bootstore/src/lib.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bootstore/src/lib.rs b/bootstore/src/lib.rs index aee7e9fa0ea..da62660cb72 100644 --- a/bootstore/src/lib.rs +++ b/bootstore/src/lib.rs @@ -63,9 +63,12 @@ pub struct TransactionId { /// A sprockets server endpoint. We don't know the identity of the endpoint /// until the session is established. -pub struct StorageNode { +pub struct StorageNode +where + Chan: AsyncRead + AsyncWrite, +{ addr: Ipv6Addr, - session: sprockets_host::Session, + session: Chan, id: Identity, } From 456bc9da17f41bb4bddd1961b1e248ab4e27b836 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Sat, 27 Aug 2022 00:05:27 +0000 Subject: [PATCH 03/28] very wip --- bootstore/Cargo.toml | 12 ---- sled-agent/src/bootstore/db.rs | 17 +++++ sled-agent/src/bootstore/mod.rs | 29 ++++++++ sled-agent/src/bootstore/server.rs | 50 ++++++++++++++ .../src/bootstore/twopc.rs | 68 ++++++++----------- 5 files changed, 125 insertions(+), 51 deletions(-) delete mode 100644 bootstore/Cargo.toml create mode 100644 sled-agent/src/bootstore/db.rs create mode 100644 sled-agent/src/bootstore/mod.rs create mode 100644 sled-agent/src/bootstore/server.rs rename bootstore/src/lib.rs => sled-agent/src/bootstore/twopc.rs (64%) diff --git a/bootstore/Cargo.toml b/bootstore/Cargo.toml deleted file mode 100644 index a6c43109cfc..00000000000 --- a/bootstore/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "bootstore" -description = "A replicated datastore read before CockroachDb is available" -version = "0.1.0" -edition = "2021" -license = "MPL-2.0" - -[dependencies] -sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" } -sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" } -tokio = { version = "1.20", features = [ "full" ] } -thiserror = "1.0" diff --git a/sled-agent/src/bootstore/db.rs b/sled-agent/src/bootstore/db.rs new file mode 100644 index 00000000000..525296318fe --- /dev/null +++ b/sled-agent/src/bootstore/db.rs @@ -0,0 +1,17 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Database layer for the bootstore + +/// The ID of the database used to store blobs. +/// +/// We separate them because they are encrypted and accessed differently. 
+pub enum DbId { + /// Used pre-rack unlock: Contains key shares and membership data + TrustQuorum, + + /// Used post-rack unlock: Contains information necessary for setting + /// up NTP. + NetworkConfig, +} diff --git a/sled-agent/src/bootstore/mod.rs b/sled-agent/src/bootstore/mod.rs new file mode 100644 index 00000000000..03ad9934033 --- /dev/null +++ b/sled-agent/src/bootstore/mod.rs @@ -0,0 +1,29 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! The two communication paths for the bootstore: +//! +//! RSS -> Sled Agent -> Coordinator -> Storage Nodes +//! Nexus -> Steno -> Sled Agent -> Coordinator -> Storage Nodes +//! +//! +//! Since some trust quorum membership information that is input via RSS must +//! make its way into CockroachDb so that reconfiguration works, we will load +//! that information from the trust quorum database, parse it, and write +//! it to CockroachDB when we start it up. + +mod db; +mod trust_quorum; +mod twopc; + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn it_works() { + let result = 2 + 2; + assert_eq!(result, 4); + } +} diff --git a/sled-agent/src/bootstore/server.rs b/sled-agent/src/bootstore/server.rs new file mode 100644 index 00000000000..1cd61998bbe --- /dev/null +++ b/sled-agent/src/bootstore/server.rs @@ -0,0 +1,50 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A sprockets server for handling bootstrap related requests +//! +//! This is the server for trust-quorum rack unlock as well as +//! the server backing the 2PC implementation used for trust quorum initialization, +//! trust quorum reconfiguration, and NetworkConfiguration needed to configure NTP. 
+ +use slog::Drain; +use slog::Logger; +use std::io; +use std::net::Ipv6Addr; +use std::net::SocketAddrV6; + +use crate::sp::SimSpConfig; +use crate::sp::SpHandle; + +/// The sprockets server for the bootstore +/// +/// The Server is in charge of managing the SP, key shares, and early boot +/// network configuration. +pub struct Server { + listener: TcpListener, + bind_address: SocketAddrV6, + sp: SpHandle, + log: Logger, +} + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Cannot bind to {bind_address}: {err}")] + Bind { bind_address: SocketAddrV6, err: io::Error }, +} + +impl Server { + async fn start( + log: Logger, + bind_address: SocketAddrV6, + sp: SpHandle, + ) -> Result>, Error> { + let listener = TcpListener::bind(bind_address) + .await + .map_err(|err| Error::Bind { bind_address, err })?; + info!(log, "Started listening"; "local_addr" => %bind_address); + let server = Server { listener, sp, bind_address, log }; + Ok(tokio::spawn(server.run())) + } +} diff --git a/bootstore/src/lib.rs b/sled-agent/src/bootstore/twopc.rs similarity index 64% rename from bootstore/src/lib.rs rename to sled-agent/src/bootstore/twopc.rs index da62660cb72..871efd227ea 100644 --- a/bootstore/src/lib.rs +++ b/sled-agent/src/bootstore/twopc.rs @@ -1,18 +1,16 @@ -//! The two communication paths for the bootstore: -//! -//! RSS -> Sled Agent -> Coordinator -> Storage Nodes -//! Nexus -> Steno -> Sled Agent -> Coordinator -> Storage Nodes -//! -//! -//! Since some trust quorum membership information that is input via RSS must -//! make its way into CockroachDb so that reconfiguration works, we will load -//! that information from the trust quorum database, parse it, and write -//! it to CockroachDB when we start it up. +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. -use std::collections::BTreeMap; +//! 
Two-phase commit (2PC) layer for the bootstore + +use crate::db::DbId; + +use std::collections::{BTreeMap, BTreeSet}; use std::net::Ipv6Addr; use std::sync::Arc; +use sprockets_common::certificates::Ed25519Certificate; use sprockets_common::Sha3_256Digest; use sprockets_host::Identity; use thiserror::Error; @@ -28,19 +26,6 @@ pub enum TransactionState { Committed, Aborted, } - -/// The database used store blobs. -/// -/// We separate them because they are encrypted and accessed differently. -pub enum Db { - /// Used pre-rack unlock: Contains key shares and membership data - TrustQuorum, - - /// Used post-rack unlock: Contains information necessary for setting - /// up NTP. - NetworkConfig, -} - /// A two phase commit (2PC) transaction identifier. Transactions either commit or abort. /// /// Users submit transactions to a coordinator and retry indefinitely for them @@ -54,9 +39,8 @@ pub enum Db { pub struct TransactionId { // Database structures are specifically adapted to append-only log style // blog storage. - db: Db, + db_id: DbId, - // The name of the item in the database // The monotonically increasing generation number of the data value gen: u64, } @@ -68,12 +52,29 @@ where Chan: AsyncRead + AsyncWrite, { addr: Ipv6Addr, - session: Chan, + session: sprockets_host::Session, id: Identity, } +pub enum TransactionOp { + // This is an instruction from RSS to generate a rack secret + // + // The coordinator will generate a rack secret and distribute shares at + // database generation 0. + InitializeTrustQuorum { + rack_secret_threshold: usize, + member_device_id_certs: Vec, + }, +} + +pub struct Transaction { + id: TransactionId, + op: TransactionOp, + addrs: BTreeSet, +} + /// The coordinator of the 2PC protocol. It establishes connections -/// to the storage nodes and transfers data via 2PC. +/// to [`StorageNode`]]s and transfers data via 2PC. 
pub struct Coordinator { // We don't know which address maps to which identity, but we know which // identity maps to which data. We use an Arc for data, because in some @@ -84,14 +85,3 @@ pub struct Coordinator { // on the nodes know how to deserialize and interpret it. data: BTreeMap>>, } - -#[cfg(test)] -mod tests { - use super::*; - - #[tokio::test] - async fn it_works() { - let result = 2 + 2; - assert_eq!(result, 4); - } -} From 9a47e276e30b00f6b987d0aeeb065b58b203678e Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Sat, 27 Aug 2022 09:04:18 +0000 Subject: [PATCH 04/28] wip --- Cargo.lock | 22 ++++--- Cargo.toml | 1 - sled-agent/Cargo.toml | 1 + sled-agent/src/bootstore/db.rs | 99 ++++++++++++++++++++++++++++++ sled-agent/src/bootstore/mod.rs | 3 +- sled-agent/src/bootstore/server.rs | 3 + sled-agent/src/bootstore/twopc.rs | 2 +- sled-agent/src/lib.rs | 1 + 8 files changed, 119 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6c8aab3ce07..be12e86605c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -338,16 +338,6 @@ dependencies = [ "byte-tools", ] -[[package]] -name = "bootstore" -version = "0.1.0" -dependencies = [ - "sprockets-common", - "sprockets-host", - "thiserror", - "tokio", -] - [[package]] name = "bstr" version = "0.2.17" @@ -1075,6 +1065,7 @@ dependencies = [ "ipnetwork", "itoa 1.0.2", "libc", + "libsqlite3-sys", "pq-sys", "r2d2", "serde_json", @@ -2411,6 +2402,16 @@ dependencies = [ "tracing", ] +[[package]] +name = "libsqlite3-sys" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f0455f2c1bc9a7caa792907026e469c1d91761fb0ea37cbb16427c77280cf35" +dependencies = [ + "pkg-config", + "vcpkg", +] + [[package]] name = "libxml" version = "0.3.1" @@ -3074,6 +3075,7 @@ dependencies = [ "crucible-agent-client", "crucible-client-types", "ddm-admin-client", + "diesel", "dropshot", "expectorate", "futures", diff --git a/Cargo.toml b/Cargo.toml index a95b95c9c64..70d3e33ca42 100644 
--- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,5 @@ [workspace] members = [ - "bootstore", "common", "ddm-admin-client", "deploy", diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 359fa1fcbad..3bfbec47fe8 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -17,6 +17,7 @@ clap = { version = "3.2", features = ["derive"] } crucible-client-types = { git = "https://github.com/oxidecomputer/crucible", rev = "bacffd142fc38a01fe255407b0c8d5d0aacfe778" } crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "bacffd142fc38a01fe255407b0c8d5d0aacfe778" } ddm-admin-client = { path = "../ddm-admin-client" } +diesel = { version = "2.0.0-rc.1", features = ["sqlite", "chrono", "serde_json", "network-address", "uuid"] } dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } futures = "0.3.23" internal-dns-client = { path = "../internal-dns-client" } diff --git a/sled-agent/src/bootstore/db.rs b/sled-agent/src/bootstore/db.rs index 525296318fe..0c7a607c49b 100644 --- a/sled-agent/src/bootstore/db.rs +++ b/sled-agent/src/bootstore/db.rs @@ -4,6 +4,21 @@ //! Database layer for the bootstore +use diesel::prelude::*; +use diesel::SqliteConnection; +use slog::Logger; + +use crate::bootstrap::trust_quorum::SerializableShareDistribution; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Failed to open db connection to {path}: {err}")] + DbOpen { path: String, err: ConnectionError }, + + #[error(transparent)] + Db(#[from] diesel::result::Error), +} + /// The ID of the database used to store blobs. /// /// We separate them because they are encrypted and accessed differently. @@ -15,3 +30,87 @@ pub enum DbId { /// up NTP. 
NetworkConfig, } + +pub struct Db { + log: Logger, + conn: SqliteConnection, +} + +impl Db { + pub fn open(log: Logger, path: &str) -> Result { + let log = log.new(o!( + "component" => "BootstoreDb" + )); + info!(log, "opening database {:?}", path); + let mut c = SqliteConnection::establish(path) + .map_err(|err| Error::DbOpen { path: path.into(), err })?; + + // Enable foreign key processing, which is off by default. Without + // enabling this, there is no referential integrity check between + // primary and foreign keys in tables. + diesel::sql_query("PRAGMA foreign_keys = 'ON'").execute(&mut c)?; + + // Enable the WAL. + diesel::sql_query("PRAGMA journal_mode = 'WAL'").execute(&mut c)?; + + // Force overwriting with 0s on delete + diesel::sql_query("PRAGMA secure_delete = 'ON'").execute(&mut c)?; + + // Sync to disk after every commit. + // DO NOT CHANGE THIS SETTING! + diesel::sql_query("PRAGMA synchronous = 'FULL'").execute(&mut c)?; + + Ok(Db { log, conn: c }) + } +} + +// TODO: Use Josh's json_new_type macro from buildomat for this +pub struct ShareData(SerializableShareDistribution); + +#[derive(Queryable)] +pub struct KeySharePrepare { + pub epoch: i32, + pub share_distribution: ShareData, +} + +#[derive(Queryable)] +pub struct KeyShareCommit { + pub epoch: i32, +} + +// TODO: These should go in a crypto module +// The length of a SHA3-256 digest +pub const DIGEST_LEN: usize = 32; + +// The length of a ChaCha20Poly1305 Key +pub const KEY_LEN: usize = 32; + +// The length of a ChaCha20Poly1305 authentication tag +pub const TAG_LEN: usize = 16; + +// A chacha20poly1305 secret encrypted by a chacha20poly1305 secret key +// derived from the rack secret for the given epoch with the given salt +// +// The epoch informs which rack secret should be used to derive the +// encryptiong key used to encrypt this root secret. 
+#[derive(Queryable)] +pub struct EncryptedRootSecret { + /// The epoch of the rack secret rotation or rack reconfiguration + pub epoch: i32, + + /// Used as the salt parameter to HKDF to derive the encryption + /// key from the rack secret that protects `key` in this struct. + pub salt: Salt, + + /// The encrypted key + pub key: EncryptedKey, + + /// The authentication tag for the encrypted key + pub tag: AuthTag, +} + +// TODO: Create some ToSql/FromSql impls +// Should probably create a macro for arrays +pub struct EncryptedKey([u8; KEY_LEN]); +pub struct Salt([u8; DIGEST_LEN]); +pub struct AuthTag([u8; TAG_LEN]); diff --git a/sled-agent/src/bootstore/mod.rs b/sled-agent/src/bootstore/mod.rs index 03ad9934033..423b4490bdb 100644 --- a/sled-agent/src/bootstore/mod.rs +++ b/sled-agent/src/bootstore/mod.rs @@ -14,7 +14,8 @@ //! it to CockroachDB when we start it up. mod db; -mod trust_quorum; +//mod server; +//mod trust_quorum; mod twopc; #[cfg(test)] diff --git a/sled-agent/src/bootstore/server.rs b/sled-agent/src/bootstore/server.rs index 1cd61998bbe..0093fbac090 100644 --- a/sled-agent/src/bootstore/server.rs +++ b/sled-agent/src/bootstore/server.rs @@ -14,6 +14,7 @@ use std::io; use std::net::Ipv6Addr; use std::net::SocketAddrV6; +use super::db::Db; use crate::sp::SimSpConfig; use crate::sp::SpHandle; @@ -47,4 +48,6 @@ impl Server { let server = Server { listener, sp, bind_address, log }; Ok(tokio::spawn(server.run())) } + + async fn run(self) -> Result<(), Error> {} } diff --git a/sled-agent/src/bootstore/twopc.rs b/sled-agent/src/bootstore/twopc.rs index 871efd227ea..13f29283f98 100644 --- a/sled-agent/src/bootstore/twopc.rs +++ b/sled-agent/src/bootstore/twopc.rs @@ -4,7 +4,7 @@ //! 
Two-phase commit (2PC) layer for the bootstore -use crate::db::DbId; +use super::db::DbId; use std::collections::{BTreeMap, BTreeSet}; use std::net::Ipv6Addr; diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index 602293cab23..a83fc068f2e 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -19,6 +19,7 @@ pub mod sim; pub mod common; // Modules for the non-simulated sled agent. +mod bootstore; pub mod bootstrap; pub mod config; mod http_entrypoints; From 0247102f5962ea65ac3fc57cfeaccca08ae89011 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Sun, 28 Aug 2022 22:17:59 +0000 Subject: [PATCH 05/28] wip --- sled-agent/src/bootstore/db.rs | 14 ++- sled-agent/src/bootstore/db_macros.rs | 133 ++++++++++++++++++++++++++ sled-agent/src/bootstore/mod.rs | 1 + 3 files changed, 143 insertions(+), 5 deletions(-) create mode 100644 sled-agent/src/bootstore/db_macros.rs diff --git a/sled-agent/src/bootstore/db.rs b/sled-agent/src/bootstore/db.rs index 0c7a607c49b..7b07ae20113 100644 --- a/sled-agent/src/bootstore/db.rs +++ b/sled-agent/src/bootstore/db.rs @@ -4,7 +4,13 @@ //! 
Database layer for the bootstore +use super::db_macros::array_new_type; +use super::db_macros::json_new_type; + +use diesel::deserialize::FromSql; use diesel::prelude::*; +use diesel::serialize::ToSql; +use diesel::FromSqlRow; use diesel::SqliteConnection; use slog::Logger; @@ -109,8 +115,6 @@ pub struct EncryptedRootSecret { pub tag: AuthTag, } -// TODO: Create some ToSql/FromSql impls -// Should probably create a macro for arrays -pub struct EncryptedKey([u8; KEY_LEN]); -pub struct Salt([u8; DIGEST_LEN]); -pub struct AuthTag([u8; TAG_LEN]); +array_new_type!(EncryptedKey, KEY_LEN); +array_new_type!(Salt, DIGEST_LEN); +array_new_type!(AuthTag, TAG_LEN); diff --git a/sled-agent/src/bootstore/db_macros.rs b/sled-agent/src/bootstore/db_macros.rs new file mode 100644 index 00000000000..cd04aed35f9 --- /dev/null +++ b/sled-agent/src/bootstore/db_macros.rs @@ -0,0 +1,133 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Macros used to create ToSql and FromSql impls required by Diesel + +/// Shamelessly stolen from buildomat/common/src/db.rs +/// Thanks @jmc +macro_rules! 
json_new_type { + ($name:ident, $mytype:ty) => { + #[derive( + Clone, Debug, FromSqlRow, diesel::expression::AsExpression, + )] + #[diesel(sql_type = diesel::sql_types::Text)] + pub struct $name(pub $mytype); + + impl ToSql for $name + where + String: ToSql, + { + fn to_sql( + &self, + out: &mut diesel::serialize::Output, + ) -> diesel::serialize::Result { + out.set_value(serde_json::to_string(&self.0)?); + Ok(diesel::serialize::IsNull::No) + } + } + + impl FromSql for $name + where + DB: diesel::backend::Backend, + String: FromSql, + { + fn from_sql( + bytes: diesel::backend::RawValue, + ) -> diesel::deserialize::Result { + Ok($name(serde_json::from_str(&String::from_sql(bytes)?)?)) + } + } + + impl From<$name> for $mytype { + fn from(t: $name) -> Self { + t.0 + } + } + + impl From<$mytype> for $name { + fn from(t: $mytype) -> $name { + $name(t) + } + } + + impl std::ops::Deref for $name { + type Target = $mytype; + + fn deref(&self) -> &Self::Target { + &self.0 + } + } + }; +} + +macro_rules! array_new_type { + ($name:ident, $len:expr) => { + #[derive( + Clone, Debug, FromSqlRow, diesel::expression::AsExpression, + )] + #[diesel(sql_type = diesel::sql_types::Binary)] + pub struct $name(pub [u8; $len]); + + impl ToSql for $name + where + Vec: ToSql, + { + fn to_sql( + &self, + out: &mut diesel::serialize::Output, + ) -> diesel::serialize::Result { + let mut copy = vec![0; $len]; + copy.copy_from_slice(&self.0[..]); + out.set_value(copy); + Ok(diesel::serialize::IsNull::No) + } + } + + impl FromSql for $name + where + DB: diesel::backend::Backend, + Vec: FromSql, + { + fn from_sql( + bytes: diesel::backend::RawValue, + ) -> diesel::deserialize::Result { + let read_bytes = Vec::::from_sql(bytes)?; + if read_bytes.len() != $len { + return Err(format!( + "Invalid length. 
Expected: {}, Actual: {}", + $len, + read_bytes.len() + ) + .into()); + } + let mut out = [0u8; $len]; + out.copy_from_slice(&read_bytes[..]); + Ok($name(out)) + } + } + + impl From<$name> for [u8; $len] { + fn from(t: $name) -> Self { + t.0 + } + } + + impl From<[u8; $len]> for $name { + fn from(t: [u8; $len]) -> $name { + $name(t) + } + } + + impl std::ops::Deref for $name { + type Target = [u8; $len]; + + fn deref(&self) -> &Self::Target { + &self.0 + } + } + }; +} + +pub(crate) use array_new_type; +pub(crate) use json_new_type; diff --git a/sled-agent/src/bootstore/mod.rs b/sled-agent/src/bootstore/mod.rs index 423b4490bdb..ef364f7b7ee 100644 --- a/sled-agent/src/bootstore/mod.rs +++ b/sled-agent/src/bootstore/mod.rs @@ -14,6 +14,7 @@ //! it to CockroachDB when we start it up. mod db; +mod db_macros; //mod server; //mod trust_quorum; mod twopc; From 32728d27859b98dd0a5bfce683689bfa127ca266 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Sun, 28 Aug 2022 22:29:05 +0000 Subject: [PATCH 06/28] wip --- sled-agent/src/bootstore/db.rs | 5 ++--- .../bootstrap/trust_quorum/share_distribution.rs | 13 +++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/sled-agent/src/bootstore/db.rs b/sled-agent/src/bootstore/db.rs index 7b07ae20113..0a878ec7c85 100644 --- a/sled-agent/src/bootstore/db.rs +++ b/sled-agent/src/bootstore/db.rs @@ -70,13 +70,12 @@ impl Db { } } -// TODO: Use Josh's json_new_type macro from buildomat for this -pub struct ShareData(SerializableShareDistribution); +json_new_type!(Share, SerializableShareDistribution); #[derive(Queryable)] pub struct KeySharePrepare { pub epoch: i32, - pub share_distribution: ShareData, + pub share: Share, } #[derive(Queryable)] diff --git a/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs b/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs index c8019ef8040..8566ae7246a 100644 --- a/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs +++ 
b/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs @@ -56,6 +56,19 @@ pub struct SerializableShareDistribution { pub verifier: Verifier, } +// We don't want to risk debug-logging the actual share contents, so implement +// `Debug` manually and omit sensitive fields. +impl fmt::Debug for SerializableShareDistribution { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SerializableShareDistribution") + .field("threshold", &self.threshold) + .field("verifier", &"Verifier") + .field("share", &"Share") + .field("member_device_id_certs", &self.member_device_id_certs) + .finish() + } +} + impl From for SerializableShareDistribution { fn from(dist: ShareDistribution) -> Self { Self { From e89f3fde2c6e787904702b66aa41e4637f94e482 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Sun, 28 Aug 2022 22:44:51 +0000 Subject: [PATCH 07/28] back to a crate --- Cargo.lock | 16 ++ Cargo.toml | 2 + bootstore/Cargo.toml | 18 ++ .../src/bootstore => bootstore/src}/db.rs | 3 +- .../bootstore => bootstore/src}/db_macros.rs | 0 .../bootstore/mod.rs => bootstore/src/lib.rs | 15 +- .../src/bootstore => bootstore/src}/server.rs | 0 bootstore/src/trust_quorum/error.rs | 16 ++ bootstore/src/trust_quorum/mod.rs | 20 ++ bootstore/src/trust_quorum/rack_secret.rs | 175 ++++++++++++++++++ .../src/trust_quorum/share_distribution.rs | 92 +++++++++ sled-agent/src/bootstore/twopc.rs | 87 --------- 12 files changed, 350 insertions(+), 94 deletions(-) create mode 100644 bootstore/Cargo.toml rename {sled-agent/src/bootstore => bootstore/src}/db.rs (97%) rename {sled-agent/src/bootstore => bootstore/src}/db_macros.rs (100%) rename sled-agent/src/bootstore/mod.rs => bootstore/src/lib.rs (73%) rename {sled-agent/src/bootstore => bootstore/src}/server.rs (100%) create mode 100644 bootstore/src/trust_quorum/error.rs create mode 100644 bootstore/src/trust_quorum/mod.rs create mode 100644 bootstore/src/trust_quorum/rack_secret.rs create mode 100644 
bootstore/src/trust_quorum/share_distribution.rs delete mode 100644 sled-agent/src/bootstore/twopc.rs diff --git a/Cargo.lock b/Cargo.lock index be12e86605c..59d4086de56 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -338,6 +338,22 @@ dependencies = [ "byte-tools", ] +[[package]] +name = "bootstore" +version = "0.1.0" +dependencies = [ + "diesel", + "p256", + "rand 0.8.5", + "serde", + "serde_json", + "slog", + "sprockets-common", + "sprockets-host", + "thiserror", + "vsss-rs", +] + [[package]] name = "bstr" version = "0.2.17" diff --git a/Cargo.toml b/Cargo.toml index 70d3e33ca42..f9d2863a5f6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,6 @@ [workspace] members = [ + "bootstore", "common", "ddm-admin-client", "deploy", @@ -37,6 +38,7 @@ members = [ ] default-members = [ + "bootstore", "common", "ddm-admin-client", "deploy", diff --git a/bootstore/Cargo.toml b/bootstore/Cargo.toml new file mode 100644 index 00000000000..f89699e87a7 --- /dev/null +++ b/bootstore/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "bootstore" +description = "Storage required for rack unlock" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +diesel = { version = "2.0.0-rc.1", features = ["sqlite", "chrono", "serde_json", "network-address", "uuid"] } +p256 = "0.9.0" +rand = { version = "0.8.5", features = ["getrandom"] } +serde = { version = "1.0", features = [ "derive" ] } +serde_json = "1.0" +slog = { version = "2.5", features = [ "max_level_trace", "release_max_level_debug" ] } +sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" } +sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" } +thiserror = "1.0" +vsss-rs = { version = "2.0.0", default-features = false, features = ["std"] } diff --git a/sled-agent/src/bootstore/db.rs b/bootstore/src/db.rs similarity index 97% rename from sled-agent/src/bootstore/db.rs rename to 
bootstore/src/db.rs index 0a878ec7c85..5e8bbd3a812 100644 --- a/sled-agent/src/bootstore/db.rs +++ b/bootstore/src/db.rs @@ -13,8 +13,9 @@ use diesel::serialize::ToSql; use diesel::FromSqlRow; use diesel::SqliteConnection; use slog::Logger; +use slog::{info, o}; -use crate::bootstrap::trust_quorum::SerializableShareDistribution; +use crate::trust_quorum::SerializableShareDistribution; #[derive(thiserror::Error, Debug)] pub enum Error { diff --git a/sled-agent/src/bootstore/db_macros.rs b/bootstore/src/db_macros.rs similarity index 100% rename from sled-agent/src/bootstore/db_macros.rs rename to bootstore/src/db_macros.rs diff --git a/sled-agent/src/bootstore/mod.rs b/bootstore/src/lib.rs similarity index 73% rename from sled-agent/src/bootstore/mod.rs rename to bootstore/src/lib.rs index ef364f7b7ee..896facd7eab 100644 --- a/sled-agent/src/bootstore/mod.rs +++ b/bootstore/src/lib.rs @@ -1,7 +1,3 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - //! The two communication paths for the bootstore: //! //! 
RSS -> Sled Agent -> Coordinator -> Storage Nodes @@ -16,8 +12,7 @@ mod db; mod db_macros; //mod server; -//mod trust_quorum; -mod twopc; +mod trust_quorum; #[cfg(test)] mod tests { @@ -29,3 +24,11 @@ mod tests { assert_eq!(result, 4); } } +#[cfg(test)] +mod tests { + #[test] + fn it_works() { + let result = 2 + 2; + assert_eq!(result, 4); + } +} diff --git a/sled-agent/src/bootstore/server.rs b/bootstore/src/server.rs similarity index 100% rename from sled-agent/src/bootstore/server.rs rename to bootstore/src/server.rs diff --git a/bootstore/src/trust_quorum/error.rs b/bootstore/src/trust_quorum/error.rs new file mode 100644 index 00000000000..da6002cd33d --- /dev/null +++ b/bootstore/src/trust_quorum/error.rs @@ -0,0 +1,16 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Error type for trust quorum code + +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum TrustQuorumError { + #[error("Not enough peers to unlock storage")] + NotEnoughPeers, + + #[error("Rack secret construction failed: {0:?}")] + RackSecretConstructionFailed(vsss_rs::Error), +} diff --git a/bootstore/src/trust_quorum/mod.rs b/bootstore/src/trust_quorum/mod.rs new file mode 100644 index 00000000000..5aa249f48e8 --- /dev/null +++ b/bootstore/src/trust_quorum/mod.rs @@ -0,0 +1,20 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! The entry point for the trust quorum code +//! +//! This module only provides the trust quorum primitives: the rack secret and +//! its associated machinery (splitting into shares, verification, etc.). +//! Distribution and retrieval of shares is the responsibility of the +//! 
bootstrap-agent, which uses sprockets to secure communications between +//! sleds. + +mod error; +mod rack_secret; +mod share_distribution; + +pub use error::TrustQuorumError; +pub use rack_secret::RackSecret; +pub use share_distribution::SerializableShareDistribution; +pub use share_distribution::ShareDistribution; diff --git a/bootstore/src/trust_quorum/rack_secret.rs b/bootstore/src/trust_quorum/rack_secret.rs new file mode 100644 index 00000000000..2a801b2fe3c --- /dev/null +++ b/bootstore/src/trust_quorum/rack_secret.rs @@ -0,0 +1,175 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::convert::AsRef; +use std::fmt::Debug; + +use p256::elliptic_curve::group::ff::PrimeField; +use p256::elliptic_curve::subtle::ConstantTimeEq; +use p256::{NonZeroScalar, ProjectivePoint, Scalar, SecretKey}; +use rand::rngs::OsRng; +use serde::{Deserialize, Serialize}; +use vsss_rs::{Feldman, FeldmanVerifier, Share}; + +/// A `RackSecret` is a shared secret used to perform a "rack-level" unlock. +/// +/// Each server sled on an oxide rack contains up to 10 SSDs containing customer +/// data and oxide specific data. We want to ensure that if a small number of +/// disks or sleds get stolen, the data on them remains inaccessible to the +/// thief. We also want to ensure that successfully rebooting a sled or an +/// entire rack does not require administrator intervention such as entering a +/// password. +/// +/// To provide the above guarantees we must ensure that all disks are encrypted, +/// and that there is an automatic mechanism to retrieve the decryption key. +/// Furthermore, we must guarantee that the the key retrieval mechanism is not +/// available when a only a subset of sleds or disks from a rack are available +/// to an attacker. The mechanism we use to provide these guarantees is based on +/// . 
A threshold secret +/// is generated of which individual disk encryption keys are derived, and each +/// of `N` server sleds receives one share of the secret. `K` of these shares +/// must be combinded in order to reconstruct the shared secret such that disks +/// may be decrypted. If fewer than `K` shares are available, no information +/// about the secret may be recovered, and the disks cannot be decrypted. We +/// call the threshold secret the `rack secret`. +/// +/// Inside a rack then, the sleds cooperate over secure channels in order to +/// retrieve key shares and reconstruct the `rack secret`, the resulting derived +/// encryption keys, and unlock their own local storage. We call this procedure +/// `rack unlock`. The establishment of secure channels and the ability to trust +/// the validity of a participating peer is outside the scope of this particular +/// type and orthogonal to its implementation. +pub struct RackSecret { + secret: NonZeroScalar, +} + +impl PartialEq for RackSecret { + fn eq(&self, other: &Self) -> bool { + self.secret.ct_eq(&other.secret).into() + } +} + +impl Eq for RackSecret {} + +/// A verifier used to ensure the validity of a given key share for an unknown +/// secret. +/// +/// We use verifiable secret sharing to detect invalid shares from being +/// combined and generating an incorrect secret. Each share must be verified +/// before the secret is reconstructed. +// This is just a wrapper around a FeldmanVerifier from the vsss-rs crate. 
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct Verifier { + verifier: FeldmanVerifier, +} + +impl Verifier { + pub fn verify(&self, share: &Share) -> bool { + self.verifier.verify(share) + } +} + +impl RackSecret { + /// Create a secret based on the NIST P-256 curve + pub fn new() -> RackSecret { + let mut rng = OsRng::default(); + let sk = SecretKey::random(&mut rng); + RackSecret { secret: sk.to_secret_scalar() } + } + + /// Split a secert into `total_shares` number of shares, where combining + /// `threshold` of the shares can be used to recover the secret. + pub fn split( + &self, + threshold: usize, + total_shares: usize, + ) -> Result<(Vec, Verifier), vsss_rs::Error> { + let mut rng = OsRng::default(); + let (shares, verifier) = Feldman { t: threshold, n: total_shares } + .split_secret(*self.as_ref(), None, &mut rng)?; + Ok((shares, Verifier { verifier })) + } + + /// Combine a set of shares and return a RackSecret + pub fn combine_shares( + threshold: usize, + total_shares: usize, + shares: &[Share], + ) -> Result { + let scalar = Feldman { t: threshold, n: total_shares } + .combine_shares::(shares)?; + let nzs = NonZeroScalar::from_repr(scalar.to_repr()).unwrap(); + let sk = SecretKey::from(nzs); + Ok(RackSecret { secret: sk.to_secret_scalar() }) + } +} + +impl AsRef for RackSecret { + fn as_ref(&self) -> &Scalar { + self.secret.as_ref() + } +} + +#[cfg(test)] +mod tests { + use std::fmt; + + use super::*; + + // This is a secret. Let's not print it outside of tests. 
+ impl Debug for RackSecret { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.secret.as_ref().fmt(f) + } + } + + fn verify(secret: &RackSecret, verifier: &Verifier, shares: &[Share]) { + for s in shares { + assert!(verifier.verify(s)); + } + + let secret2 = RackSecret::combine_shares(3, 5, &shares[..3]).unwrap(); + let secret3 = RackSecret::combine_shares(3, 5, &shares[1..4]).unwrap(); + let secret4 = RackSecret::combine_shares(3, 5, &shares[2..5]).unwrap(); + let shares2 = + vec![shares[0].clone(), shares[2].clone(), shares[4].clone()]; + let secret5 = RackSecret::combine_shares(3, 5, &shares2).unwrap(); + + for s in [secret2, secret3, secret4, secret5] { + assert_eq!(*secret, s); + } + } + + #[test] + fn create_and_verify() { + let secret = RackSecret::new(); + let (shares, verifier) = secret.split(3, 5).unwrap(); + verify(&secret, &verifier, &shares); + } + + #[test] + fn secret_splitting_fails_with_threshold_larger_than_total_shares() { + let secret = RackSecret::new(); + assert!(secret.split(5, 3).is_err()); + } + + #[test] + fn combine_deserialized_shares() { + let secret = RackSecret::new(); + let (shares, verifier) = secret.split(3, 5).unwrap(); + let verifier_s = bincode::serialize(&verifier).unwrap(); + let shares_s = bincode::serialize(&shares).unwrap(); + + let shares2: Vec = bincode::deserialize(&shares_s).unwrap(); + let verifier2: Verifier = bincode::deserialize(&verifier_s).unwrap(); + + // Ensure we can reconstruct the secret with the deserialized shares and + // verifier. + verify(&secret, &verifier2, &shares2); + + // Ensure we can reconstruct the secret with the deserialized shares and + // original verifier. 
+ verify(&secret, &verifier, &shares2); + } +} diff --git a/bootstore/src/trust_quorum/share_distribution.rs b/bootstore/src/trust_quorum/share_distribution.rs new file mode 100644 index 00000000000..8566ae7246a --- /dev/null +++ b/bootstore/src/trust_quorum/share_distribution.rs @@ -0,0 +1,92 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use serde::Deserialize; +use serde::Serialize; +use sprockets_host::Ed25519Certificate; +use std::fmt; +use vsss_rs::Share; + +use super::rack_secret::Verifier; + +/// A ShareDistribution is an individual share of a secret along with all the +/// metadata required to allow a server in possession of the share to know how +/// to correctly recreate a split secret. +// We intentionally DO NOT derive `Debug` or `Serialize`; both provide avenues +// by which we may accidentally log the contents of our `share`. +#[derive(Clone, PartialEq, Deserialize)] +pub struct ShareDistribution { + pub threshold: usize, + pub verifier: Verifier, + pub share: Share, + pub member_device_id_certs: Vec, +} + +impl ShareDistribution { + pub fn total_shares(&self) -> usize { + self.member_device_id_certs.len() + } +} + +// We don't want to risk debug-logging the actual share contents, so implement +// `Debug` manually and omit sensitive fields. +impl fmt::Debug for ShareDistribution { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ShareDistribution") + .field("threshold", &self.threshold) + .field("verifier", &"Verifier") + .field("share", &"Share") + .field("member_device_id_certs", &self.member_device_id_certs) + .finish() + } +} + +/// This type is equivalent to `ShareDistribution` but implements `Serialize`. 
+/// It should be used _very carefully_; `ShareDistribution` should be preferred +/// in almost all cases to avoid accidental spillage of our `Share` contents. +/// This type should only be used to build careful serialization routines that +/// need to deal with trust quorum shares; e.g., +/// `RequestEnvelope::danger_serialize_as_json()`. +#[derive(Clone, PartialEq, Serialize, Deserialize)] +pub struct SerializableShareDistribution { + pub threshold: usize, + pub share: Share, + pub member_device_id_certs: Vec, + pub verifier: Verifier, +} + +// We don't want to risk debug-logging the actual share contents, so implement +// `Debug` manually and omit sensitive fields. +impl fmt::Debug for SerializableShareDistribution { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SerializableShareDistribution") + .field("threshold", &self.threshold) + .field("verifier", &"Verifier") + .field("share", &"Share") + .field("member_device_id_certs", &self.member_device_id_certs) + .finish() + } +} + +impl From for SerializableShareDistribution { + fn from(dist: ShareDistribution) -> Self { + Self { + threshold: dist.threshold, + verifier: dist.verifier, + share: dist.share, + member_device_id_certs: dist.member_device_id_certs, + } + } +} + +impl From for ShareDistribution { + fn from(dist: SerializableShareDistribution) -> Self { + Self { + threshold: dist.threshold, + verifier: dist.verifier, + share: dist.share, + member_device_id_certs: dist.member_device_id_certs, + } + } +} diff --git a/sled-agent/src/bootstore/twopc.rs b/sled-agent/src/bootstore/twopc.rs deleted file mode 100644 index 13f29283f98..00000000000 --- a/sled-agent/src/bootstore/twopc.rs +++ /dev/null @@ -1,87 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! 
Two-phase commit (2PC) layer for the bootstore - -use super::db::DbId; - -use std::collections::{BTreeMap, BTreeSet}; -use std::net::Ipv6Addr; -use std::sync::Arc; - -use sprockets_common::certificates::Ed25519Certificate; -use sprockets_common::Sha3_256Digest; -use sprockets_host::Identity; -use thiserror::Error; -use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; -use tokio::net::TcpStream; - -pub const PORT: u16 = 2121; - -/// The current state of a replica for a given transaction -pub enum TransactionState { - Init, - Prepared, - Committed, - Aborted, -} -/// A two phase commit (2PC) transaction identifier. Transactions either commit or abort. -/// -/// Users submit transactions to a coordinator and retry indefinitely for them -/// to commit or tell them to abort. By giving users explicit control over -/// abort we move the policy decision out of the protocol and keep it simple. -/// -/// It is expected that transactions will be driven by Steno, with their IDs -/// stored in CockroachDB. Before issuing transactions, users should record -/// them in CockroachDb so they can be restarted in the case of failure -/// or aborted. -pub struct TransactionId { - // Database structures are specifically adapted to append-only log style - // blog storage. - db_id: DbId, - - // The monotonically increasing generation number of the data value - gen: u64, -} - -/// A sprockets server endpoint. We don't know the identity of the endpoint -/// until the session is established. -pub struct StorageNode -where - Chan: AsyncRead + AsyncWrite, -{ - addr: Ipv6Addr, - session: sprockets_host::Session, - id: Identity, -} - -pub enum TransactionOp { - // This is an instruction from RSS to generate a rack secret - // - // The coordinator will generate a rack secret and distribute shares at - // database generation 0. 
- InitializeTrustQuorum { - rack_secret_threshold: usize, - member_device_id_certs: Vec, - }, -} - -pub struct Transaction { - id: TransactionId, - op: TransactionOp, - addrs: BTreeSet, -} - -/// The coordinator of the 2PC protocol. It establishes connections -/// to [`StorageNode`]]s and transfers data via 2PC. -pub struct Coordinator { - // We don't know which address maps to which identity, but we know which - // identity maps to which data. We use an Arc for data, because in some - // cases the data is shared and can be somewhat large (a few MiB) and we want to - // eliminate copies. - // - // The data is serialized before we get it. The handlers - // on the nodes know how to deserialize and interpret it. - data: BTreeMap>>, -} From 0b6b45d8394b34c9cae8e7b030afbe391568b805 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Sun, 28 Aug 2022 23:46:48 +0000 Subject: [PATCH 08/28] wip --- bootstore/src/{db_macros.rs => db/macros.rs} | 0 bootstore/src/{db.rs => db/mod.rs} | 63 +++----------------- bootstore/src/db/models.rs | 62 +++++++++++++++++++ bootstore/src/db/schema.rs | 28 +++++++++ bootstore/src/db/schema.sql | 19 ++++++ bootstore/src/lib.rs | 22 +------ 6 files changed, 117 insertions(+), 77 deletions(-) rename bootstore/src/{db_macros.rs => db/macros.rs} (100%) rename bootstore/src/{db.rs => db/mod.rs} (56%) create mode 100644 bootstore/src/db/models.rs create mode 100644 bootstore/src/db/schema.rs create mode 100644 bootstore/src/db/schema.sql diff --git a/bootstore/src/db_macros.rs b/bootstore/src/db/macros.rs similarity index 100% rename from bootstore/src/db_macros.rs rename to bootstore/src/db/macros.rs diff --git a/bootstore/src/db.rs b/bootstore/src/db/mod.rs similarity index 56% rename from bootstore/src/db.rs rename to bootstore/src/db/mod.rs index 5e8bbd3a812..6da9b11e700 100644 --- a/bootstore/src/db.rs +++ b/bootstore/src/db/mod.rs @@ -3,20 +3,15 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. //! 
Database layer for the bootstore +mod macros; +mod models; +mod schema; -use super::db_macros::array_new_type; -use super::db_macros::json_new_type; - -use diesel::deserialize::FromSql; use diesel::prelude::*; -use diesel::serialize::ToSql; -use diesel::FromSqlRow; use diesel::SqliteConnection; use slog::Logger; use slog::{info, o}; -use crate::trust_quorum::SerializableShareDistribution; - #[derive(thiserror::Error, Debug)] pub enum Error { #[error("Failed to open db connection to {path}: {err}")] @@ -45,6 +40,7 @@ pub struct Db { impl Db { pub fn open(log: Logger, path: &str) -> Result { + let schema = include_str!("./schema.sql"); let log = log.new(o!( "component" => "BootstoreDb" )); @@ -67,54 +63,9 @@ impl Db { // DO NOT CHANGE THIS SETTING! diesel::sql_query("PRAGMA synchronous = 'FULL'").execute(&mut c)?; + // Create tables + diesel::sql_query(schema).execute(&mut c)?; + Ok(Db { log, conn: c }) } } - -json_new_type!(Share, SerializableShareDistribution); - -#[derive(Queryable)] -pub struct KeySharePrepare { - pub epoch: i32, - pub share: Share, -} - -#[derive(Queryable)] -pub struct KeyShareCommit { - pub epoch: i32, -} - -// TODO: These should go in a crypto module -// The length of a SHA3-256 digest -pub const DIGEST_LEN: usize = 32; - -// The length of a ChaCha20Poly1305 Key -pub const KEY_LEN: usize = 32; - -// The length of a ChaCha20Poly1305 authentication tag -pub const TAG_LEN: usize = 16; - -// A chacha20poly1305 secret encrypted by a chacha20poly1305 secret key -// derived from the rack secret for the given epoch with the given salt -// -// The epoch informs which rack secret should be used to derive the -// encryptiong key used to encrypt this root secret. -#[derive(Queryable)] -pub struct EncryptedRootSecret { - /// The epoch of the rack secret rotation or rack reconfiguration - pub epoch: i32, - - /// Used as the salt parameter to HKDF to derive the encryption - /// key from the rack secret that protects `key` in this struct. 
- pub salt: Salt, - - /// The encrypted key - pub key: EncryptedKey, - - /// The authentication tag for the encrypted key - pub tag: AuthTag, -} - -array_new_type!(EncryptedKey, KEY_LEN); -array_new_type!(Salt, DIGEST_LEN); -array_new_type!(AuthTag, TAG_LEN); diff --git a/bootstore/src/db/models.rs b/bootstore/src/db/models.rs new file mode 100644 index 00000000000..a4fbeab3e0c --- /dev/null +++ b/bootstore/src/db/models.rs @@ -0,0 +1,62 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! DB models + +use diesel::deserialize::FromSql; +use diesel::prelude::*; +use diesel::serialize::ToSql; +use diesel::FromSqlRow; + +use super::macros::array_new_type; +use super::macros::json_new_type; +use crate::trust_quorum::SerializableShareDistribution; + +json_new_type!(Share, SerializableShareDistribution); + +#[derive(Queryable)] +pub struct KeySharePrepare { + pub epoch: i32, + pub share: Share, +} + +#[derive(Queryable)] +pub struct KeyShareCommit { + pub epoch: i32, +} + +// TODO: These should go in a crypto module +// The length of a SHA3-256 digest +pub const DIGEST_LEN: usize = 32; + +// The length of a ChaCha20Poly1305 Key +pub const KEY_LEN: usize = 32; + +// The length of a ChaCha20Poly1305 authentication tag +pub const TAG_LEN: usize = 16; + +// A chacha20poly1305 secret encrypted by a chacha20poly1305 secret key +// derived from the rack secret for the given epoch with the given salt +// +// The epoch informs which rack secret should be used to derive the +// encryptiong key used to encrypt this root secret. +#[derive(Queryable)] +pub struct EncryptedRootSecret { + /// The epoch of the rack secret rotation or rack reconfiguration + pub epoch: i32, + + /// Used as the salt parameter to HKDF to derive the encryption + /// key from the rack secret that protects `key` in this struct. 
+ pub salt: Salt, + + /// The encrypted root secret for this epoch + pub secret: EncryptedSecret, + + /// The authentication tag for the encrypted secret + pub tag: AuthTag, +} + +array_new_type!(EncryptedSecret, KEY_LEN); +array_new_type!(Salt, DIGEST_LEN); +array_new_type!(AuthTag, TAG_LEN); diff --git a/bootstore/src/db/schema.rs b/bootstore/src/db/schema.rs new file mode 100644 index 00000000000..0e90a3b6c32 --- /dev/null +++ b/bootstore/src/db/schema.rs @@ -0,0 +1,28 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use diesel::table; + +table! { + key_share_prepare(epoch) { + epoch -> Integer, + share -> Text, + } +} + +table! { + key_share_commit(epoch) { + epoch -> Integer, + } +} + +table! { + encrypted_root_secret(epoch) { + epoch -> Integer, + salt -> Binary, + secret -> Binary, + tag -> Binary, + + } +} diff --git a/bootstore/src/db/schema.sql b/bootstore/src/db/schema.sql new file mode 100644 index 00000000000..af5f43fc8dd --- /dev/null +++ b/bootstore/src/db/schema.sql @@ -0,0 +1,19 @@ +CREATE TABLE IF NOT EXISTS key_share_prepare ( + epoch INTEGER NOT NULL, + share TEXT NOT NULL, + + PRIMARY KEY(epoch) +) + +CREATE TABLE IF NOT EXISTS key_share_commit( + epoch INTEGER NOT NULL, + + PRIMARY KEY(epoch) +) + +CREATE TABLE IF NOT EXISTS encrypted_root_secret( + epoch INTEGER NOT NULL, + salt BLOB NOT NULL, + secret BLOB NOT NULL, + tag BLOB NOT NULL, +) diff --git a/bootstore/src/lib.rs b/bootstore/src/lib.rs index 896facd7eab..da6afe91a3b 100644 --- a/bootstore/src/lib.rs +++ b/bootstore/src/lib.rs @@ -10,25 +10,5 @@ //! it to CockroachDB when we start it up. 
mod db; -mod db_macros; -//mod server; mod trust_quorum; - -#[cfg(test)] -mod tests { - use super::*; - - #[tokio::test] - async fn it_works() { - let result = 2 + 2; - assert_eq!(result, 4); - } -} -#[cfg(test)] -mod tests { - #[test] - fn it_works() { - let result = 2 + 2; - assert_eq!(result, 4); - } -} +//mod server; From df245ffa22e261a3261ce771532f941e550436f6 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Mon, 29 Aug 2022 18:58:47 +0000 Subject: [PATCH 09/28] wip --- Cargo.lock | 2 ++ bootstore/Cargo.toml | 4 +++ bootstore/src/db/mod.rs | 69 +++++++++++++++++++++++++++++++++++-- bootstore/src/db/models.rs | 10 ++++-- bootstore/src/db/schema.rs | 6 ++-- bootstore/src/db/schema.sql | 18 +++++----- 6 files changed, 93 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 59d4086de56..c14bdb0fc92 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -342,7 +342,9 @@ dependencies = [ name = "bootstore" version = "0.1.0" dependencies = [ + "bincode", "diesel", + "omicron-test-utils", "p256", "rand 0.8.5", "serde", diff --git a/bootstore/Cargo.toml b/bootstore/Cargo.toml index f89699e87a7..7e97b0be7a1 100644 --- a/bootstore/Cargo.toml +++ b/bootstore/Cargo.toml @@ -16,3 +16,7 @@ sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "0 sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" } thiserror = "1.0" vsss-rs = { version = "2.0.0", default-features = false, features = ["std"] } + +[dev-dependencies] +bincode = "1.3.3" +omicron-test-utils = { path = "../test-utils" } diff --git a/bootstore/src/db/mod.rs b/bootstore/src/db/mod.rs index 6da9b11e700..78516484cc2 100644 --- a/bootstore/src/db/mod.rs +++ b/bootstore/src/db/mod.rs @@ -12,6 +12,12 @@ use diesel::SqliteConnection; use slog::Logger; use slog::{info, o}; +use crate::trust_quorum::SerializableShareDistribution; +use models::EncryptedRootSecret; +use models::KeyShareCommit; +use 
models::KeySharePrepare; +use models::Share; + #[derive(thiserror::Error, Debug)] pub enum Error { #[error("Failed to open db connection to {path}: {err}")] @@ -63,9 +69,68 @@ impl Db { // DO NOT CHANGE THIS SETTING! diesel::sql_query("PRAGMA synchronous = 'FULL'").execute(&mut c)?; - // Create tables - diesel::sql_query(schema).execute(&mut c)?; + c.immediate_transaction::<_, Error, _>(|tx| { + // Create tables + diesel::sql_query(schema).execute(tx).map_err(|e| Error::Db(e)) + })?; Ok(Db { log, conn: c }) } + + pub fn prepare_share( + &mut self, + epoch: i32, + share: SerializableShareDistribution, + ) -> Result<(), Error> { + use schema::key_share_prepares::dsl; + let prepare = KeySharePrepare { epoch, share: Share(share) }; + diesel::insert_into(dsl::key_share_prepares) + .values(&prepare) + .execute(&mut self.conn)?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::trust_quorum::{RackSecret, ShareDistribution}; + use omicron_test_utils::dev::test_setup_log; + + // TODO: Fill in with actual member certs + fn new_shares() -> Vec { + let member_device_id_certs = vec![]; + let rack_secret_threshold = 3; + let total_shares = 5; + let secret = RackSecret::new(); + let (shares, verifier) = + secret.split(rack_secret_threshold, total_shares).unwrap(); + + shares + .into_iter() + .map(move |share| ShareDistribution { + threshold: rack_secret_threshold, + verifier: verifier.clone(), + share, + member_device_id_certs: member_device_id_certs.clone(), + }) + .collect() + } + + #[test] + fn simple_prepare_insert_and_query() { + use schema::key_share_prepares::dsl; + let log = test_setup_log("test_db").log.clone(); + let mut db = Db::open(log, "/tmp/testdb.sqlite").unwrap(); + let shares = new_shares(); + let epoch = 0; + let expected: SerializableShareDistribution = shares[0].clone().into(); + db.prepare_share(epoch, expected.clone()).unwrap(); + let val = dsl::key_share_prepares + .select(dsl::share) + .filter(dsl::epoch.eq(epoch)) + 
.get_result::(&mut db.conn) + .unwrap(); + assert_eq!(val.0, expected); + } } diff --git a/bootstore/src/db/models.rs b/bootstore/src/db/models.rs index a4fbeab3e0c..d7f0616d75b 100644 --- a/bootstore/src/db/models.rs +++ b/bootstore/src/db/models.rs @@ -11,17 +11,18 @@ use diesel::FromSqlRow; use super::macros::array_new_type; use super::macros::json_new_type; +use super::schema::*; use crate::trust_quorum::SerializableShareDistribution; json_new_type!(Share, SerializableShareDistribution); -#[derive(Queryable)] +#[derive(Debug, Queryable, Insertable)] pub struct KeySharePrepare { pub epoch: i32, pub share: Share, } -#[derive(Queryable)] +#[derive(Debug, Queryable, Insertable)] pub struct KeyShareCommit { pub epoch: i32, } @@ -41,7 +42,10 @@ pub const TAG_LEN: usize = 16; // // The epoch informs which rack secret should be used to derive the // encryptiong key used to encrypt this root secret. -#[derive(Queryable)] +// +// TODO-security: We probably don't want to log even the encrypted secret, but +// it's likely useful for debugging right now. +#[derive(Debug, Queryable, Insertable)] pub struct EncryptedRootSecret { /// The epoch of the rack secret rotation or rack reconfiguration pub epoch: i32, diff --git a/bootstore/src/db/schema.rs b/bootstore/src/db/schema.rs index 0e90a3b6c32..3f6916352ed 100644 --- a/bootstore/src/db/schema.rs +++ b/bootstore/src/db/schema.rs @@ -5,20 +5,20 @@ use diesel::table; table! { - key_share_prepare(epoch) { + key_share_prepares(epoch) { epoch -> Integer, share -> Text, } } table! { - key_share_commit(epoch) { + key_share_commits(epoch) { epoch -> Integer, } } table! 
{ - encrypted_root_secret(epoch) { + encrypted_root_secrets(epoch) { epoch -> Integer, salt -> Binary, secret -> Binary, diff --git a/bootstore/src/db/schema.sql b/bootstore/src/db/schema.sql index af5f43fc8dd..4f2d9465733 100644 --- a/bootstore/src/db/schema.sql +++ b/bootstore/src/db/schema.sql @@ -1,19 +1,21 @@ -CREATE TABLE IF NOT EXISTS key_share_prepare ( +CREATE TABLE IF NOT EXISTS key_share_prepares ( epoch INTEGER NOT NULL, share TEXT NOT NULL, - PRIMARY KEY(epoch) -) + PRIMARY KEY (epoch) +); -CREATE TABLE IF NOT EXISTS key_share_commit( +CREATE TABLE IF NOT EXISTS key_share_commits ( epoch INTEGER NOT NULL, - PRIMARY KEY(epoch) -) + PRIMARY KEY (epoch) +); -CREATE TABLE IF NOT EXISTS encrypted_root_secret( +CREATE TABLE IF NOT EXISTS encrypted_root_secrets ( epoch INTEGER NOT NULL, salt BLOB NOT NULL, secret BLOB NOT NULL, tag BLOB NOT NULL, -) + + PRIMARY KEY (epoch) +); From f1666077f1d02ade05be7e5fb5a343cd939282e8 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Mon, 29 Aug 2022 19:11:07 +0000 Subject: [PATCH 10/28] randomize db names --- bootstore/src/db/mod.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/bootstore/src/db/mod.rs b/bootstore/src/db/mod.rs index 78516484cc2..9f856c63014 100644 --- a/bootstore/src/db/mod.rs +++ b/bootstore/src/db/mod.rs @@ -96,6 +96,8 @@ mod tests { use super::*; use crate::trust_quorum::{RackSecret, ShareDistribution}; use omicron_test_utils::dev::test_setup_log; + use rand::distributions::Alphanumeric; + use rand::{thread_rng, Rng}; // TODO: Fill in with actual member certs fn new_shares() -> Vec { @@ -117,11 +119,20 @@ mod tests { .collect() } + fn rand_db_name() -> String { + let seed: String = thread_rng() + .sample_iter(&Alphanumeric) + .take(16) + .map(char::from) + .collect(); + format!("/tmp/testdb-{}.sqlite", seed) + } + #[test] fn simple_prepare_insert_and_query() { use schema::key_share_prepares::dsl; let log = test_setup_log("test_db").log.clone(); - let mut db = 
Db::open(log, "/tmp/testdb.sqlite").unwrap(); + let mut db = Db::open(log, &rand_db_name()).unwrap(); let shares = new_shares(); let epoch = 0; let expected: SerializableShareDistribution = shares[0].clone().into(); From b817d5179e621b178bd713e789d88ffe3ce58574 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Mon, 29 Aug 2022 21:49:20 +0000 Subject: [PATCH 11/28] wip --- Cargo.lock | 7 +++++++ bootstore/src/db/mod.rs | 37 +++++++++++++++++++++++++++++++++---- bootstore/src/db/models.rs | 9 +++++++++ bootstore/src/db/schema.rs | 1 + bootstore/src/db/schema.sql | 17 ++++++++++------- 5 files changed, 60 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c14bdb0fc92..8362ef4435d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -83,6 +83,12 @@ dependencies = [ "term", ] +[[package]] +name = "assert_matches" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" + [[package]] name = "async-bb8-diesel" version = "0.1.0" @@ -342,6 +348,7 @@ dependencies = [ name = "bootstore" version = "0.1.0" dependencies = [ + "assert_matches", "bincode", "diesel", "omicron-test-utils", diff --git a/bootstore/src/db/mod.rs b/bootstore/src/db/mod.rs index 9f856c63014..fe625a42dc9 100644 --- a/bootstore/src/db/mod.rs +++ b/bootstore/src/db/mod.rs @@ -7,6 +7,7 @@ mod macros; mod models; mod schema; +use diesel::connection::SimpleConnection; use diesel::prelude::*; use diesel::SqliteConnection; use slog::Logger; @@ -69,10 +70,8 @@ impl Db { // DO NOT CHANGE THIS SETTING! 
diesel::sql_query("PRAGMA synchronous = 'FULL'").execute(&mut c)?; - c.immediate_transaction::<_, Error, _>(|tx| { - // Create tables - diesel::sql_query(schema).execute(tx).map_err(|e| Error::Db(e)) - })?; + // Create tables + c.batch_execute(&schema)?; Ok(Db { log, conn: c }) } @@ -89,6 +88,19 @@ impl Db { .execute(&mut self.conn)?; Ok(()) } + + pub fn commit_share( + &mut self, + epoch: i32, + digest: sprockets_common::Sha3_256Digest, + ) -> Result<(), Error> { + use schema::key_share_commits::dsl; + let commit = KeyShareCommit { epoch, share_digest: digest.into() }; + diesel::insert_into(dsl::key_share_commits) + .values(&commit) + .execute(&mut self.conn)?; + Ok(()) + } } #[cfg(test)] @@ -144,4 +156,21 @@ mod tests { .unwrap(); assert_eq!(val.0, expected); } + + #[test] + fn commit_fails_without_corresponding_prepare() { + let log = test_setup_log("test_db").log.clone(); + let mut db = Db::open(log, &rand_db_name()).unwrap(); + let epoch = 0; + + let digest = sprockets_common::Sha3_256Digest::default(); + let err = db.commit_share(epoch, digest).unwrap_err(); + assert!(matches!( + err, + Error::Db(diesel::result::Error::DatabaseError( + diesel::result::DatabaseErrorKind::ForeignKeyViolation, + _ + )), + )); + } } diff --git a/bootstore/src/db/models.rs b/bootstore/src/db/models.rs index d7f0616d75b..ceef21becb6 100644 --- a/bootstore/src/db/models.rs +++ b/bootstore/src/db/models.rs @@ -25,6 +25,8 @@ pub struct KeySharePrepare { #[derive(Debug, Queryable, Insertable)] pub struct KeyShareCommit { pub epoch: i32, + // The hash of the share in the corresponding KeySharePrepare + pub share_digest: Sha3_256Digest, } // TODO: These should go in a crypto module @@ -61,6 +63,13 @@ pub struct EncryptedRootSecret { pub tag: AuthTag, } +array_new_type!(Sha3_256Digest, DIGEST_LEN); array_new_type!(EncryptedSecret, KEY_LEN); array_new_type!(Salt, DIGEST_LEN); array_new_type!(AuthTag, TAG_LEN); + +impl From for Sha3_256Digest { + fn from(digest: 
sprockets_common::Sha3_256Digest) -> Self { + Sha3_256Digest(digest.0) + } +} diff --git a/bootstore/src/db/schema.rs b/bootstore/src/db/schema.rs index 3f6916352ed..2f94598e3e2 100644 --- a/bootstore/src/db/schema.rs +++ b/bootstore/src/db/schema.rs @@ -14,6 +14,7 @@ table! { table! { key_share_commits(epoch) { epoch -> Integer, + share_digest -> Binary, } } diff --git a/bootstore/src/db/schema.sql b/bootstore/src/db/schema.sql index 4f2d9465733..2e698dde1d5 100644 --- a/bootstore/src/db/schema.sql +++ b/bootstore/src/db/schema.sql @@ -1,21 +1,24 @@ CREATE TABLE IF NOT EXISTS key_share_prepares ( - epoch INTEGER NOT NULL, - share TEXT NOT NULL, + epoch INTEGER NOT NULL, + share TEXT NOT NULL, PRIMARY KEY (epoch) ); CREATE TABLE IF NOT EXISTS key_share_commits ( - epoch INTEGER NOT NULL, + epoch INTEGER NOT NULL, + share_digest BLOB NOT NULL, PRIMARY KEY (epoch) + FOREIGN KEY (epoch) REFERENCES key_share_prepares (epoch) ); CREATE TABLE IF NOT EXISTS encrypted_root_secrets ( - epoch INTEGER NOT NULL, - salt BLOB NOT NULL, - secret BLOB NOT NULL, - tag BLOB NOT NULL, + epoch INTEGER NOT NULL, + salt BLOB NOT NULL, + secret BLOB NOT NULL, + tag BLOB NOT NULL, PRIMARY KEY (epoch) + FOREIGN KEY (epoch) REFERENCES key_share_prepares (epoch) ); From 5f4f9a8155a13ae4fae06163499687ff2a65bbb8 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Tue, 30 Aug 2022 01:00:55 +0000 Subject: [PATCH 12/28] verify prepare share digest matches commit --- Cargo.lock | 8 +--- bootstore/Cargo.toml | 1 + bootstore/src/db/macros.rs | 8 +++- bootstore/src/db/mod.rs | 81 ++++++++++++++++++++++++++++++------- bootstore/src/db/models.rs | 1 + bootstore/src/db/schema.rs | 1 + bootstore/src/db/schema.sql | 1 + 7 files changed, 78 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8362ef4435d..411f3067602 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -83,12 +83,6 @@ dependencies = [ "term", ] -[[package]] -name = "assert_matches" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" - [[package]] name = "async-bb8-diesel" version = "0.1.0" @@ -348,7 +342,6 @@ dependencies = [ name = "bootstore" version = "0.1.0" dependencies = [ - "assert_matches", "bincode", "diesel", "omicron-test-utils", @@ -356,6 +349,7 @@ dependencies = [ "rand 0.8.5", "serde", "serde_json", + "sha3", "slog", "sprockets-common", "sprockets-host", diff --git a/bootstore/Cargo.toml b/bootstore/Cargo.toml index 7e97b0be7a1..fa02159db57 100644 --- a/bootstore/Cargo.toml +++ b/bootstore/Cargo.toml @@ -11,6 +11,7 @@ p256 = "0.9.0" rand = { version = "0.8.5", features = ["getrandom"] } serde = { version = "1.0", features = [ "derive" ] } serde_json = "1.0" +sha3 = "0.10.1" slog = { version = "2.5", features = [ "max_level_trace", "release_max_level_debug" ] } sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" } sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" } diff --git a/bootstore/src/db/macros.rs b/bootstore/src/db/macros.rs index cd04aed35f9..9b4926e5b14 100644 --- a/bootstore/src/db/macros.rs +++ b/bootstore/src/db/macros.rs @@ -64,7 +64,11 @@ macro_rules! 
json_new_type { macro_rules! array_new_type { ($name:ident, $len:expr) => { #[derive( - Clone, Debug, FromSqlRow, diesel::expression::AsExpression, + PartialEq, + Clone, + Debug, + FromSqlRow, + diesel::expression::AsExpression, )] #[diesel(sql_type = diesel::sql_types::Binary)] pub struct $name(pub [u8; $len]); @@ -84,7 +88,7 @@ macro_rules! array_new_type { } } - impl FromSql for $name + impl FromSql for $name where DB: diesel::backend::Backend, Vec: FromSql, diff --git a/bootstore/src/db/mod.rs b/bootstore/src/db/mod.rs index fe625a42dc9..7ecf931ec93 100644 --- a/bootstore/src/db/mod.rs +++ b/bootstore/src/db/mod.rs @@ -17,7 +17,9 @@ use crate::trust_quorum::SerializableShareDistribution; use models::EncryptedRootSecret; use models::KeyShareCommit; use models::KeySharePrepare; +use models::Sha3_256Digest; use models::Share; +use sha3::{Digest, Sha3_256}; #[derive(thiserror::Error, Debug)] pub enum Error { @@ -26,6 +28,12 @@ pub enum Error { #[error(transparent)] Db(#[from] diesel::result::Error), + + #[error(transparent)] + Json(#[from] serde_json::Error), + + #[error("Share commit for {epoch} does not match prepare")] + CommitHashMismatch { epoch: i32 }, } /// The ID of the database used to store blobs. @@ -82,7 +90,14 @@ impl Db { share: SerializableShareDistribution, ) -> Result<(), Error> { use schema::key_share_prepares::dsl; - let prepare = KeySharePrepare { epoch, share: Share(share) }; + // We save the digest so we don't have to deserialize and recompute most of the time. + // We'd only want to do that for a consistency check occasionally. 
+ let val = serde_json::to_string(&share)?; + let share_digest = + sprockets_common::Sha3_256Digest(Sha3_256::digest(&val).into()) + .into(); + let prepare = + KeySharePrepare { epoch, share: Share(share), share_digest }; diesel::insert_into(dsl::key_share_prepares) .values(&prepare) .execute(&mut self.conn)?; @@ -94,12 +109,27 @@ impl Db { epoch: i32, digest: sprockets_common::Sha3_256Digest, ) -> Result<(), Error> { - use schema::key_share_commits::dsl; - let commit = KeyShareCommit { epoch, share_digest: digest.into() }; - diesel::insert_into(dsl::key_share_commits) - .values(&commit) - .execute(&mut self.conn)?; - Ok(()) + use schema::key_share_commits; + use schema::key_share_prepares; + let commit = + KeyShareCommit { epoch, share_digest: digest.clone().into() }; + self.conn.immediate_transaction(|tx| { + // We only want to commit if the share digest of the commit is the + // same as that of the prepare. + let prepare_digest = key_share_prepares::table + .select(key_share_prepares::share_digest) + .filter(key_share_prepares::epoch.eq(epoch)) + .get_result::(tx)?; + + if prepare_digest != digest.into() { + return Err(Error::CommitHashMismatch { epoch }); + } + + diesel::insert_into(key_share_commits::table) + .values(&commit) + .execute(tx)?; + Ok(()) + }) } } @@ -165,12 +195,35 @@ mod tests { let digest = sprockets_common::Sha3_256Digest::default(); let err = db.commit_share(epoch, digest).unwrap_err(); - assert!(matches!( - err, - Error::Db(diesel::result::Error::DatabaseError( - diesel::result::DatabaseErrorKind::ForeignKeyViolation, - _ - )), - )); + assert!(matches!(err, Error::Db(diesel::result::Error::NotFound))); + } + + #[test] + fn commit_fails_with_invalid_hash() { + let log = test_setup_log("test_db").log.clone(); + let mut db = Db::open(log, &rand_db_name()).unwrap(); + let shares = new_shares(); + let epoch = 0; + let expected: SerializableShareDistribution = shares[0].clone().into(); + db.prepare_share(epoch, expected.clone()).unwrap(); + let 
digest = sprockets_common::Sha3_256Digest::default(); + let err = db.commit_share(epoch, digest).unwrap_err(); + assert!(matches!(err, Error::CommitHashMismatch { epoch: _ })); + } + + #[test] + fn commit_succeeds_with_correct_hash() { + let log = test_setup_log("test_db").log.clone(); + let mut db = Db::open(log, &rand_db_name()).unwrap(); + let shares = new_shares(); + let epoch = 0; + let expected: SerializableShareDistribution = shares[0].clone().into(); + db.prepare_share(epoch, expected.clone()).unwrap(); + + let val = serde_json::to_string(&expected).unwrap(); + let digest = + sprockets_common::Sha3_256Digest(Sha3_256::digest(&val).into()) + .into(); + assert!(db.commit_share(epoch, digest).is_ok()); } } diff --git a/bootstore/src/db/models.rs b/bootstore/src/db/models.rs index ceef21becb6..0afc2ac61e7 100644 --- a/bootstore/src/db/models.rs +++ b/bootstore/src/db/models.rs @@ -20,6 +20,7 @@ json_new_type!(Share, SerializableShareDistribution); pub struct KeySharePrepare { pub epoch: i32, pub share: Share, + pub share_digest: Sha3_256Digest, } #[derive(Debug, Queryable, Insertable)] diff --git a/bootstore/src/db/schema.rs b/bootstore/src/db/schema.rs index 2f94598e3e2..31cb36b311f 100644 --- a/bootstore/src/db/schema.rs +++ b/bootstore/src/db/schema.rs @@ -8,6 +8,7 @@ table! { key_share_prepares(epoch) { epoch -> Integer, share -> Text, + share_digest -> Binary, } } diff --git a/bootstore/src/db/schema.sql b/bootstore/src/db/schema.sql index 2e698dde1d5..167426c87d0 100644 --- a/bootstore/src/db/schema.sql +++ b/bootstore/src/db/schema.sql @@ -1,6 +1,7 @@ CREATE TABLE IF NOT EXISTS key_share_prepares ( epoch INTEGER NOT NULL, share TEXT NOT NULL, + share_digest BLOB NOT NULL, PRIMARY KEY (epoch) ); From 96d4ee04e12b1f8fce58e4c6b5316926f3eba6fd Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Tue, 30 Aug 2022 04:35:18 +0000 Subject: [PATCH 13/28] wip --- bootstore/src/db/models.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/bootstore/src/db/models.rs b/bootstore/src/db/models.rs index 0afc2ac61e7..853ac17c6c1 100644 --- a/bootstore/src/db/models.rs +++ b/bootstore/src/db/models.rs @@ -30,16 +30,6 @@ pub struct KeyShareCommit { pub share_digest: Sha3_256Digest, } -// TODO: These should go in a crypto module -// The length of a SHA3-256 digest -pub const DIGEST_LEN: usize = 32; - -// The length of a ChaCha20Poly1305 Key -pub const KEY_LEN: usize = 32; - -// The length of a ChaCha20Poly1305 authentication tag -pub const TAG_LEN: usize = 16; - // A chacha20poly1305 secret encrypted by a chacha20poly1305 secret key // derived from the rack secret for the given epoch with the given salt // @@ -64,6 +54,16 @@ pub struct EncryptedRootSecret { pub tag: AuthTag, } +// TODO: These should likely go in a crypto module +// The length of a SHA3-256 digest +pub const DIGEST_LEN: usize = 32; + +// The length of a ChaCha20Poly1305 Key +pub const KEY_LEN: usize = 32; + +// The length of a ChaCha20Poly1305 authentication tag +pub const TAG_LEN: usize = 16; + array_new_type!(Sha3_256Digest, DIGEST_LEN); array_new_type!(EncryptedSecret, KEY_LEN); array_new_type!(Salt, DIGEST_LEN); From 28e2eedc659bbda1c4925b574b89117771066c9d Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Wed, 31 Aug 2022 18:04:57 +0000 Subject: [PATCH 14/28] start adding node and messages --- Cargo.lock | 2 + bootstore/Cargo.toml | 2 + bootstore/src/db/mod.rs | 1 + bootstore/src/lib.rs | 6 ++- bootstore/src/messages.rs | 86 +++++++++++++++++++++++++++++++++++++++ bootstore/src/node.rs | 49 ++++++++++++++++++++++ 6 files changed, 145 insertions(+), 1 deletion(-) create mode 100644 bootstore/src/messages.rs create mode 100644 bootstore/src/node.rs diff --git a/Cargo.lock b/Cargo.lock index 411f3067602..729e63e1059 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -343,6 +343,7 @@ name = "bootstore" version = "0.1.0" dependencies = [ "bincode", + "derive_more", "diesel", "omicron-test-utils", "p256", @@ -354,6 +355,7 @@ dependencies = [ "sprockets-common", "sprockets-host", "thiserror", + "uuid", "vsss-rs", ] diff --git a/bootstore/Cargo.toml b/bootstore/Cargo.toml index fa02159db57..30fec9ad4b2 100644 --- a/bootstore/Cargo.toml +++ b/bootstore/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" license = "MPL-2.0" [dependencies] +derive_more = "0.99.17" diesel = { version = "2.0.0-rc.1", features = ["sqlite", "chrono", "serde_json", "network-address", "uuid"] } p256 = "0.9.0" rand = { version = "0.8.5", features = ["getrandom"] } @@ -16,6 +17,7 @@ slog = { version = "2.5", features = [ "max_level_trace", "release_max_level_deb sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" } sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" } thiserror = "1.0" +uuid = { version = "1.1.0", features = [ "serde", "v4" ] } vsss-rs = { version = "2.0.0", default-features = false, features = ["std"] } [dev-dependencies] diff --git a/bootstore/src/db/mod.rs b/bootstore/src/db/mod.rs index 7ecf931ec93..7b68ab8cc42 100644 --- a/bootstore/src/db/mod.rs +++ b/bootstore/src/db/mod.rs @@ -3,6 +3,7 @@ // file, You can obtain one at 
https://mozilla.org/MPL/2.0/. //! Database layer for the bootstore + mod macros; mod models; mod schema; diff --git a/bootstore/src/lib.rs b/bootstore/src/lib.rs index da6afe91a3b..de26e35be25 100644 --- a/bootstore/src/lib.rs +++ b/bootstore/src/lib.rs @@ -10,5 +10,9 @@ //! it to CockroachDB when we start it up. mod db; +mod messages; +mod node; mod trust_quorum; -//mod server; + +pub use node::Config; +pub use node::Node; diff --git a/bootstore/src/messages.rs b/bootstore/src/messages.rs new file mode 100644 index 00000000000..70d0a20da4b --- /dev/null +++ b/bootstore/src/messages.rs @@ -0,0 +1,86 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! All messages sent and received by bootstore nodes and coordinators + +use derive_more::From; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; +use vsss_rs::Share; + +use crate::trust_quorum::SerializableShareDistribution; + +/// A request sent to a [`Node`] from another [`Node`] or a [`Coordinator`]. +#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +pub struct NodeRequest { + pub version: u32, + /// A message correlation id to match requests to responses + pub id: u64, + pub op: NodeOp, +} + +/// A specific operation for a Node +#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +pub enum NodeOp { + /// Retrieve a key share for the given epoch + /// + /// A [`Node`] will only respond if the epoch is still valid + /// and the sending [`Node`] is a member of the trust quorum. + GetShare { epoch: i32 }, + + /// A request sent by RSS with the trust quorum membership and key share + /// for epoch 0. + /// + /// Epoch 0 is the only epoch in which prepared data can be overwritten. 
+ /// This is because there is no global datastore (CockroachDB) with which + /// to persist information that needs to be distributed to all sleds in the + /// rack. If RSS dies, or more importantly, the scrimlet it is running on + /// dies, we want to enable re-running on a different scrimlet. Since we + /// don't know what information was already transferred to sleds, due to + /// lack of global datastore, we must re-issue the request. + /// + /// This request generates a `KeySharePrepare` for epoch 0. Once all sleds + /// have prepared, RSS trigger the start of RSS replicas. The trust quorum + /// membership and prepare status will be written into CockroachDB as the + /// epoch 0 trust quorum configuration. Nexus will then proceed to + /// commit the trust quorum information, by first writing the Commit to + /// CockroachDb and then sending a `KeyShareCommit` for epoch 0. + /// + /// TODO: The rack plan should also be sent here with similar storage + /// strategy as the key share/trust quorum membership. 
+ Initialize { + rack_uuid: Uuid, + share_distribution: SerializableShareDistribution, + }, + + /// A request from a [`Coordinator`] for the Prepare phase + /// of a rekey or reconfiguration + KeySharePrepare { + epoch: i32, + share_distribution: SerializableShareDistribution, + }, + + /// A request from a [`Coordinator`] for the Commit phase of a + /// rekey or reconfiguration + KeyShareCommit { epoch: i32 }, +} + +/// A response from a [`Node`] to another [`Node`] or a [`Coordinator`] +#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +pub struct NodeResponse { + pub version: u32, + /// A message correlation id to match requests to responses + pub id: u64, + pub op: NodeOp, +} + +#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +// The result of an operation from a [`Node`] +pub enum NodeOpResult { + /// A key share for a given epoch as requested by [`PeerRequest::GetShare`] + Share { epoch: i32, share: Share }, + + /// An ack for the most recent coordinator message + CoordinatorAck, +} diff --git a/bootstore/src/node.rs b/bootstore/src/node.rs new file mode 100644 index 00000000000..fb26d957aab --- /dev/null +++ b/bootstore/src/node.rs @@ -0,0 +1,49 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! An individual bootstore node. +//! +//! Most logic is contained here, but networking sits on top. +//! This allows easier testing of clusters and failure situations. 
+ +use slog::Logger; +use slog::{info, o}; +use std::net::SocketAddrV6; +use std::sync::Arc; +use std::sync::Mutex; + +use crate::db::Db; +use sprockets_host::Ed25519Certificate; + +/// Configuration for an individual node +pub struct Config { + log: Logger, + db_path: String, + // TODO: This will live inside the certificate eventually + serial_number: String, + device_id_cert: Ed25519Certificate, +} + +/// A node of the bootstore +/// +/// A Node contains all the logic of the bootstore and stores relevant +/// information in [`Db`]. The [`BootstrapAgent`] establishes sprockets +/// sessions, and utilizes its local `Node` to manage any messages received +/// over these sessions. +/// +/// Messages are received over sprockets sessions from either peer nodes +/// during rack unlock, or from a [`Coordinator`] during rack initialization +/// or reconfiguration. +pub struct Node { + config: Config, + db: Db, +} + +impl Node { + /// Create a new Node + pub fn new(config: Config) -> Node { + let db = Db::open(config.log.clone(), &config.db_path).unwrap(); + Node { config, db } + } +} From 2d2d86b3f5263de84877fd1492ed740ea3798f27 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 31 Aug 2022 19:04:05 +0000 Subject: [PATCH 15/28] more node stuff --- bootstore/src/messages.rs | 38 ++++++++++++++++++--- bootstore/src/node.rs | 70 ++++++++++++++++++++++++++++++++++++++- sled-agent/src/lib.rs | 1 - 3 files changed, 103 insertions(+), 6 deletions(-) diff --git a/bootstore/src/messages.rs b/bootstore/src/messages.rs index 70d0a20da4b..edcf81cc9e0 100644 --- a/bootstore/src/messages.rs +++ b/bootstore/src/messages.rs @@ -12,7 +12,7 @@ use vsss_rs::Share; use crate::trust_quorum::SerializableShareDistribution; /// A request sent to a [`Node`] from another [`Node`] or a [`Coordinator`]. 
-#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct NodeRequest { pub version: u32, /// A message correlation id to match requests to responses @@ -21,7 +21,7 @@ pub struct NodeRequest { } /// A specific operation for a Node -#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum NodeOp { /// Retrieve a key share for the given epoch /// @@ -57,13 +57,14 @@ pub enum NodeOp { /// A request from a [`Coordinator`] for the Prepare phase /// of a rekey or reconfiguration KeySharePrepare { + rack_uuid: Uuid, epoch: i32, share_distribution: SerializableShareDistribution, }, /// A request from a [`Coordinator`] for the Commit phase of a /// rekey or reconfiguration - KeyShareCommit { epoch: i32 }, + KeyShareCommit { rack_uuid: Uuid, epoch: i32 }, } /// A response from a [`Node`] to another [`Node`] or a [`Coordinator`] @@ -72,7 +73,7 @@ pub struct NodeResponse { pub version: u32, /// A message correlation id to match requests to responses pub id: u64, - pub op: NodeOp, + pub op: NodeOpResult, } #[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] @@ -83,4 +84,33 @@ pub enum NodeOpResult { /// An ack for the most recent coordinator message CoordinatorAck, + + /// Error responses + Error(NodeError), +} + +/// Errors returned inside a [`NodeOpResult`] +#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +pub enum NodeError { + KeyShareDoesNotExist { + epoch: i32, + }, + RackUuidMismatch { + expected: Uuid, + actual: Uuid, + }, + + /// A Commit has already occurred for the rack initialization. + AlreadyInitialized { + rack_uuid: Uuid, + }, + + /// A `Prepare` for a given epoch was requested, but the node has not + /// seen the corresponding `KeySharePrepare`. 
+ /// + /// This is valid for various 2-phase commits, not just key shares + MissingPrepare { + rack_uuid: Uuid, + epoch: i32, + }, } diff --git a/bootstore/src/node.rs b/bootstore/src/node.rs index fb26d957aab..868609af2c1 100644 --- a/bootstore/src/node.rs +++ b/bootstore/src/node.rs @@ -9,12 +9,22 @@ use slog::Logger; use slog::{info, o}; +use sprockets_host::Ed25519Certificate; use std::net::SocketAddrV6; use std::sync::Arc; use std::sync::Mutex; +use uuid::Uuid; use crate::db::Db; -use sprockets_host::Ed25519Certificate; +use crate::messages::*; +use crate::trust_quorum::SerializableShareDistribution; + +/// An error returned by a Node +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Version {0} messages are unsupported.")] + UnsupportedVersion(u32), +} /// Configuration for an individual node pub struct Config { @@ -46,4 +56,62 @@ impl Node { let db = Db::open(config.log.clone(), &config.db_path).unwrap(); Node { config, db } } + + /// Handle a message received over sprockets from another [`Node`] or + /// the [`Coordinator`]. 
+ pub fn handle(&mut self, req: NodeRequest) -> Result { + if req.version != 1 { + return Err(Error::UnsupportedVersion(req.version)); + } + + let op_result = match req.op { + NodeOp::GetShare { epoch } => self.handle_get_share(epoch), + NodeOp::Initialize { rack_uuid, share_distribution } => { + self.handle_initialize(rack_uuid, share_distribution) + } + NodeOp::KeySharePrepare { + rack_uuid, + epoch, + share_distribution, + } => self.handle_key_share_prepare( + rack_uuid, + epoch, + share_distribution, + ), + NodeOp::KeyShareCommit { rack_uuid, epoch } => { + self.handle_key_share_commit(rack_uuid, epoch) + } + }?; + + Ok(NodeResponse { version: req.version, id: req.id, op: op_result }) + } + + fn handle_get_share(&mut self, epoch: i32) -> Result { + unimplemented!(); + } + + fn handle_initialize( + &mut self, + rack_uuid: Uuid, + share_distribution: SerializableShareDistribution, + ) -> Result { + unimplemented!(); + } + + fn handle_key_share_prepare( + &mut self, + rack_uuid: Uuid, + epoch: i32, + share_distribution: SerializableShareDistribution, + ) -> Result { + unimplemented!(); + } + + fn handle_key_share_commit( + &mut self, + rack_uuid: Uuid, + epoch: i32, + ) -> Result { + unimplemented!(); + } } diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index a83fc068f2e..602293cab23 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -19,7 +19,6 @@ pub mod sim; pub mod common; // Modules for the non-simulated sled agent. -mod bootstore; pub mod bootstrap; pub mod config; mod http_entrypoints; From 1c6d140e517a053244124e5e95a2465142925bb0 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Wed, 31 Aug 2022 19:30:39 +0000 Subject: [PATCH 16/28] remove server --- bootstore/src/server.rs | 53 ----------------------------------------- 1 file changed, 53 deletions(-) delete mode 100644 bootstore/src/server.rs diff --git a/bootstore/src/server.rs b/bootstore/src/server.rs deleted file mode 100644 index 0093fbac090..00000000000 --- a/bootstore/src/server.rs +++ /dev/null @@ -1,53 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! A sprockets server for handling bootstrap related requests -//! -//! This is the server for trust-quorum rack unlock as well as -//! the server backing the 2PC implementation used for trust quorum initialization, -//! trust quorum reconfiguration, and NetworkConfiguration needed to configure NTP. - -use slog::Drain; -use slog::Logger; -use std::io; -use std::net::Ipv6Addr; -use std::net::SocketAddrV6; - -use super::db::Db; -use crate::sp::SimSpConfig; -use crate::sp::SpHandle; - -/// The sprockets server for the bootstore -/// -/// The Server is in charge of managing the SP, key shares, and early boot -/// network configuration. 
-pub struct Server { - listener: TcpListener, - bind_address: SocketAddrV6, - sp: SpHandle, - log: Logger, -} - -#[derive(thiserror::Error, Debug)] -pub enum Error { - #[error("Cannot bind to {bind_address}: {err}")] - Bind { bind_address: SocketAddrV6, err: io::Error }, -} - -impl Server { - async fn start( - log: Logger, - bind_address: SocketAddrV6, - sp: SpHandle, - ) -> Result>, Error> { - let listener = TcpListener::bind(bind_address) - .await - .map_err(|err| Error::Bind { bind_address, err })?; - info!(log, "Started listening"; "local_addr" => %bind_address); - let server = Server { listener, sp, bind_address, log }; - Ok(tokio::spawn(server.run())) - } - - async fn run(self) -> Result<(), Error> {} -} From b89a28fcc2730ce487ef78875bc9fff77fe5a472 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 31 Aug 2022 20:08:11 +0000 Subject: [PATCH 17/28] fix typo --- bootstore/src/messages.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bootstore/src/messages.rs b/bootstore/src/messages.rs index edcf81cc9e0..350e2ada924 100644 --- a/bootstore/src/messages.rs +++ b/bootstore/src/messages.rs @@ -41,9 +41,9 @@ pub enum NodeOp { /// lack of global datastore, we must re-issue the request. /// /// This request generates a `KeySharePrepare` for epoch 0. Once all sleds - /// have prepared, RSS trigger the start of RSS replicas. The trust quorum - /// membership and prepare status will be written into CockroachDB as the - /// epoch 0 trust quorum configuration. Nexus will then proceed to + /// have prepared, RSS trigger the start of CockroachDB replicas. The trust + /// quorum membership and prepare status will be written into CockroachDB + /// as the epoch 0 trust quorum configuration. Nexus will then proceed to /// commit the trust quorum information, by first writing the Commit to /// CockroachDb and then sending a `KeyShareCommit` for epoch 0. /// From e3a98550ec52e3d4454478b40796a287cae29ed1 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Thu, 1 Sep 2022 15:28:47 +0000 Subject: [PATCH 18/28] Return NodeResponse from Node::handle --- bootstore/src/messages.rs | 40 +++++++++++++++++++-------------------- bootstore/src/node.rs | 39 ++++++++++++++++++++++---------------- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/bootstore/src/messages.rs b/bootstore/src/messages.rs index 350e2ada924..6159983a48c 100644 --- a/bootstore/src/messages.rs +++ b/bootstore/src/messages.rs @@ -90,27 +90,27 @@ pub enum NodeOpResult { } /// Errors returned inside a [`NodeOpResult`] -#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +#[derive( + Debug, Clone, PartialEq, From, Serialize, Deserialize, thiserror::Error, +)] pub enum NodeError { - KeyShareDoesNotExist { - epoch: i32, - }, - RackUuidMismatch { - expected: Uuid, - actual: Uuid, - }, + #[error("Version {0} messages are unsupported.")] + UnsupportedVersion(u32), - /// A Commit has already occurred for the rack initialization. - AlreadyInitialized { - rack_uuid: Uuid, - }, + #[error("Key share for epoch {epoch} does not exist.")] + KeyShareDoesNotExist { epoch: i32 }, - /// A `Prepare` for a given epoch was requested, but the node has not - /// seen the corresponding `KeySharePrepare`. - /// - /// This is valid for various 2-phase commits, not just key shares - MissingPrepare { - rack_uuid: Uuid, - epoch: i32, - }, + #[error( + "Received unexpected rack UUID. 
Expected: {expected}, Actual: {actual}" + )] + RackUuidMismatch { expected: Uuid, actual: Uuid }, + + #[error("A commit has already occurred for rack {rack_uuid}")] + AlreadyInitialized { rack_uuid: Uuid }, + + #[error( + "No corresponding key share prepare for this commit: rack UUID: +{rack_uuid}, epoch: {epoch}" + )] + MissingKeySharePrepare { rack_uuid: Uuid, epoch: i32 }, } diff --git a/bootstore/src/node.rs b/bootstore/src/node.rs index 868609af2c1..5e2d1e9d133 100644 --- a/bootstore/src/node.rs +++ b/bootstore/src/node.rs @@ -19,13 +19,6 @@ use crate::db::Db; use crate::messages::*; use crate::trust_quorum::SerializableShareDistribution; -/// An error returned by a Node -#[derive(thiserror::Error, Debug)] -pub enum Error { - #[error("Version {0} messages are unsupported.")] - UnsupportedVersion(u32), -} - /// Configuration for an individual node pub struct Config { log: Logger, @@ -59,12 +52,18 @@ impl Node { /// Handle a message received over sprockets from another [`Node`] or /// the [`Coordinator`]. 
- pub fn handle(&mut self, req: NodeRequest) -> Result { + pub fn handle(&mut self, req: NodeRequest) -> NodeResponse { if req.version != 1 { - return Err(Error::UnsupportedVersion(req.version)); + return NodeResponse { + version: req.version, + id: req.id, + op: NodeOpResult::Error(NodeError::UnsupportedVersion( + req.version, + )), + }; } - let op_result = match req.op { + let result = match req.op { NodeOp::GetShare { epoch } => self.handle_get_share(epoch), NodeOp::Initialize { rack_uuid, share_distribution } => { self.handle_initialize(rack_uuid, share_distribution) @@ -81,12 +80,20 @@ impl Node { NodeOp::KeyShareCommit { rack_uuid, epoch } => { self.handle_key_share_commit(rack_uuid, epoch) } - }?; + }; + + let op_result = match result { + Ok(op_result) => op_result, + Err(err) => NodeOpResult::Error(err), + }; - Ok(NodeResponse { version: req.version, id: req.id, op: op_result }) + NodeResponse { version: req.version, id: req.id, op: op_result } } - fn handle_get_share(&mut self, epoch: i32) -> Result { + fn handle_get_share( + &mut self, + epoch: i32, + ) -> Result { unimplemented!(); } @@ -94,7 +101,7 @@ impl Node { &mut self, rack_uuid: Uuid, share_distribution: SerializableShareDistribution, - ) -> Result { + ) -> Result { unimplemented!(); } @@ -103,7 +110,7 @@ impl Node { rack_uuid: Uuid, epoch: i32, share_distribution: SerializableShareDistribution, - ) -> Result { + ) -> Result { unimplemented!(); } @@ -111,7 +118,7 @@ impl Node { &mut self, rack_uuid: Uuid, epoch: i32, - ) -> Result { + ) -> Result { unimplemented!(); } } From 07e043e547b44d9b299d3014407ea1f85ad457b0 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Thu, 1 Sep 2022 15:37:14 +0000 Subject: [PATCH 19/28] Use a result directly in `NodeResponse` --- bootstore/src/messages.rs | 5 +---- bootstore/src/node.rs | 11 ++--------- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/bootstore/src/messages.rs b/bootstore/src/messages.rs index 6159983a48c..7547f331f8d 100644 --- a/bootstore/src/messages.rs +++ b/bootstore/src/messages.rs @@ -73,7 +73,7 @@ pub struct NodeResponse { pub version: u32, /// A message correlation id to match requests to responses pub id: u64, - pub op: NodeOpResult, + pub result: Result, } #[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] @@ -84,9 +84,6 @@ pub enum NodeOpResult { /// An ack for the most recent coordinator message CoordinatorAck, - - /// Error responses - Error(NodeError), } /// Errors returned inside a [`NodeOpResult`] diff --git a/bootstore/src/node.rs b/bootstore/src/node.rs index 5e2d1e9d133..eba6e03b7ad 100644 --- a/bootstore/src/node.rs +++ b/bootstore/src/node.rs @@ -57,9 +57,7 @@ impl Node { return NodeResponse { version: req.version, id: req.id, - op: NodeOpResult::Error(NodeError::UnsupportedVersion( - req.version, - )), + result: Err(NodeError::UnsupportedVersion(req.version)), }; } @@ -82,12 +80,7 @@ impl Node { } }; - let op_result = match result { - Ok(op_result) => op_result, - Err(err) => NodeOpResult::Error(err), - }; - - NodeResponse { version: req.version, id: req.id, op: op_result } + NodeResponse { version: req.version, id: req.id, result } } fn handle_get_share( From 10058ed9b702f4d78501d99bf443189a29fbb1dd Mon Sep 17 00:00:00 2001 From: "Andrew J.
Stone" Date: Thu, 1 Sep 2022 17:55:07 +0000 Subject: [PATCH 20/28] Remove key_share_commits table --- bootstore/src/db/mod.rs | 50 ++++++++++++++++++++++--------------- bootstore/src/db/models.rs | 13 ++++------ bootstore/src/db/schema.rs | 10 ++------ bootstore/src/db/schema.sql | 11 ++------ bootstore/src/lib.rs | 4 +++ bootstore/src/messages.rs | 12 ++++----- 6 files changed, 49 insertions(+), 51 deletions(-) diff --git a/bootstore/src/db/mod.rs b/bootstore/src/db/mod.rs index 7b68ab8cc42..875e7dc077b 100644 --- a/bootstore/src/db/mod.rs +++ b/bootstore/src/db/mod.rs @@ -16,8 +16,7 @@ use slog::{info, o}; use crate::trust_quorum::SerializableShareDistribution; use models::EncryptedRootSecret; -use models::KeyShareCommit; -use models::KeySharePrepare; +use models::KeyShare; use models::Sha3_256Digest; use models::Share; use sha3::{Digest, Sha3_256}; @@ -90,16 +89,20 @@ impl Db { epoch: i32, share: SerializableShareDistribution, ) -> Result<(), Error> { - use schema::key_share_prepares::dsl; + use schema::key_shares::dsl; // We save the digest so we don't have to deserialize and recompute most of the time. // We'd only want to do that for a consistency check occasionally. 
let val = serde_json::to_string(&share)?; let share_digest = sprockets_common::Sha3_256Digest(Sha3_256::digest(&val).into()) .into(); - let prepare = - KeySharePrepare { epoch, share: Share(share), share_digest }; - diesel::insert_into(dsl::key_share_prepares) + let prepare = KeyShare { + epoch, + share: Share(share), + share_digest, + committed: false, + }; + diesel::insert_into(dsl::key_shares) .values(&prepare) .execute(&mut self.conn)?; Ok(()) @@ -110,24 +113,21 @@ impl Db { epoch: i32, digest: sprockets_common::Sha3_256Digest, ) -> Result<(), Error> { - use schema::key_share_commits; - use schema::key_share_prepares; - let commit = - KeyShareCommit { epoch, share_digest: digest.clone().into() }; + use schema::key_shares::dsl; self.conn.immediate_transaction(|tx| { // We only want to commit if the share digest of the commit is the // same as that of the prepare. - let prepare_digest = key_share_prepares::table - .select(key_share_prepares::share_digest) - .filter(key_share_prepares::epoch.eq(epoch)) + let prepare_digest = dsl::key_shares + .select(dsl::share_digest) + .filter(dsl::epoch.eq(epoch)) .get_result::(tx)?; if prepare_digest != digest.into() { return Err(Error::CommitHashMismatch { epoch }); } - diesel::insert_into(key_share_commits::table) - .values(&commit) + diesel::update(dsl::key_shares.filter(dsl::epoch.eq(epoch))) + .set(dsl::committed.eq(true)) .execute(tx)?; Ok(()) }) @@ -173,19 +173,20 @@ mod tests { #[test] fn simple_prepare_insert_and_query() { - use schema::key_share_prepares::dsl; + use schema::key_shares::dsl; let log = test_setup_log("test_db").log.clone(); let mut db = Db::open(log, &rand_db_name()).unwrap(); let shares = new_shares(); let epoch = 0; let expected: SerializableShareDistribution = shares[0].clone().into(); db.prepare_share(epoch, expected.clone()).unwrap(); - let val = dsl::key_share_prepares - .select(dsl::share) + let (share, committed) = dsl::key_shares + .select((dsl::share, dsl::committed)) 
.filter(dsl::epoch.eq(epoch)) - .get_result::(&mut db.conn) + .get_result::<(Share, bool)>(&mut db.conn) .unwrap(); - assert_eq!(val.0, expected); + assert_eq!(share.0, expected); + assert_eq!(committed, false); } #[test] @@ -226,5 +227,14 @@ mod tests { sprockets_common::Sha3_256Digest(Sha3_256::digest(&val).into()) .into(); assert!(db.commit_share(epoch, digest).is_ok()); + + // Ensure `committed = true` + use schema::key_shares::dsl; + let committed = dsl::key_shares + .select(dsl::committed) + .filter(dsl::epoch.eq(epoch)) + .get_result::(&mut db.conn) + .unwrap(); + assert_eq!(true, committed); } } diff --git a/bootstore/src/db/models.rs b/bootstore/src/db/models.rs index 853ac17c6c1..2775b0e48d7 100644 --- a/bootstore/src/db/models.rs +++ b/bootstore/src/db/models.rs @@ -16,18 +16,15 @@ use crate::trust_quorum::SerializableShareDistribution; json_new_type!(Share, SerializableShareDistribution); +/// When a [`KeyShareParepare`] message arrives it is stored in a [`KeyShare`] +/// When a [`KeyShareCommit`] message arrives the `committed` field/column is +/// set to true. #[derive(Debug, Queryable, Insertable)] -pub struct KeySharePrepare { +pub struct KeyShare { pub epoch: i32, pub share: Share, pub share_digest: Sha3_256Digest, -} - -#[derive(Debug, Queryable, Insertable)] -pub struct KeyShareCommit { - pub epoch: i32, - // The hash of the share in the corresponding KeySharePrepare - pub share_digest: Sha3_256Digest, + pub committed: bool, } // A chacha20poly1305 secret encrypted by a chacha20poly1305 secret key diff --git a/bootstore/src/db/schema.rs b/bootstore/src/db/schema.rs index 31cb36b311f..1c4fb319f7c 100644 --- a/bootstore/src/db/schema.rs +++ b/bootstore/src/db/schema.rs @@ -5,17 +5,11 @@ use diesel::table; table! { - key_share_prepares(epoch) { + key_shares(epoch) { epoch -> Integer, share -> Text, share_digest -> Binary, - } -} - -table! 
{ - key_share_commits(epoch) { - epoch -> Integer, - share_digest -> Binary, + committed -> Bool, } } diff --git a/bootstore/src/db/schema.sql b/bootstore/src/db/schema.sql index 167426c87d0..0bfd607347b 100644 --- a/bootstore/src/db/schema.sql +++ b/bootstore/src/db/schema.sql @@ -1,19 +1,12 @@ -CREATE TABLE IF NOT EXISTS key_share_prepares ( +CREATE TABLE IF NOT EXISTS key_shares ( epoch INTEGER NOT NULL, share TEXT NOT NULL, share_digest BLOB NOT NULL, + committed INTEGER NOT NULL, PRIMARY KEY (epoch) ); -CREATE TABLE IF NOT EXISTS key_share_commits ( - epoch INTEGER NOT NULL, - share_digest BLOB NOT NULL, - - PRIMARY KEY (epoch) - FOREIGN KEY (epoch) REFERENCES key_share_prepares (epoch) -); - CREATE TABLE IF NOT EXISTS encrypted_root_secrets ( epoch INTEGER NOT NULL, salt BLOB NOT NULL, diff --git a/bootstore/src/lib.rs b/bootstore/src/lib.rs index de26e35be25..ac8b15d166a 100644 --- a/bootstore/src/lib.rs +++ b/bootstore/src/lib.rs @@ -1,3 +1,7 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + //! The two communication paths for the bootstore: //! //! RSS -> Sled Agent -> Coordinator -> Storage Nodes diff --git a/bootstore/src/messages.rs b/bootstore/src/messages.rs index 7547f331f8d..95a9023cd67 100644 --- a/bootstore/src/messages.rs +++ b/bootstore/src/messages.rs @@ -40,12 +40,12 @@ pub enum NodeOp { /// don't know what information was already transferred to sleds, due to /// lack of global datastore, we must re-issue the request. /// - /// This request generates a `KeySharePrepare` for epoch 0. Once all sleds - /// have prepared, RSS trigger the start of CockroachDB replicas. The trust - /// quorum membership and prepare status will be written into CockroachDB - /// as the epoch 0 trust quorum configuration. 
Nexus will then proceed to - /// commit the trust quorum information, by first writing the Commit to - /// CockroachDb and then sending a `KeyShareCommit` for epoch 0. + /// This request generates a `KeySharePrepare` for epoch 0. Once all + /// sleds have prepared, RSS triggers the start of CockroachDB replicas. + /// The trust quorum membership and prepare status will be written into + /// CockroachDB as the epoch 0 trust quorum configuration. Nexus will then + /// proceed to commit the trust quorum information, by first writing the + /// Commit to CockroachDb and then sending a `KeyShareCommit` for epoch 0. /// /// TODO: The rack plan should also be sent here with similar storage /// strategy as the key share/trust quorum membership. From a9ac17c3df90c34267b77f05f6b1a1730658d76c Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 1 Sep 2022 17:58:27 +0000 Subject: [PATCH 21/28] Use :memory: db for tests --- bootstore/src/db/mod.rs | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/bootstore/src/db/mod.rs b/bootstore/src/db/mod.rs index 875e7dc077b..acfacdb79d7 100644 --- a/bootstore/src/db/mod.rs +++ b/bootstore/src/db/mod.rs @@ -162,20 +162,11 @@ mod tests { .collect() } - fn rand_db_name() -> String { - let seed: String = thread_rng() - .sample_iter(&Alphanumeric) - .take(16) - .map(char::from) - .collect(); - format!("/tmp/testdb-{}.sqlite", seed) - } - #[test] fn simple_prepare_insert_and_query() { use schema::key_shares::dsl; let log = test_setup_log("test_db").log.clone(); - let mut db = Db::open(log, &rand_db_name()).unwrap(); + let mut db = Db::open(log, ":memory:").unwrap(); let shares = new_shares(); let epoch = 0; let expected: SerializableShareDistribution = shares[0].clone().into(); @@ -192,7 +183,7 @@ mod tests { #[test] fn commit_fails_without_corresponding_prepare() { let log = test_setup_log("test_db").log.clone(); - let mut db = Db::open(log, &rand_db_name()).unwrap(); + let mut db = Db::open(log, 
":memory:").unwrap(); let epoch = 0; let digest = sprockets_common::Sha3_256Digest::default(); @@ -203,7 +194,7 @@ mod tests { #[test] fn commit_fails_with_invalid_hash() { let log = test_setup_log("test_db").log.clone(); - let mut db = Db::open(log, &rand_db_name()).unwrap(); + let mut db = Db::open(log, ":memory:").unwrap(); let shares = new_shares(); let epoch = 0; let expected: SerializableShareDistribution = shares[0].clone().into(); @@ -216,7 +207,7 @@ mod tests { #[test] fn commit_succeeds_with_correct_hash() { let log = test_setup_log("test_db").log.clone(); - let mut db = Db::open(log, &rand_db_name()).unwrap(); + let mut db = Db::open(log, ":memory:").unwrap(); let shares = new_shares(); let epoch = 0; let expected: SerializableShareDistribution = shares[0].clone().into(); From 815f8e16ad77412fc23f1be6f257100912de1ad6 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 1 Sep 2022 19:25:32 +0000 Subject: [PATCH 22/28] Use bcs instead of json for serialization --- Cargo.lock | 12 +++++++++++- bootstore/Cargo.toml | 2 +- bootstore/src/db/macros.rs | 18 +++++++++--------- bootstore/src/db/mod.rs | 21 ++++----------------- bootstore/src/db/models.rs | 4 ++-- bootstore/src/db/schema.rs | 2 +- bootstore/src/db/schema.sql | 2 +- 7 files changed, 29 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 729e63e1059..52d4182d5e7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -222,6 +222,16 @@ dependencies = [ "tokio", ] +[[package]] +name = "bcs" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "510fd83e3eaf7263b06182f3550b4c0af2af42cb36ab8024969ff5ea7fcb2833" +dependencies = [ + "serde", + "thiserror", +] + [[package]] name = "bincode" version = "1.3.3" @@ -342,6 +352,7 @@ dependencies = [ name = "bootstore" version = "0.1.0" dependencies = [ + "bcs", "bincode", "derive_more", "diesel", @@ -349,7 +360,6 @@ dependencies = [ "p256", "rand 0.8.5", "serde", - "serde_json", "sha3", "slog", 
"sprockets-common", diff --git a/bootstore/Cargo.toml b/bootstore/Cargo.toml index 30fec9ad4b2..be635c917dd 100644 --- a/bootstore/Cargo.toml +++ b/bootstore/Cargo.toml @@ -6,12 +6,12 @@ edition = "2021" license = "MPL-2.0" [dependencies] +bcs = "0.1.3" derive_more = "0.99.17" diesel = { version = "2.0.0-rc.1", features = ["sqlite", "chrono", "serde_json", "network-address", "uuid"] } p256 = "0.9.0" rand = { version = "0.8.5", features = ["getrandom"] } serde = { version = "1.0", features = [ "derive" ] } -serde_json = "1.0" sha3 = "0.10.1" slog = { version = "2.5", features = [ "max_level_trace", "release_max_level_debug" ] } sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" } diff --git a/bootstore/src/db/macros.rs b/bootstore/src/db/macros.rs index 9b4926e5b14..3cb7cb48221 100644 --- a/bootstore/src/db/macros.rs +++ b/bootstore/src/db/macros.rs @@ -6,36 +6,36 @@ /// Shamelessly stolen from buildomat/common/src/db.rs /// Thanks @jmc -macro_rules! json_new_type { +macro_rules! bcs_new_type { ($name:ident, $mytype:ty) => { #[derive( Clone, Debug, FromSqlRow, diesel::expression::AsExpression, )] - #[diesel(sql_type = diesel::sql_types::Text)] + #[diesel(sql_type = diesel::sql_types::Binary)] pub struct $name(pub $mytype); - impl ToSql for $name + impl ToSql for $name where - String: ToSql, + Vec: ToSql, { fn to_sql( &self, out: &mut diesel::serialize::Output, ) -> diesel::serialize::Result { - out.set_value(serde_json::to_string(&self.0)?); + out.set_value(bcs::to_bytes(&self.0)?); Ok(diesel::serialize::IsNull::No) } } - impl FromSql for $name + impl FromSql for $name where DB: diesel::backend::Backend, - String: FromSql, + Vec: FromSql, { fn from_sql( bytes: diesel::backend::RawValue, ) -> diesel::deserialize::Result { - Ok($name(serde_json::from_str(&String::from_sql(bytes)?)?)) + Ok($name(bcs::from_bytes(&Vec::::from_sql(bytes)?)?)) } } @@ -134,4 +134,4 @@ macro_rules! 
array_new_type { } pub(crate) use array_new_type; -pub(crate) use json_new_type; +pub(crate) use bcs_new_type; diff --git a/bootstore/src/db/mod.rs b/bootstore/src/db/mod.rs index acfacdb79d7..17de13dae0b 100644 --- a/bootstore/src/db/mod.rs +++ b/bootstore/src/db/mod.rs @@ -30,24 +30,12 @@ pub enum Error { Db(#[from] diesel::result::Error), #[error(transparent)] - Json(#[from] serde_json::Error), + Bcs(#[from] bcs::Error), #[error("Share commit for {epoch} does not match prepare")] CommitHashMismatch { epoch: i32 }, } -/// The ID of the database used to store blobs. -/// -/// We separate them because they are encrypted and accessed differently. -pub enum DbId { - /// Used pre-rack unlock: Contains key shares and membership data - TrustQuorum, - - /// Used post-rack unlock: Contains information necessary for setting - /// up NTP. - NetworkConfig, -} - pub struct Db { log: Logger, conn: SqliteConnection, @@ -89,10 +77,11 @@ impl Db { epoch: i32, share: SerializableShareDistribution, ) -> Result<(), Error> { + info!(self.log, "Writing key share prepare for {epoch} to the Db"); use schema::key_shares::dsl; // We save the digest so we don't have to deserialize and recompute most of the time. // We'd only want to do that for a consistency check occasionally. 
- let val = serde_json::to_string(&share)?; + let val = bcs::to_bytes(&share)?; let share_digest = sprockets_common::Sha3_256Digest(Sha3_256::digest(&val).into()) .into(); @@ -139,8 +128,6 @@ mod tests { use super::*; use crate::trust_quorum::{RackSecret, ShareDistribution}; use omicron_test_utils::dev::test_setup_log; - use rand::distributions::Alphanumeric; - use rand::{thread_rng, Rng}; // TODO: Fill in with actual member certs fn new_shares() -> Vec { @@ -213,7 +200,7 @@ mod tests { let expected: SerializableShareDistribution = shares[0].clone().into(); db.prepare_share(epoch, expected.clone()).unwrap(); - let val = serde_json::to_string(&expected).unwrap(); + let val = bcs::to_bytes(&expected).unwrap(); let digest = sprockets_common::Sha3_256Digest(Sha3_256::digest(&val).into()) .into(); diff --git a/bootstore/src/db/models.rs b/bootstore/src/db/models.rs index 2775b0e48d7..f567b35a8f5 100644 --- a/bootstore/src/db/models.rs +++ b/bootstore/src/db/models.rs @@ -10,11 +10,11 @@ use diesel::serialize::ToSql; use diesel::FromSqlRow; use super::macros::array_new_type; -use super::macros::json_new_type; +use super::macros::bcs_new_type; use super::schema::*; use crate::trust_quorum::SerializableShareDistribution; -json_new_type!(Share, SerializableShareDistribution); +bcs_new_type!(Share, SerializableShareDistribution); /// When a [`KeyShareParepare`] message arrives it is stored in a [`KeyShare`] /// When a [`KeyShareCommit`] message arrives the `committed` field/column is diff --git a/bootstore/src/db/schema.rs b/bootstore/src/db/schema.rs index 1c4fb319f7c..dbf72dcbc6c 100644 --- a/bootstore/src/db/schema.rs +++ b/bootstore/src/db/schema.rs @@ -7,7 +7,7 @@ use diesel::table; table! 
{ key_shares(epoch) { epoch -> Integer, - share -> Text, + share -> Binary, share_digest -> Binary, committed -> Bool, } diff --git a/bootstore/src/db/schema.sql b/bootstore/src/db/schema.sql index 0bfd607347b..4b0a0257e6b 100644 --- a/bootstore/src/db/schema.sql +++ b/bootstore/src/db/schema.sql @@ -1,6 +1,6 @@ CREATE TABLE IF NOT EXISTS key_shares ( epoch INTEGER NOT NULL, - share TEXT NOT NULL, + share BLOB NOT NULL, share_digest BLOB NOT NULL, committed INTEGER NOT NULL, From 181ce37d908c956500b9fda5f3d934fa2cb088d0 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 1 Sep 2022 19:26:30 +0000 Subject: [PATCH 23/28] Remove bootstore from workspace default-members until further along --- Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f9d2863a5f6..a95b95c9c64 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,7 +38,6 @@ members = [ ] default-members = [ - "bootstore", "common", "ddm-admin-client", "deploy", From 452dcec2d7712374c068420d834283862f31d4fc Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Thu, 1 Sep 2022 22:17:23 +0000 Subject: [PATCH 24/28] fix unused variable/code warnings --- bootstore/src/db/mod.rs | 9 +++++++- bootstore/src/node.rs | 27 +++++++++++------------ bootstore/src/trust_quorum/error.rs | 4 ++++ bootstore/src/trust_quorum/rack_secret.rs | 2 ++ 4 files changed, 27 insertions(+), 15 deletions(-) diff --git a/bootstore/src/db/mod.rs b/bootstore/src/db/mod.rs index 17de13dae0b..753b42ff315 100644 --- a/bootstore/src/db/mod.rs +++ b/bootstore/src/db/mod.rs @@ -15,7 +15,6 @@ use slog::Logger; use slog::{info, o}; use crate::trust_quorum::SerializableShareDistribution; -use models::EncryptedRootSecret; use models::KeyShare; use models::Sha3_256Digest; use models::Share; @@ -32,15 +31,23 @@ pub enum Error { #[error(transparent)] Bcs(#[from] bcs::Error), + // Temporary until the using code is written + #[allow(dead_code)] #[error("Share commit for {epoch} does not match prepare")] CommitHashMismatch { epoch: i32 }, } pub struct Db { + // Temporary until the using code is written + #[allow(dead_code)] log: Logger, + // Temporary until the using code is written + #[allow(dead_code)] conn: SqliteConnection, } +// Temporary until the using code is written +#[allow(dead_code)] impl Db { pub fn open(log: Logger, path: &str) -> Result { let schema = include_str!("./schema.sql"); diff --git a/bootstore/src/node.rs b/bootstore/src/node.rs index eba6e03b7ad..7890e38ff9b 100644 --- a/bootstore/src/node.rs +++ b/bootstore/src/node.rs @@ -8,11 +8,7 @@ //! This allows easier testing of clusters and failure situations. 
use slog::Logger; -use slog::{info, o}; use sprockets_host::Ed25519Certificate; -use std::net::SocketAddrV6; -use std::sync::Arc; -use std::sync::Mutex; use uuid::Uuid; use crate::db::Db; @@ -24,8 +20,8 @@ pub struct Config { log: Logger, db_path: String, // TODO: This will live inside the certificate eventually - serial_number: String, - device_id_cert: Ed25519Certificate, + _serial_number: String, + _device_id_cert: Ed25519Certificate, } /// A node of the bootstore @@ -38,6 +34,9 @@ pub struct Config { /// Messages are received over sprockets sessions from either peer nodes /// during rack unlock, or from a [`Coordinator`] during rack initialization /// or reconfiguration. +// +// Temporary until the using code is written +#[allow(dead_code)] pub struct Node { config: Config, db: Db, @@ -85,32 +84,32 @@ impl Node { fn handle_get_share( &mut self, - epoch: i32, + _epoch: i32, ) -> Result { unimplemented!(); } fn handle_initialize( &mut self, - rack_uuid: Uuid, - share_distribution: SerializableShareDistribution, + _rack_uuid: Uuid, + _share_distribution: SerializableShareDistribution, ) -> Result { unimplemented!(); } fn handle_key_share_prepare( &mut self, - rack_uuid: Uuid, - epoch: i32, - share_distribution: SerializableShareDistribution, + _rack_uuid: Uuid, + _epoch: i32, + _share_distribution: SerializableShareDistribution, ) -> Result { unimplemented!(); } fn handle_key_share_commit( &mut self, - rack_uuid: Uuid, - epoch: i32, + _rack_uuid: Uuid, + _epoch: i32, ) -> Result { unimplemented!(); } diff --git a/bootstore/src/trust_quorum/error.rs b/bootstore/src/trust_quorum/error.rs index da6002cd33d..f695762c244 100644 --- a/bootstore/src/trust_quorum/error.rs +++ b/bootstore/src/trust_quorum/error.rs @@ -8,9 +8,13 @@ use thiserror::Error; #[derive(Debug, Error)] pub enum TrustQuorumError { + // Temporary until the using code is written + #[allow(dead_code)] #[error("Not enough peers to unlock storage")] NotEnoughPeers, + // Temporary until the using code is 
written + #[allow(dead_code)] #[error("Rack secret construction failed: {0:?}")] RackSecretConstructionFailed(vsss_rs::Error), } diff --git a/bootstore/src/trust_quorum/rack_secret.rs b/bootstore/src/trust_quorum/rack_secret.rs index 2a801b2fe3c..e9e22c527f7 100644 --- a/bootstore/src/trust_quorum/rack_secret.rs +++ b/bootstore/src/trust_quorum/rack_secret.rs @@ -70,6 +70,8 @@ impl Verifier { } } +// Temporary until the using code is written +#[allow(dead_code)] impl RackSecret { /// Create a secret based on the NIST P-256 curve pub fn new() -> RackSecret { From 83700ccb67a7f8382517f4ad7a79964ce3508f66 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Fri, 2 Sep 2022 15:43:13 +0000 Subject: [PATCH 25/28] cleanup logs in tests --- bootstore/src/db/mod.rs | 23 +++++++++++++---------- bootstore/src/node.rs | 2 +- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/bootstore/src/db/mod.rs b/bootstore/src/db/mod.rs index 753b42ff315..a0a326021c2 100644 --- a/bootstore/src/db/mod.rs +++ b/bootstore/src/db/mod.rs @@ -49,7 +49,7 @@ pub struct Db { // Temporary until the using code is written #[allow(dead_code)] impl Db { - pub fn open(log: Logger, path: &str) -> Result { + pub fn open(log: &Logger, path: &str) -> Result { let schema = include_str!("./schema.sql"); let log = log.new(o!( "component" => "BootstoreDb" @@ -159,8 +159,8 @@ mod tests { #[test] fn simple_prepare_insert_and_query() { use schema::key_shares::dsl; - let log = test_setup_log("test_db").log.clone(); - let mut db = Db::open(log, ":memory:").unwrap(); + let logctx = test_setup_log("test_db"); + let mut db = Db::open(&logctx.log, ":memory:").unwrap(); let shares = new_shares(); let epoch = 0; let expected: SerializableShareDistribution = shares[0].clone().into(); @@ -172,23 +172,24 @@ mod tests { .unwrap(); assert_eq!(share.0, expected); assert_eq!(committed, false); + logctx.cleanup_successful(); } #[test] fn commit_fails_without_corresponding_prepare() { - let log = 
test_setup_log("test_db").log.clone(); - let mut db = Db::open(log, ":memory:").unwrap(); + let logctx = test_setup_log("test_db"); + let mut db = Db::open(&logctx.log, ":memory:").unwrap(); let epoch = 0; - let digest = sprockets_common::Sha3_256Digest::default(); let err = db.commit_share(epoch, digest).unwrap_err(); assert!(matches!(err, Error::Db(diesel::result::Error::NotFound))); + logctx.cleanup_successful(); } #[test] fn commit_fails_with_invalid_hash() { - let log = test_setup_log("test_db").log.clone(); - let mut db = Db::open(log, ":memory:").unwrap(); + let logctx = test_setup_log("test_db"); + let mut db = Db::open(&logctx.log, ":memory:").unwrap(); let shares = new_shares(); let epoch = 0; let expected: SerializableShareDistribution = shares[0].clone().into(); @@ -196,12 +197,13 @@ mod tests { let digest = sprockets_common::Sha3_256Digest::default(); let err = db.commit_share(epoch, digest).unwrap_err(); assert!(matches!(err, Error::CommitHashMismatch { epoch: _ })); + logctx.cleanup_successful(); } #[test] fn commit_succeeds_with_correct_hash() { - let log = test_setup_log("test_db").log.clone(); - let mut db = Db::open(log, ":memory:").unwrap(); + let logctx = test_setup_log("test_db"); + let mut db = Db::open(&logctx.log, ":memory:").unwrap(); let shares = new_shares(); let epoch = 0; let expected: SerializableShareDistribution = shares[0].clone().into(); @@ -221,5 +223,6 @@ mod tests { .get_result::(&mut db.conn) .unwrap(); assert_eq!(true, committed); + logctx.cleanup_successful(); } } diff --git a/bootstore/src/node.rs b/bootstore/src/node.rs index 7890e38ff9b..183cb5c46bb 100644 --- a/bootstore/src/node.rs +++ b/bootstore/src/node.rs @@ -45,7 +45,7 @@ pub struct Node { impl Node { /// Create a new Node pub fn new(config: Config) -> Node { - let db = Db::open(config.log.clone(), &config.db_path).unwrap(); + let db = Db::open(&config.log, &config.db_path).unwrap(); Node { config, db } } From 9c624d0cada344b5ab9953f0c381f8386d0a6ad6 Mon Sep 17 
00:00:00 2001 From: "Andrew J. Stone" Date: Tue, 6 Sep 2022 15:00:57 +0000 Subject: [PATCH 26/28] debug kill -9 shenanigans --- .github/buildomat/jobs/build-and-test.sh | 6 ++++++ signals.sh | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100755 signals.sh diff --git a/.github/buildomat/jobs/build-and-test.sh b/.github/buildomat/jobs/build-and-test.sh index eb5c241b389..cc5c610c98e 100644 --- a/.github/buildomat/jobs/build-and-test.sh +++ b/.github/buildomat/jobs/build-and-test.sh @@ -8,6 +8,7 @@ #: "/var/tmp/omicron_tmp/*", #: "!/var/tmp/omicron_tmp/crdb-base*", #: "!/var/tmp/omicron_tmp/rustc*", +#: "%/var/tmp/signals.log", #: ] #: @@ -35,6 +36,11 @@ export PATH="$PATH:$PWD/out/cockroachdb/bin:$PWD/out/clickhouse" banner prerequisites ptime -m bash ./tools/install_builder_prerequisites.sh -y +# +# Try to figure out what is killing the cargo test process +# +./signals.sh + # # We build with: # diff --git a/signals.sh b/signals.sh new file mode 100755 index 00000000000..f6eb13cf282 --- /dev/null +++ b/signals.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +set -o xtrace +pfexec /usr/sbin/dtrace \ + -x switchrate=1000hz \ + -q \ + -o /var/tmp/signals.log \ + -n ' + signal-send { + printf("%Y (pid %d, %s) send signal %d (to pid %d, %s)\n", + walltimestamp, pid, execname, args[2], + args[1]->pr_pid, args[1]->pr_fname); + ustack(); + } + ' >/dev/null 2>&1 &2 +disown + From 37869f4376fd012735af107c32ce0236f02c4f9e Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Tue, 6 Sep 2022 19:32:36 +0000 Subject: [PATCH 27/28] remove buildomat debugging tools --- .github/buildomat/jobs/build-and-test.sh | 6 ------ signals.sh | 20 -------------------- 2 files changed, 26 deletions(-) delete mode 100755 signals.sh diff --git a/.github/buildomat/jobs/build-and-test.sh b/.github/buildomat/jobs/build-and-test.sh index cc5c610c98e..eb5c241b389 100644 --- a/.github/buildomat/jobs/build-and-test.sh +++ b/.github/buildomat/jobs/build-and-test.sh @@ -8,7 +8,6 @@ #: "/var/tmp/omicron_tmp/*", #: "!/var/tmp/omicron_tmp/crdb-base*", #: "!/var/tmp/omicron_tmp/rustc*", -#: "%/var/tmp/signals.log", #: ] #: @@ -36,11 +35,6 @@ export PATH="$PATH:$PWD/out/cockroachdb/bin:$PWD/out/clickhouse" banner prerequisites ptime -m bash ./tools/install_builder_prerequisites.sh -y -# -# Try to figure out what is killing the cargo test process -# -./signals.sh - # # We build with: # diff --git a/signals.sh b/signals.sh deleted file mode 100755 index f6eb13cf282..00000000000 --- a/signals.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -set -o xtrace -pfexec /usr/sbin/dtrace \ - -x switchrate=1000hz \ - -q \ - -o /var/tmp/signals.log \ - -n ' - signal-send { - printf("%Y (pid %d, %s) send signal %d (to pid %d, %s)\n", - walltimestamp, pid, execname, args[2], - args[1]->pr_pid, args[1]->pr_fname); - ustack(); - } - ' >/dev/null 2>&1 &2 -disown - From 1b637045c8664b8c864d5cf28c51b0781c24ae36 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Tue, 6 Sep 2022 23:19:36 +0000 Subject: [PATCH 28/28] Fix build by relying on pq-sys dep --- Cargo.lock | 2 ++ bootstore/Cargo.toml | 11 ++++++++++- bootstore/build.rs | 10 ++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 bootstore/build.rs diff --git a/Cargo.lock b/Cargo.lock index 52d4182d5e7..413d4c86ab9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -356,8 +356,10 @@ dependencies = [ "bincode", "derive_more", "diesel", + "omicron-rpaths", "omicron-test-utils", "p256", + "pq-sys", "rand 0.8.5", "serde", "sha3", diff --git a/bootstore/Cargo.toml b/bootstore/Cargo.toml index be635c917dd..6ef8366391c 100644 --- a/bootstore/Cargo.toml +++ b/bootstore/Cargo.toml @@ -5,11 +5,20 @@ version = "0.1.0" edition = "2021" license = "MPL-2.0" +[build-dependencies.omicron-rpaths] +path = "../rpaths" + [dependencies] bcs = "0.1.3" derive_more = "0.99.17" -diesel = { version = "2.0.0-rc.1", features = ["sqlite", "chrono", "serde_json", "network-address", "uuid"] } +diesel = { version = "2.0.0-rc.1", features = ["sqlite", "uuid"] } p256 = "0.9.0" +# See omicron-rpaths for more about the "pq-sys" dependency. +# We don't actually need `pq-sys` here, other than to satisfy +# `[build-dependencies.omicron-rpaths]`, which are needed by `omicron-test- +# utils`. Unfortunately, it doesn't appear possible to put the `pq-sys` dep +# only in `[dev-dependencies]`. +pq-sys = "*" rand = { version = "0.8.5", features = ["getrandom"] } serde = { version = "1.0", features = [ "derive" ] } sha3 = "0.10.1" diff --git a/bootstore/build.rs b/bootstore/build.rs new file mode 100644 index 00000000000..1ba9acd41c9 --- /dev/null +++ b/bootstore/build.rs @@ -0,0 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// See omicron-rpaths for documentation. 
+// NOTE: This file MUST be kept in sync with the other build.rs files in this +// repository. +fn main() { + omicron_rpaths::configure_default_omicron_rpaths(); +}