diff --git a/Cargo.lock b/Cargo.lock index 23d5929b014..413d4c86ab9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -222,6 +222,16 @@ dependencies = [ "tokio", ] +[[package]] +name = "bcs" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "510fd83e3eaf7263b06182f3550b4c0af2af42cb36ab8024969ff5ea7fcb2833" +dependencies = [ + "serde", + "thiserror", +] + [[package]] name = "bincode" version = "1.3.3" @@ -338,6 +348,29 @@ dependencies = [ "byte-tools", ] +[[package]] +name = "bootstore" +version = "0.1.0" +dependencies = [ + "bcs", + "bincode", + "derive_more", + "diesel", + "omicron-rpaths", + "omicron-test-utils", + "p256", + "pq-sys", + "rand 0.8.5", + "serde", + "sha3", + "slog", + "sprockets-common", + "sprockets-host", + "thiserror", + "uuid", + "vsss-rs", +] + [[package]] name = "bstr" version = "0.2.17" @@ -1065,6 +1098,7 @@ dependencies = [ "ipnetwork", "itoa 1.0.2", "libc", + "libsqlite3-sys", "pq-sys", "r2d2", "serde_json", @@ -2401,6 +2435,16 @@ dependencies = [ "tracing", ] +[[package]] +name = "libsqlite3-sys" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f0455f2c1bc9a7caa792907026e469c1d91761fb0ea37cbb16427c77280cf35" +dependencies = [ + "pkg-config", + "vcpkg", +] + [[package]] name = "libxml" version = "0.3.1" @@ -3064,6 +3108,7 @@ dependencies = [ "crucible-agent-client", "crucible-client-types", "ddm-admin-client", + "diesel", "dropshot", "expectorate", "futures", diff --git a/Cargo.toml b/Cargo.toml index 70d3e33ca42..a95b95c9c64 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,6 @@ [workspace] members = [ + "bootstore", "common", "ddm-admin-client", "deploy", diff --git a/bootstore/Cargo.toml b/bootstore/Cargo.toml new file mode 100644 index 00000000000..6ef8366391c --- /dev/null +++ b/bootstore/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "bootstore" +description = "Storage required for rack unlock" +version = "0.1.0" +edition = "2021" +license = 
"MPL-2.0" + +[build-dependencies.omicron-rpaths] +path = "../rpaths" + +[dependencies] +bcs = "0.1.3" +derive_more = "0.99.17" +diesel = { version = "2.0.0-rc.1", features = ["sqlite", "uuid"] } +p256 = "0.9.0" +# See omicron-rpaths for more about the "pq-sys" dependency. +# We don't actually need `pq-sys` here, other than to satisfy +# `[build-dependencies.omicron-rpaths]`, which are needed by `omicron-test- +# utils`. Unfortunately, it doesn't appear possible to put the `pq-sys` dep +# only in `[dev-dependencies]`. +pq-sys = "*" +rand = { version = "0.8.5", features = ["getrandom"] } +serde = { version = "1.0", features = [ "derive" ] } +sha3 = "0.10.1" +slog = { version = "2.5", features = [ "max_level_trace", "release_max_level_debug" ] } +sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" } +sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" } +thiserror = "1.0" +uuid = { version = "1.1.0", features = [ "serde", "v4" ] } +vsss-rs = { version = "2.0.0", default-features = false, features = ["std"] } + +[dev-dependencies] +bincode = "1.3.3" +omicron-test-utils = { path = "../test-utils" } diff --git a/bootstore/build.rs b/bootstore/build.rs new file mode 100644 index 00000000000..1ba9acd41c9 --- /dev/null +++ b/bootstore/build.rs @@ -0,0 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// See omicron-rpaths for documentation. +// NOTE: This file MUST be kept in sync with the other build.rs files in this +// repository. 
+fn main() { + omicron_rpaths::configure_default_omicron_rpaths(); +} diff --git a/bootstore/src/db/macros.rs b/bootstore/src/db/macros.rs new file mode 100644 index 00000000000..3cb7cb48221 --- /dev/null +++ b/bootstore/src/db/macros.rs @@ -0,0 +1,137 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Macros used to create ToSql and FromSql impls required by Diesel + +/// Shamelessly stolen from buildomat/common/src/db.rs +/// Thanks @jmc +macro_rules! bcs_new_type { + ($name:ident, $mytype:ty) => { + #[derive( + Clone, Debug, FromSqlRow, diesel::expression::AsExpression, + )] + #[diesel(sql_type = diesel::sql_types::Binary)] + pub struct $name(pub $mytype); + + impl ToSql for $name + where + Vec: ToSql, + { + fn to_sql( + &self, + out: &mut diesel::serialize::Output, + ) -> diesel::serialize::Result { + out.set_value(bcs::to_bytes(&self.0)?); + Ok(diesel::serialize::IsNull::No) + } + } + + impl FromSql for $name + where + DB: diesel::backend::Backend, + Vec: FromSql, + { + fn from_sql( + bytes: diesel::backend::RawValue, + ) -> diesel::deserialize::Result { + Ok($name(bcs::from_bytes(&Vec::::from_sql(bytes)?)?)) + } + } + + impl From<$name> for $mytype { + fn from(t: $name) -> Self { + t.0 + } + } + + impl From<$mytype> for $name { + fn from(t: $mytype) -> $name { + $name(t) + } + } + + impl std::ops::Deref for $name { + type Target = $mytype; + + fn deref(&self) -> &Self::Target { + &self.0 + } + } + }; +} + +macro_rules! 
array_new_type { + ($name:ident, $len:expr) => { + #[derive( + PartialEq, + Clone, + Debug, + FromSqlRow, + diesel::expression::AsExpression, + )] + #[diesel(sql_type = diesel::sql_types::Binary)] + pub struct $name(pub [u8; $len]); + + impl ToSql for $name + where + Vec: ToSql, + { + fn to_sql( + &self, + out: &mut diesel::serialize::Output, + ) -> diesel::serialize::Result { + let mut copy = vec![0; $len]; + copy.copy_from_slice(&self.0[..]); + out.set_value(copy); + Ok(diesel::serialize::IsNull::No) + } + } + + impl FromSql for $name + where + DB: diesel::backend::Backend, + Vec: FromSql, + { + fn from_sql( + bytes: diesel::backend::RawValue, + ) -> diesel::deserialize::Result { + let read_bytes = Vec::::from_sql(bytes)?; + if read_bytes.len() != $len { + return Err(format!( + "Invalid length. Expected: {}, Actual: {}", + $len, + read_bytes.len() + ) + .into()); + } + let mut out = [0u8; $len]; + out.copy_from_slice(&read_bytes[..]); + Ok($name(out)) + } + } + + impl From<$name> for [u8; $len] { + fn from(t: $name) -> Self { + t.0 + } + } + + impl From<[u8; $len]> for $name { + fn from(t: [u8; $len]) -> $name { + $name(t) + } + } + + impl std::ops::Deref for $name { + type Target = [u8; $len]; + + fn deref(&self) -> &Self::Target { + &self.0 + } + } + }; +} + +pub(crate) use array_new_type; +pub(crate) use bcs_new_type; diff --git a/bootstore/src/db/mod.rs b/bootstore/src/db/mod.rs new file mode 100644 index 00000000000..a0a326021c2 --- /dev/null +++ b/bootstore/src/db/mod.rs @@ -0,0 +1,228 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Database layer for the bootstore + +mod macros; +mod models; +mod schema; + +use diesel::connection::SimpleConnection; +use diesel::prelude::*; +use diesel::SqliteConnection; +use slog::Logger; +use slog::{info, o}; + +use crate::trust_quorum::SerializableShareDistribution; +use models::KeyShare; +use models::Sha3_256Digest; +use models::Share; +use sha3::{Digest, Sha3_256}; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Failed to open db connection to {path}: {err}")] + DbOpen { path: String, err: ConnectionError }, + + #[error(transparent)] + Db(#[from] diesel::result::Error), + + #[error(transparent)] + Bcs(#[from] bcs::Error), + + // Temporary until the using code is written + #[allow(dead_code)] + #[error("Share commit for {epoch} does not match prepare")] + CommitHashMismatch { epoch: i32 }, +} + +pub struct Db { + // Temporary until the using code is written + #[allow(dead_code)] + log: Logger, + // Temporary until the using code is written + #[allow(dead_code)] + conn: SqliteConnection, +} + +// Temporary until the using code is written +#[allow(dead_code)] +impl Db { + pub fn open(log: &Logger, path: &str) -> Result { + let schema = include_str!("./schema.sql"); + let log = log.new(o!( + "component" => "BootstoreDb" + )); + info!(log, "opening database {:?}", path); + let mut c = SqliteConnection::establish(path) + .map_err(|err| Error::DbOpen { path: path.into(), err })?; + + // Enable foreign key processing, which is off by default. Without + // enabling this, there is no referential integrity check between + // primary and foreign keys in tables. + diesel::sql_query("PRAGMA foreign_keys = 'ON'").execute(&mut c)?; + + // Enable the WAL. + diesel::sql_query("PRAGMA journal_mode = 'WAL'").execute(&mut c)?; + + // Force overwriting with 0s on delete + diesel::sql_query("PRAGMA secure_delete = 'ON'").execute(&mut c)?; + + // Sync to disk after every commit. + // DO NOT CHANGE THIS SETTING! 
+ diesel::sql_query("PRAGMA synchronous = 'FULL'").execute(&mut c)?; + + // Create tables + c.batch_execute(schema)?; + + Ok(Db { log, conn: c }) + } + + pub fn prepare_share( + &mut self, + epoch: i32, + share: SerializableShareDistribution, + ) -> Result<(), Error> { + info!(self.log, "Writing key share prepare for {epoch} to the Db"); + use schema::key_shares::dsl; + // We save the digest so we don't have to deserialize and recompute most of the time. + // We'd only want to do that for a consistency check occasionally. + let val = bcs::to_bytes(&share)?; + let share_digest = + sprockets_common::Sha3_256Digest(Sha3_256::digest(&val).into()) + .into(); + let prepare = KeyShare { + epoch, + share: Share(share), + share_digest, + committed: false, + }; + diesel::insert_into(dsl::key_shares) + .values(&prepare) + .execute(&mut self.conn)?; + Ok(()) + } + + pub fn commit_share( + &mut self, + epoch: i32, + digest: sprockets_common::Sha3_256Digest, + ) -> Result<(), Error> { + use schema::key_shares::dsl; + self.conn.immediate_transaction(|tx| { + // We only want to commit if the share digest of the commit is the + // same as that of the prepare. 
+ let prepare_digest = dsl::key_shares + .select(dsl::share_digest) + .filter(dsl::epoch.eq(epoch)) + .get_result::(tx)?; + + if prepare_digest != digest.into() { + return Err(Error::CommitHashMismatch { epoch }); + } + + diesel::update(dsl::key_shares.filter(dsl::epoch.eq(epoch))) + .set(dsl::committed.eq(true)) + .execute(tx)?; + Ok(()) + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::trust_quorum::{RackSecret, ShareDistribution}; + use omicron_test_utils::dev::test_setup_log; + + // TODO: Fill in with actual member certs + fn new_shares() -> Vec { + let member_device_id_certs = vec![]; + let rack_secret_threshold = 3; + let total_shares = 5; + let secret = RackSecret::new(); + let (shares, verifier) = + secret.split(rack_secret_threshold, total_shares).unwrap(); + + shares + .into_iter() + .map(move |share| ShareDistribution { + threshold: rack_secret_threshold, + verifier: verifier.clone(), + share, + member_device_id_certs: member_device_id_certs.clone(), + }) + .collect() + } + + #[test] + fn simple_prepare_insert_and_query() { + use schema::key_shares::dsl; + let logctx = test_setup_log("test_db"); + let mut db = Db::open(&logctx.log, ":memory:").unwrap(); + let shares = new_shares(); + let epoch = 0; + let expected: SerializableShareDistribution = shares[0].clone().into(); + db.prepare_share(epoch, expected.clone()).unwrap(); + let (share, committed) = dsl::key_shares + .select((dsl::share, dsl::committed)) + .filter(dsl::epoch.eq(epoch)) + .get_result::<(Share, bool)>(&mut db.conn) + .unwrap(); + assert_eq!(share.0, expected); + assert_eq!(committed, false); + logctx.cleanup_successful(); + } + + #[test] + fn commit_fails_without_corresponding_prepare() { + let logctx = test_setup_log("test_db"); + let mut db = Db::open(&logctx.log, ":memory:").unwrap(); + let epoch = 0; + let digest = sprockets_common::Sha3_256Digest::default(); + let err = db.commit_share(epoch, digest).unwrap_err(); + assert!(matches!(err, 
Error::Db(diesel::result::Error::NotFound))); + logctx.cleanup_successful(); + } + + #[test] + fn commit_fails_with_invalid_hash() { + let logctx = test_setup_log("test_db"); + let mut db = Db::open(&logctx.log, ":memory:").unwrap(); + let shares = new_shares(); + let epoch = 0; + let expected: SerializableShareDistribution = shares[0].clone().into(); + db.prepare_share(epoch, expected.clone()).unwrap(); + let digest = sprockets_common::Sha3_256Digest::default(); + let err = db.commit_share(epoch, digest).unwrap_err(); + assert!(matches!(err, Error::CommitHashMismatch { epoch: _ })); + logctx.cleanup_successful(); + } + + #[test] + fn commit_succeeds_with_correct_hash() { + let logctx = test_setup_log("test_db"); + let mut db = Db::open(&logctx.log, ":memory:").unwrap(); + let shares = new_shares(); + let epoch = 0; + let expected: SerializableShareDistribution = shares[0].clone().into(); + db.prepare_share(epoch, expected.clone()).unwrap(); + + let val = bcs::to_bytes(&expected).unwrap(); + let digest = + sprockets_common::Sha3_256Digest(Sha3_256::digest(&val).into()) + .into(); + assert!(db.commit_share(epoch, digest).is_ok()); + + // Ensure `committed = true` + use schema::key_shares::dsl; + let committed = dsl::key_shares + .select(dsl::committed) + .filter(dsl::epoch.eq(epoch)) + .get_result::(&mut db.conn) + .unwrap(); + assert_eq!(true, committed); + logctx.cleanup_successful(); + } +} diff --git a/bootstore/src/db/models.rs b/bootstore/src/db/models.rs new file mode 100644 index 00000000000..f567b35a8f5 --- /dev/null +++ b/bootstore/src/db/models.rs @@ -0,0 +1,73 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
DB models + +use diesel::deserialize::FromSql; +use diesel::prelude::*; +use diesel::serialize::ToSql; +use diesel::FromSqlRow; + +use super::macros::array_new_type; +use super::macros::bcs_new_type; +use super::schema::*; +use crate::trust_quorum::SerializableShareDistribution; + +bcs_new_type!(Share, SerializableShareDistribution); + +/// When a [`KeySharePrepare`] message arrives it is stored in a [`KeyShare`]. +/// When a [`KeyShareCommit`] message arrives the `committed` field/column is +/// set to true. +#[derive(Debug, Queryable, Insertable)] +pub struct KeyShare { + pub epoch: i32, + pub share: Share, + pub share_digest: Sha3_256Digest, + pub committed: bool, +} + +// A chacha20poly1305 secret encrypted by a chacha20poly1305 secret key +// derived from the rack secret for the given epoch with the given salt +// +// The epoch informs which rack secret should be used to derive the +// encryption key used to encrypt this root secret. +// +// TODO-security: We probably don't want to log even the encrypted secret, but +// it's likely useful for debugging right now. +#[derive(Debug, Queryable, Insertable)] +pub struct EncryptedRootSecret { + /// The epoch of the rack secret rotation or rack reconfiguration + pub epoch: i32, + + /// Used as the salt parameter to HKDF to derive the encryption + /// key from the rack secret that protects `secret` in this struct. 
+ pub salt: Salt, + + /// The encrypted root secret for this epoch + pub secret: EncryptedSecret, + + /// The authentication tag for the encrypted secret + pub tag: AuthTag, +} + +// TODO: These should likely go in a crypto module +/// The length in bytes of a SHA3-256 digest +pub const DIGEST_LEN: usize = 32; + +/// The length in bytes of a ChaCha20Poly1305 Key +pub const KEY_LEN: usize = 32; + +/// The length in bytes of a ChaCha20Poly1305 authentication tag +pub const TAG_LEN: usize = 16; + +array_new_type!(Sha3_256Digest, DIGEST_LEN); +array_new_type!(EncryptedSecret, KEY_LEN); +array_new_type!(Salt, DIGEST_LEN); +array_new_type!(AuthTag, TAG_LEN); + +impl From for Sha3_256Digest { + fn from(digest: sprockets_common::Sha3_256Digest) -> Self { + Sha3_256Digest(digest.0) + } +} diff --git a/bootstore/src/db/schema.rs b/bootstore/src/db/schema.rs new file mode 100644 index 00000000000..dbf72dcbc6c --- /dev/null +++ b/bootstore/src/db/schema.rs @@ -0,0 +1,24 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use diesel::table; + +table! { + key_shares(epoch) { + epoch -> Integer, + share -> Binary, + share_digest -> Binary, + committed -> Bool, + } +} + +table! 
{ + encrypted_root_secrets(epoch) { + epoch -> Integer, + salt -> Binary, + secret -> Binary, + tag -> Binary, + + } +} diff --git a/bootstore/src/db/schema.sql b/bootstore/src/db/schema.sql new file mode 100644 index 00000000000..4b0a0257e6b --- /dev/null +++ b/bootstore/src/db/schema.sql @@ -0,0 +1,18 @@ +CREATE TABLE IF NOT EXISTS key_shares ( + epoch INTEGER NOT NULL, + share BLOB NOT NULL, + share_digest BLOB NOT NULL, + committed INTEGER NOT NULL, + + PRIMARY KEY (epoch) +); + +CREATE TABLE IF NOT EXISTS encrypted_root_secrets ( + epoch INTEGER NOT NULL, + salt BLOB NOT NULL, + secret BLOB NOT NULL, + tag BLOB NOT NULL, + + PRIMARY KEY (epoch), + FOREIGN KEY (epoch) REFERENCES key_shares (epoch) +); diff --git a/bootstore/src/lib.rs b/bootstore/src/lib.rs new file mode 100644 index 00000000000..ac8b15d166a --- /dev/null +++ b/bootstore/src/lib.rs @@ -0,0 +1,22 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! The two communication paths for the bootstore: +//! +//! RSS -> Sled Agent -> Coordinator -> Storage Nodes +//! Nexus -> Steno -> Sled Agent -> Coordinator -> Storage Nodes +//! +//! +//! Since some trust quorum membership information that is input via RSS must +//! make its way into CockroachDB so that reconfiguration works, we will load +//! that information from the trust quorum database, parse it, and write +//! it to CockroachDB when we start it up. + +mod db; +mod messages; +mod node; +mod trust_quorum; + +pub use node::Config; +pub use node::Node; diff --git a/bootstore/src/messages.rs b/bootstore/src/messages.rs new file mode 100644 index 00000000000..95a9023cd67 --- /dev/null +++ b/bootstore/src/messages.rs @@ -0,0 +1,113 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! All messages sent and received by bootstore nodes and coordinators + +use derive_more::From; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; +use vsss_rs::Share; + +use crate::trust_quorum::SerializableShareDistribution; + +/// A request sent to a [`Node`] from another [`Node`] or a [`Coordinator`]. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct NodeRequest { + pub version: u32, + /// A message correlation id to match requests to responses + pub id: u64, + pub op: NodeOp, +} + +/// A specific operation for a Node +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub enum NodeOp { + /// Retrieve a key share for the given epoch + /// + /// A [`Node`] will only respond if the epoch is still valid + /// and the sending [`Node`] is a member of the trust quorum. + GetShare { epoch: i32 }, + + /// A request sent by RSS with the trust quorum membership and key share + /// for epoch 0. + /// + /// Epoch 0 is the only epoch in which prepared data can be overwritten. + /// This is because there is no global datastore (CockroachDB) with which + /// to persist information that needs to be distributed to all sleds in the + /// rack. If RSS dies, or more importantly, the scrimlet it is running on + /// dies, we want to enable re-running on a different scrimlet. Since we + /// don't know what information was already transferred to sleds, due to + /// lack of global datastore, we must re-issue the request. + /// + /// This request generates a `KeySharePrepare` for epoch 0. Once all + /// sleds have prepared, RSS triggers the start of CockroachDB replicas. + /// The trust quorum membership and prepare status will be written into + /// CockroachDB as the epoch 0 trust quorum configuration. 
Nexus will then + /// proceed to commit the trust quorum information, by first writing the + /// Commit to CockroachDb and then sending a `KeyShareCommit` for epoch 0. + /// + /// TODO: The rack plan should also be sent here with similar storage + /// strategy as the key share/trust quorum membership. + Initialize { + rack_uuid: Uuid, + share_distribution: SerializableShareDistribution, + }, + + /// A request from a [`Coordinator`] for the Prepare phase + /// of a rekey or reconfiguration + KeySharePrepare { + rack_uuid: Uuid, + epoch: i32, + share_distribution: SerializableShareDistribution, + }, + + /// A request from a [`Coordinator`] for the Commit phase of a + /// rekey or reconfiguration + KeyShareCommit { rack_uuid: Uuid, epoch: i32 }, +} + +/// A response from a [`Node`] to another [`Node`] or a [`Coordinator`] +#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +pub struct NodeResponse { + pub version: u32, + /// A message correlation id to match requests to responses + pub id: u64, + pub result: Result, +} + +#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +/// The result of an operation from a [`Node`] +pub enum NodeOpResult { + /// A key share for a given epoch as requested by [`NodeOp::GetShare`] + Share { epoch: i32, share: Share }, + + /// An ack for the most recent coordinator message + CoordinatorAck, +} + +/// Errors returned inside a [`NodeResponse`] +#[derive( + Debug, Clone, PartialEq, From, Serialize, Deserialize, thiserror::Error, +)] +pub enum NodeError { + #[error("Version {0} messages are unsupported.")] + UnsupportedVersion(u32), + + #[error("Key share for epoch {epoch} does not exist.")] + KeyShareDoesNotExist { epoch: i32 }, + + #[error( + "Received unexpected rack UUID. 
Expected: {expected}, Actual: {actual}" + )] + RackUuidMismatch { expected: Uuid, actual: Uuid }, + + #[error("A commit has already occurred for rack {rack_uuid}")] + AlreadyInitialized { rack_uuid: Uuid }, + + #[error( + "No corresponding key share prepare for this commit: rack UUID: +{rack_uuid}, epoch: {epoch}" + )] + MissingKeySharePrepare { rack_uuid: Uuid, epoch: i32 }, +} diff --git a/bootstore/src/node.rs b/bootstore/src/node.rs new file mode 100644 index 00000000000..183cb5c46bb --- /dev/null +++ b/bootstore/src/node.rs @@ -0,0 +1,116 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! An individual bootstore node. +//! +//! Most logic is contained here, but networking sits on top. +//! This allows easier testing of clusters and failure situations. + +use slog::Logger; +use sprockets_host::Ed25519Certificate; +use uuid::Uuid; + +use crate::db::Db; +use crate::messages::*; +use crate::trust_quorum::SerializableShareDistribution; + +/// Configuration for an individual node +pub struct Config { + log: Logger, + db_path: String, + // TODO: This will live inside the certificate eventually + _serial_number: String, + _device_id_cert: Ed25519Certificate, +} + +/// A node of the bootstore +/// +/// A Node contains all the logic of the bootstore and stores relevant +/// information in [`Db`]. The [`BootstrapAgent`] establishes sprockets +/// sessions, and utilizes its local `Node` to manage any messages received +/// over these sessions. +/// +/// Messages are received over sprockets sessions from either peer nodes +/// during rack unlock, or from a [`Coordinator`] during rack initialization +/// or reconfiguration. 
+// +// Temporary until the using code is written +#[allow(dead_code)] +pub struct Node { + config: Config, + db: Db, +} + +impl Node { + /// Create a new Node, opening the database at `config.db_path` (panics if the open fails) + pub fn new(config: Config) -> Node { + let db = Db::open(&config.log, &config.db_path).unwrap(); + Node { config, db } + } + + /// Handle a message received over sprockets from another [`Node`] or + /// the [`Coordinator`]; requests whose `version` is not 1 are rejected. + pub fn handle(&mut self, req: NodeRequest) -> NodeResponse { + if req.version != 1 { + return NodeResponse { + version: req.version, + id: req.id, + result: Err(NodeError::UnsupportedVersion(req.version)), + }; + } + + let result = match req.op { + NodeOp::GetShare { epoch } => self.handle_get_share(epoch), + NodeOp::Initialize { rack_uuid, share_distribution } => { + self.handle_initialize(rack_uuid, share_distribution) + } + NodeOp::KeySharePrepare { + rack_uuid, + epoch, + share_distribution, + } => self.handle_key_share_prepare( + rack_uuid, + epoch, + share_distribution, + ), + NodeOp::KeyShareCommit { rack_uuid, epoch } => { + self.handle_key_share_commit(rack_uuid, epoch) + } + }; + + NodeResponse { version: req.version, id: req.id, result } + } + + fn handle_get_share( + &mut self, + _epoch: i32, + ) -> Result { + unimplemented!(); + } + + fn handle_initialize( + &mut self, + _rack_uuid: Uuid, + _share_distribution: SerializableShareDistribution, + ) -> Result { + unimplemented!(); + } + + fn handle_key_share_prepare( + &mut self, + _rack_uuid: Uuid, + _epoch: i32, + _share_distribution: SerializableShareDistribution, + ) -> Result { + unimplemented!(); + } + + fn handle_key_share_commit( + &mut self, + _rack_uuid: Uuid, + _epoch: i32, + ) -> Result { + unimplemented!(); + } +} diff --git a/bootstore/src/trust_quorum/error.rs b/bootstore/src/trust_quorum/error.rs new file mode 100644 index 00000000000..f695762c244 --- /dev/null +++ b/bootstore/src/trust_quorum/error.rs @@ -0,0 +1,20 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Error type for trust quorum code + +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum TrustQuorumError { + // Temporary until the using code is written + #[allow(dead_code)] + #[error("Not enough peers to unlock storage")] + NotEnoughPeers, + + // Temporary until the using code is written + #[allow(dead_code)] + #[error("Rack secret construction failed: {0:?}")] + RackSecretConstructionFailed(vsss_rs::Error), +} diff --git a/bootstore/src/trust_quorum/mod.rs b/bootstore/src/trust_quorum/mod.rs new file mode 100644 index 00000000000..5aa249f48e8 --- /dev/null +++ b/bootstore/src/trust_quorum/mod.rs @@ -0,0 +1,20 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! The entry point for the trust quorum code +//! +//! This module only provides the trust quorum primitives: the rack secret and +//! its associated machinery (splitting into shares, verification, etc.). +//! Distribution and retrieval of shares is the responsibility of the +//! bootstrap-agent, which uses sprockets to secure communications between +//! sleds. + +mod error; +mod rack_secret; +mod share_distribution; + +pub use error::TrustQuorumError; +pub use rack_secret::RackSecret; +pub use share_distribution::SerializableShareDistribution; +pub use share_distribution::ShareDistribution; diff --git a/bootstore/src/trust_quorum/rack_secret.rs b/bootstore/src/trust_quorum/rack_secret.rs new file mode 100644 index 00000000000..e9e22c527f7 --- /dev/null +++ b/bootstore/src/trust_quorum/rack_secret.rs @@ -0,0 +1,177 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use std::convert::AsRef; +use std::fmt::Debug; + +use p256::elliptic_curve::group::ff::PrimeField; +use p256::elliptic_curve::subtle::ConstantTimeEq; +use p256::{NonZeroScalar, ProjectivePoint, Scalar, SecretKey}; +use rand::rngs::OsRng; +use serde::{Deserialize, Serialize}; +use vsss_rs::{Feldman, FeldmanVerifier, Share}; + +/// A `RackSecret` is a shared secret used to perform a "rack-level" unlock. +/// +/// Each server sled on an Oxide rack contains up to 10 SSDs containing customer +/// data and Oxide-specific data. We want to ensure that if a small number of +/// disks or sleds get stolen, the data on them remains inaccessible to the +/// thief. We also want to ensure that successfully rebooting a sled or an +/// entire rack does not require administrator intervention such as entering a +/// password. +/// +/// To provide the above guarantees we must ensure that all disks are encrypted, +/// and that there is an automatic mechanism to retrieve the decryption key. +/// Furthermore, we must guarantee that the key retrieval mechanism is not +/// available when only a subset of sleds or disks from a rack are available +/// to an attacker. The mechanism we use to provide these guarantees is based on +/// <https://en.wikipedia.org/wiki/Shamir%27s_Secret_Sharing>. A threshold secret +/// is generated from which individual disk encryption keys are derived, and each +/// of `N` server sleds receives one share of the secret. `K` of these shares +/// must be combined in order to reconstruct the shared secret such that disks +/// may be decrypted. If fewer than `K` shares are available, no information +/// about the secret may be recovered, and the disks cannot be decrypted. We +/// call the threshold secret the `rack secret`. +/// +/// Inside a rack then, the sleds cooperate over secure channels in order to +/// retrieve key shares and reconstruct the `rack secret`, the resulting derived +/// encryption keys, and unlock their own local storage. We call this procedure +/// `rack unlock`. 
The establishment of secure channels and the ability to trust +/// the validity of a participating peer is outside the scope of this particular +/// type and orthogonal to its implementation. +pub struct RackSecret { + secret: NonZeroScalar, +} + +impl PartialEq for RackSecret { + fn eq(&self, other: &Self) -> bool { + self.secret.ct_eq(&other.secret).into() + } +} + +impl Eq for RackSecret {} + +/// A verifier used to ensure the validity of a given key share for an unknown +/// secret. +/// +/// We use verifiable secret sharing to prevent invalid shares from being +/// combined and generating an incorrect secret. Each share must be verified +/// before the secret is reconstructed. +// This is just a wrapper around a FeldmanVerifier from the vsss-rs crate. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct Verifier { + verifier: FeldmanVerifier, +} + +impl Verifier { + pub fn verify(&self, share: &Share) -> bool { + self.verifier.verify(share) + } +} + +// Temporary until the using code is written +#[allow(dead_code)] +impl RackSecret { + /// Create a secret based on the NIST P-256 curve + pub fn new() -> RackSecret { + let mut rng = OsRng::default(); + let sk = SecretKey::random(&mut rng); + RackSecret { secret: sk.to_secret_scalar() } + } + + /// Split a secret into `total_shares` number of shares, where combining + /// `threshold` of the shares can be used to recover the secret. 
+ pub fn split( + &self, + threshold: usize, + total_shares: usize, + ) -> Result<(Vec, Verifier), vsss_rs::Error> { + let mut rng = OsRng::default(); + let (shares, verifier) = Feldman { t: threshold, n: total_shares } + .split_secret(*self.as_ref(), None, &mut rng)?; + Ok((shares, Verifier { verifier })) + } + + /// Combine a set of shares and return a RackSecret; panics (via `unwrap`) if `NonZeroScalar::from_repr` rejects the combined scalar + pub fn combine_shares( + threshold: usize, + total_shares: usize, + shares: &[Share], + ) -> Result { + let scalar = Feldman { t: threshold, n: total_shares } + .combine_shares::(shares)?; + let nzs = NonZeroScalar::from_repr(scalar.to_repr()).unwrap(); + let sk = SecretKey::from(nzs); + Ok(RackSecret { secret: sk.to_secret_scalar() }) + } +} + +impl AsRef for RackSecret { + fn as_ref(&self) -> &Scalar { + self.secret.as_ref() + } +} + +#[cfg(test)] +mod tests { + use std::fmt; + + use super::*; + + // This is a secret. Let's not print it outside of tests. + impl Debug for RackSecret { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.secret.as_ref().fmt(f) + } + } + + fn verify(secret: &RackSecret, verifier: &Verifier, shares: &[Share]) { + for s in shares { + assert!(verifier.verify(s)); + } + + let secret2 = RackSecret::combine_shares(3, 5, &shares[..3]).unwrap(); + let secret3 = RackSecret::combine_shares(3, 5, &shares[1..4]).unwrap(); + let secret4 = RackSecret::combine_shares(3, 5, &shares[2..5]).unwrap(); + let shares2 = + vec![shares[0].clone(), shares[2].clone(), shares[4].clone()]; + let secret5 = RackSecret::combine_shares(3, 5, &shares2).unwrap(); + + for s in [secret2, secret3, secret4, secret5] { + assert_eq!(*secret, s); + } + } + + #[test] + fn create_and_verify() { + let secret = RackSecret::new(); + let (shares, verifier) = secret.split(3, 5).unwrap(); + verify(&secret, &verifier, &shares); + } + + #[test] + fn secret_splitting_fails_with_threshold_larger_than_total_shares() { + let secret = RackSecret::new(); + assert!(secret.split(5, 3).is_err()); + } + + #[test] + 
fn combine_deserialized_shares() { + let secret = RackSecret::new(); + let (shares, verifier) = secret.split(3, 5).unwrap(); + let verifier_s = bincode::serialize(&verifier).unwrap(); + let shares_s = bincode::serialize(&shares).unwrap(); + + let shares2: Vec = bincode::deserialize(&shares_s).unwrap(); + let verifier2: Verifier = bincode::deserialize(&verifier_s).unwrap(); + + // Ensure we can reconstruct the secret with the deserialized shares and + // verifier. + verify(&secret, &verifier2, &shares2); + + // Ensure we can reconstruct the secret with the deserialized shares and + // original verifier. + verify(&secret, &verifier, &shares2); + } +} diff --git a/bootstore/src/trust_quorum/share_distribution.rs b/bootstore/src/trust_quorum/share_distribution.rs new file mode 100644 index 00000000000..8566ae7246a --- /dev/null +++ b/bootstore/src/trust_quorum/share_distribution.rs @@ -0,0 +1,92 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use serde::Deserialize; +use serde::Serialize; +use sprockets_host::Ed25519Certificate; +use std::fmt; +use vsss_rs::Share; + +use super::rack_secret::Verifier; + +/// A ShareDistribution is an individual share of a secret along with all the +/// metadata required to allow a server in possession of the share to know how +/// to correctly recreate a split secret. +// We intentionally DO NOT derive `Debug` or `Serialize`; both provide avenues +// by which we may accidentally log the contents of our `share`. 
+#[derive(Clone, PartialEq, Deserialize)] +pub struct ShareDistribution { + pub threshold: usize, + pub verifier: Verifier, + pub share: Share, + pub member_device_id_certs: Vec, +} + +impl ShareDistribution { + pub fn total_shares(&self) -> usize { + self.member_device_id_certs.len() + } +} + +// We don't want to risk debug-logging the actual share contents, so implement +// `Debug` manually and omit sensitive fields. +impl fmt::Debug for ShareDistribution { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ShareDistribution") + .field("threshold", &self.threshold) + .field("verifier", &"Verifier") + .field("share", &"Share") + .field("member_device_id_certs", &self.member_device_id_certs) + .finish() + } +} + +/// This type is equivalent to `ShareDistribution` but implements `Serialize`. +/// It should be used _very carefully_; `ShareDistribution` should be preferred +/// in almost all cases to avoid accidental spillage of our `Share` contents. +/// This type should only be used to build careful serialization routines that +/// need to deal with trust quorum shares; e.g., +/// `RequestEnvelope::danger_serialize_as_json()`. +#[derive(Clone, PartialEq, Serialize, Deserialize)] +pub struct SerializableShareDistribution { + pub threshold: usize, + pub share: Share, + pub member_device_id_certs: Vec, + pub verifier: Verifier, +} + +// We don't want to risk debug-logging the actual share contents, so implement +// `Debug` manually and omit sensitive fields. 
+impl fmt::Debug for SerializableShareDistribution { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SerializableShareDistribution") + .field("threshold", &self.threshold) + .field("verifier", &"Verifier") + .field("share", &"Share") + .field("member_device_id_certs", &self.member_device_id_certs) + .finish() + } +} + +impl From for SerializableShareDistribution { + fn from(dist: ShareDistribution) -> Self { + Self { + threshold: dist.threshold, + verifier: dist.verifier, + share: dist.share, + member_device_id_certs: dist.member_device_id_certs, + } + } +} + +impl From for ShareDistribution { + fn from(dist: SerializableShareDistribution) -> Self { + Self { + threshold: dist.threshold, + verifier: dist.verifier, + share: dist.share, + member_device_id_certs: dist.member_device_id_certs, + } + } +} diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 359fa1fcbad..3bfbec47fe8 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -17,6 +17,7 @@ clap = { version = "3.2", features = ["derive"] } crucible-client-types = { git = "https://github.com/oxidecomputer/crucible", rev = "bacffd142fc38a01fe255407b0c8d5d0aacfe778" } crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "bacffd142fc38a01fe255407b0c8d5d0aacfe778" } ddm-admin-client = { path = "../ddm-admin-client" } +diesel = { version = "2.0.0-rc.1", features = ["sqlite", "chrono", "serde_json", "network-address", "uuid"] } dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } futures = "0.3.23" internal-dns-client = { path = "../internal-dns-client" } diff --git a/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs b/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs index c8019ef8040..8566ae7246a 100644 --- a/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs +++ b/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs @@ -56,6 +56,19 @@ pub 
struct SerializableShareDistribution { pub verifier: Verifier, } +// We don't want to risk debug-logging the actual share contents, so implement +// `Debug` manually and omit sensitive fields. +impl fmt::Debug for SerializableShareDistribution { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SerializableShareDistribution") + .field("threshold", &self.threshold) + .field("verifier", &"Verifier") + .field("share", &"Share") + .field("member_device_id_certs", &self.member_device_id_certs) + .finish() + } +} + impl From for SerializableShareDistribution { fn from(dist: ShareDistribution) -> Self { Self {