From 9b0d4c5ac4cc8c57fd194fb8d2bb7f1c8835d4e2 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Fri, 3 Jun 2022 14:09:42 -0400 Subject: [PATCH 01/11] Add (optional) trust quorum share to SledAgentRequest --- sled-agent/src/bootstrap/params.rs | 15 ++++++++++----- sled-agent/src/rack_setup/service.rs | 5 ++++- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index 334376f28d3..4685fdd910c 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -4,24 +4,29 @@ //! Request types for the bootstrap agent -use std::borrow::Cow; - +use super::trust_quorum::ShareDistribution; use omicron_common::address::{Ipv6Subnet, SLED_PREFIX}; -use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use std::borrow::Cow; /// Identity signed by local RoT and Oxide certificate chain. -#[derive(Serialize, Deserialize, JsonSchema)] +#[derive(Serialize, Deserialize)] pub struct ShareRequest { // TODO-completeness: format TBD; currently opaque. pub identity: Vec, } /// Configuration information for launching a Sled Agent. -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] pub struct SledAgentRequest { /// Portion of the IP space to be managed by the Sled Agent. pub subnet: Ipv6Subnet, + + /// Share of the rack secret for this Sled Agent. + // TODO-cleanup This is currently optional because we don't do trust quorum + // shares for single-node deployments (i.e., most dev/test environments), + // but eventually this should be required. + pub trust_quorum_share: Option, } #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 602c7b7d8e9..211250ca283 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -314,7 +314,10 @@ impl ServiceInner { ( bootstrap_addr, SledAllocation { - initialization_request: SledAgentRequest { subnet }, + initialization_request: SledAgentRequest { + subnet, + trust_quorum_share: None, + }, services_request: request, }, ) From f595ed7835d6c49a90d3527e2fc3d7af45e731fd Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 6 Jun 2022 09:55:05 -0400 Subject: [PATCH 02/11] Add `rack_secret_threshold` to RSS config --- sled-agent/src/rack_setup/config.rs | 7 +++++++ smf/sled-agent/config-rss.toml | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 26f3ce8a321..8329a4a6802 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -30,6 +30,12 @@ pub struct SetupServiceConfig { #[serde(default, rename = "request")] pub requests: Vec, + + /// The minimum number of sleds required to unlock the rack secret. + /// + /// If this value is less than 2, no rack secret will be created on startup; + /// this is the typical case for single-server test/development. + pub rack_secret_threshold: usize, } /// A request to initialize a sled. @@ -82,6 +88,7 @@ mod test { let cfg = SetupServiceConfig { rack_subnet: "fd00:1122:3344:0100::".parse().unwrap(), requests: vec![], + rack_secret_threshold: 0, }; assert_eq!( diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index d8113cf4d1b..add9419754b 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -6,6 +6,11 @@ # |...............| <- This /56 is the Rack Subnet rack_subnet = "fd00:1122:3344:0100::" +# The number of sleds required to unlock the rack secret. +# +# For values less than 2, no rack secret will be generated. +rack_secret_threshold = 1 + [[request]] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus From d8baad719a1b924b06564a76f683b5267108b4fd Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 6 Jun 2022 11:15:23 -0400 Subject: [PATCH 03/11] RSS: Generate rack secret and include shares in sled agent requests --- sled-agent/src/rack_setup/service.rs | 50 ++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 211250ca283..52b3bc637b6 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -9,6 +9,7 @@ use crate::bootstrap::config::BOOTSTRAP_AGENT_PORT; use crate::bootstrap::discovery::PeerMonitorObserver; use crate::bootstrap::params::SledAgentRequest; use crate::bootstrap::rss_handle::BootstrapAgentHandle; +use crate::bootstrap::trust_quorum::{RackSecret, ShareDistribution}; use crate::params::ServiceRequest; use omicron_common::address::{get_sled_address, ReservedRackSubnet}; use omicron_common::backoff::{ @@ -46,6 +47,9 @@ pub enum SetupServiceError { #[error("Failed to construct an HTTP client: {0}")] HttpClient(reqwest::Error), + + #[error("Failed to split rack secret: {0:?}")] + SplitRackSecret(vsss_rs::Error), } // The workload / information allocated to a single sled. @@ -264,8 +268,42 @@ impl ServiceInner { async fn create_plan( &self, config: &Config, - bootstrap_addrs: impl IntoIterator, + bootstrap_addrs: Vec, ) -> Result, SetupServiceError> { + // Create a rack secret, unless we're in the single-sled case. + let mut rack_secret_shares = if bootstrap_addrs.len() > 1 { + let total_shares = bootstrap_addrs.len(); + if config.rack_secret_threshold > 1 { + let secret = RackSecret::new(); + let (shares, verifier) = secret + .split(config.rack_secret_threshold, total_shares) + .map_err(SetupServiceError::SplitRackSecret)?; + + // Sanity check that `split` returned the expected number of + // shares (one per bootstrap agent) + assert_eq!(shares.len(), total_shares); + + Some(shares.into_iter().map(move |share| ShareDistribution { + threshold: config.rack_secret_threshold, + total_shares, + verifier: verifier.clone(), + share, + })) + } else { + warn!( + self.log, + concat!( + "Skipping rack secret creation due to config", + " (despite discovery of {} bootstrap agents)" + ), + total_shares, + ); + None + } + } else { + None + }; + let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); let reserved_rack_subnet = ReservedRackSubnet::new(config.az_subnet()); let dns_subnets = reserved_rack_subnet.get_dns_subnets(); @@ -316,7 +354,15 @@ impl ServiceInner { SledAllocation { initialization_request: SledAgentRequest { subnet, - trust_quorum_share: None, + trust_quorum_share: rack_secret_shares.as_mut().map( + |shares_iter| { + // We asserted when creating + // `rack_secret_shares` that it contained + // exactly the number of shares as we have + // bootstrap addrs, so we can unwrap here. + shares_iter.next().unwrap() + }, + ), }, services_request: request, }, From ba6522c3701ba57e8edc4e99c4f3dc68a5f9cd82 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 6 Jun 2022 11:44:59 -0400 Subject: [PATCH 04/11] Read trust quorum share from sled agent request. Closes #513. --- deploy/src/bin/deployment-example.toml | 1 - deploy/src/bin/sled-agent-overlay-files.rs | 39 ------------ deploy/src/bin/thing-flinger.rs | 4 +- sled-agent/src/bootstrap/agent.rs | 70 +++++++++++----------- 4 files changed, 35 insertions(+), 79 deletions(-) diff --git a/deploy/src/bin/deployment-example.toml b/deploy/src/bin/deployment-example.toml index 95311ccb054..0ca27e2ff5e 100644 --- a/deploy/src/bin/deployment-example.toml +++ b/deploy/src/bin/deployment-example.toml @@ -18,7 +18,6 @@ omicron_path = "/remote/path/to/omicron" # which server is responsible for running the rack setup service; must # refer to one of the `servers` in the servers table rss_server = "foo" -rack_secret_threshold = 2 # Location where files to install will be placed before running # `omicron-package install` # diff --git a/deploy/src/bin/sled-agent-overlay-files.rs b/deploy/src/bin/sled-agent-overlay-files.rs index f9812b33275..87c3d801c75 100644 --- a/deploy/src/bin/sled-agent-overlay-files.rs +++ b/deploy/src/bin/sled-agent-overlay-files.rs @@ -7,10 +7,6 @@ //! used for the trust quourm here. We generate a shared secret then split it, //! distributing each share to the appropriate server. -use omicron_sled_agent::bootstrap::trust_quorum::{ - RackSecret, ShareDistribution, -}; - use anyhow::{anyhow, Context, Result}; use sp_sim::config::GimletConfig; use sp_sim::config::SpCommonConfig; @@ -23,45 +19,11 @@ use structopt::StructOpt; about = "Generate server unique files for deployment" )] struct Args { - /// The rack secret threshold - #[structopt(short, long)] - threshold: usize, - /// A directory per server where the files are output #[structopt(short, long)] directories: Vec, } -// Generate a rack secret and allocate a ShareDistribution to each deployment -// server folder. -fn overlay_secret_shares( - threshold: usize, - server_dirs: &[PathBuf], -) -> Result<()> { - let total_shares = server_dirs.len(); - if total_shares < 2 { - println!( - "Skipping secret share distribution: only one server \ - available." - ); - return Ok(()); - } - let secret = RackSecret::new(); - let (shares, verifier) = secret - .split(threshold, total_shares) - .map_err(|e| anyhow!("Failed to split rack secret: {:?}", e))?; - for (share, server_dir) in shares.into_iter().zip(server_dirs) { - ShareDistribution { - threshold, - total_shares, - verifier: verifier.clone(), - share, - } - .write(&server_dir)?; - } - Ok(()) -} - // Generate a config file for a simulated SP in each deployment server folder. fn overlay_sp_configs(server_dirs: &[PathBuf]) -> Result<()> { // We will eventually need to flesh out more of this config; for now, @@ -95,7 +57,6 @@ fn overlay_sp_configs(server_dirs: &[PathBuf]) -> Result<()> { fn main() -> Result<()> { let args = Args::from_args_safe().map_err(|err| anyhow!(err))?; - overlay_secret_shares(args.threshold, &args.directories)?; overlay_sp_configs(&args.directories)?; Ok(()) } diff --git a/deploy/src/bin/thing-flinger.rs b/deploy/src/bin/thing-flinger.rs index 68f4363bee0..7d67587047c 100644 --- a/deploy/src/bin/thing-flinger.rs +++ b/deploy/src/bin/thing-flinger.rs @@ -33,7 +33,6 @@ struct Server { #[derive(Deserialize, Debug)] struct Deployment { rss_server: String, - rack_secret_threshold: usize, staging_dir: PathBuf, } @@ -483,11 +482,10 @@ fn overlay_sled_agent( let cmd = format!( "sh -c 'for dir in {}; do mkdir -p $dir; done' && \ cd {} && \ - cargo run {} --bin sled-agent-overlay-files -- --threshold {} --directories {}", + cargo run {} --bin sled-agent-overlay-files -- --directories {}", dirs, config.builder.omicron_path.to_string_lossy(), config.release_arg(), - config.deployment.rack_secret_threshold, dirs ); ssh_exec(builder, &cmd, false) diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index bc0f84c2bad..a7f2ece5255 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -24,7 +24,6 @@ use omicron_common::backoff::{ }; use slog::Logger; -use std::io; use std::net::{Ipv6Addr, SocketAddrV6}; use std::path::{Path, PathBuf}; use thiserror::Error; @@ -59,28 +58,6 @@ impl From for ExternalError { } } -// Attempt to read a key share file. If the file does not exist, we return -// `Ok(None)`, indicating the sled is operating in a single node cluster. If -// the file exists, we parse it and return Ok(ShareDistribution). For any -// other error, we return the error. -// -// TODO: Remove after dynamic key generation. See #513. -fn read_key_share() -> Result, BootstrapError> { - let key_share_dir = Path::new("/opt/oxide/sled-agent/pkg"); - - match ShareDistribution::read(&key_share_dir) { - Ok(share) => Ok(Some(share)), - Err(TrustQuorumError::Io { message, err }) => { - if err.kind() == io::ErrorKind::NotFound { - Ok(None) - } else { - Err(BootstrapError::Io { message, err }) - } - } - Err(e) => Err(e.into()), - } -} - /// The entity responsible for bootstrapping an Oxide rack. pub(crate) struct Agent { /// Debug log @@ -184,21 +161,10 @@ impl Agent { message: format!("Monitoring for peers from {address}"), err, })?; - let share = read_key_share()?; - let agent = Agent { - log: ba_log, - parent_log: log, - peer_monitor, - share, - rss: Mutex::new(None), - sled_agent: Mutex::new(None), - sled_config, - sp, - }; let request_path = get_sled_agent_request_path(); - if request_path.exists() { - info!(agent.log, "Sled already configured, loading sled agent"); + let sled_request = if request_path.exists() { + info!(ba_log, "Sled already configured, loading sled agent"); let sled_request: SledAgentRequest = toml::from_str( &tokio::fs::read_to_string(&request_path).await.map_err( |err| BootstrapError::Io { @@ -210,6 +176,25 @@ impl Agent { )?, ) .map_err(|err| BootstrapError::Toml { path: request_path, err })?; + Some(sled_request) + } else { + None + }; + + let agent = Agent { + log: ba_log, + parent_log: log, + peer_monitor, + share: sled_request + .as_ref() + .and_then(|req| req.trust_quorum_share.clone()), + rss: Mutex::new(None), + sled_agent: Mutex::new(None), + sled_config, + sp, + }; + + if let Some(sled_request) = sled_request { agent.request_agent(&sled_request).await?; } @@ -255,6 +240,19 @@ impl Agent { return Err(BootstrapError::SledError(err_str)); } + // Bail out if this request includes a trust quorum share that + // doesn't match ours. TODO-correctness Need to handle a + // partially-initialized rack where we may have a share from a + // previously-started-but-not-completed init process. + if request.trust_quorum_share != self.share { + let err_str = concat!( + "Sled Agent already running with", + " a different trust quorum share" + ) + .to_string(); + return Err(BootstrapError::SledError(err_str)); + } + return Ok(SledAgentResponse { id: server.id() }); } // Server does not exist, initialize it. From 7d59bdcf0d1067034ee90ce74ce55d069451c10a Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 6 Jun 2022 12:01:35 -0400 Subject: [PATCH 05/11] Don't debug-log contents of secret shares --- .../bootstrap/trust_quorum/share_distribution.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs b/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs index 799e94fc5d7..cd441b22d29 100644 --- a/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs +++ b/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs @@ -4,6 +4,7 @@ use serde::{Deserialize, Serialize}; use serde_json; +use std::fmt; use std::fs; use std::path::{Path, PathBuf}; use vsss_rs::Share; @@ -16,7 +17,7 @@ const FILENAME: &'static str = "share.json"; /// A ShareDistribution is an individual share of a secret along with all the /// metadata required to allow a server in possession of the share to know how /// to correctly recreate a split secret. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[derive(Clone, PartialEq, Serialize, Deserialize)] pub struct ShareDistribution { pub threshold: usize, pub total_shares: usize, @@ -24,6 +25,19 @@ pub struct ShareDistribution { pub share: Share, } +// We don't want to risk debug-logging the actual share contents, so implement +// `Debug` manually and omit sensitive fields. +impl fmt::Debug for ShareDistribution { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ShareDistribution") + .field("threshold", &self.threshold) + .field("total_shares", &self.total_shares) + .field("verifier", &"Verifier") + .field("share", &"Share") + .finish() + } +} + impl ShareDistribution { pub fn write>( &self, From 539c31ea891362fba030429840578b4b33d1e491 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 8 Jun 2022 16:11:02 -0400 Subject: [PATCH 06/11] Move rack secret retrieval from SPDM to sprockets --- sled-agent/src/bootstrap/agent.rs | 108 +++++------ sled-agent/src/bootstrap/client.rs | 19 +- sled-agent/src/bootstrap/mod.rs | 2 - sled-agent/src/bootstrap/params.rs | 7 - sled-agent/src/bootstrap/server.rs | 8 +- sled-agent/src/bootstrap/spdm/error.rs | 39 ---- sled-agent/src/bootstrap/spdm/mod.rs | 95 ---------- sled-agent/src/bootstrap/spdm/requester.rs | 178 ------------------ sled-agent/src/bootstrap/spdm/responder.rs | 142 -------------- .../src/bootstrap/trust_quorum/client.rs | 60 ------ .../src/bootstrap/trust_quorum/error.rs | 24 +-- sled-agent/src/bootstrap/trust_quorum/mod.rs | 34 +--- sled-agent/src/bootstrap/trust_quorum/msgs.rs | 16 -- .../src/bootstrap/trust_quorum/server.rs | 137 -------------- .../trust_quorum/share_distribution.rs | 73 ------- sled-agent/src/bootstrap/views.rs | 17 +- 16 files changed, 82 insertions(+), 877 deletions(-) delete mode 100644 sled-agent/src/bootstrap/spdm/error.rs delete mode 100644 sled-agent/src/bootstrap/spdm/mod.rs delete mode 100644 sled-agent/src/bootstrap/spdm/requester.rs delete mode 100644 sled-agent/src/bootstrap/spdm/responder.rs delete mode 100644 sled-agent/src/bootstrap/trust_quorum/client.rs delete mode 100644 sled-agent/src/bootstrap/trust_quorum/msgs.rs delete mode 100644 sled-agent/src/bootstrap/trust_quorum/server.rs diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index a7f2ece5255..c891ed49318 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -4,14 +4,13 @@ //! Bootstrap-related APIs. +use super::client::Client as BootstrapAgentClient; use super::config::{Config, BOOTSTRAP_AGENT_PORT}; use super::discovery; use super::params::SledAgentRequest; use super::rss_handle::RssHandle; -use super::trust_quorum::{ - self, RackSecret, ShareDistribution, TrustQuorumError, -}; -use super::views::{ShareResponse, SledAgentResponse}; +use super::trust_quorum::{RackSecret, ShareDistribution, TrustQuorumError}; +use super::views::SledAgentResponse; use crate::config::Config as SledConfig; use crate::illumos::dladm::{self, Dladm, PhysicalLink}; use crate::illumos::zone::Zones; @@ -22,12 +21,12 @@ use omicron_common::api::external::{Error as ExternalError, MacAddr}; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, }; - use slog::Logger; use std::net::{Ipv6Addr, SocketAddrV6}; use std::path::{Path, PathBuf}; use thiserror::Error; use tokio::sync::Mutex; +use vsss_rs::Share; /// Describes errors which may occur while operating the bootstrap service. #[derive(Error, Debug)] @@ -66,7 +65,9 @@ pub(crate) struct Agent { /// other launched components can set their own value. parent_log: Logger, peer_monitor: discovery::PeerMonitor, - share: Option, + + /// Our share of the rack secret, if we have one. + share: Mutex>, rss: Mutex>, sled_agent: Mutex>, @@ -162,9 +163,20 @@ impl Agent { err, })?; + let agent = Agent { + log: ba_log, + parent_log: log, + peer_monitor, + share: Mutex::new(None), + rss: Mutex::new(None), + sled_agent: Mutex::new(None), + sled_config, + sp, + }; + let request_path = get_sled_agent_request_path(); - let sled_request = if request_path.exists() { - info!(ba_log, "Sled already configured, loading sled agent"); + if request_path.exists() { + info!(agent.log, "Sled already configured, loading sled agent"); let sled_request: SledAgentRequest = toml::from_str( &tokio::fs::read_to_string(&request_path).await.map_err( |err| BootstrapError::Io { @@ -176,44 +188,19 @@ impl Agent { )?, ) .map_err(|err| BootstrapError::Toml { path: request_path, err })?; - Some(sled_request) - } else { - None - }; - - let agent = Agent { - log: ba_log, - parent_log: log, - peer_monitor, - share: sled_request - .as_ref() - .and_then(|req| req.trust_quorum_share.clone()), - rss: Mutex::new(None), - sled_agent: Mutex::new(None), - sled_config, - sp, - }; - - if let Some(sled_request) = sled_request { agent.request_agent(&sled_request).await?; } Ok(agent) } - /// Implements the "request share" API. - #[allow(dead_code)] // Currently uncalled; will be used soon! - pub async fn request_share( - &self, - identity: Vec, - ) -> Result { - // TODO-correctness: Validate identity, return whatever - // information is necessary to establish trust quorum. - // - // This current implementation is a placeholder. - info!(&self.log, "request_share, received identity: {:x?}", identity); - - Ok(ShareResponse { shared_secret: vec![] }) + /// Returns our share of the rack secret, if we have one. + pub async fn secret_share(&self) -> Option { + self.share + .lock() + .await + .as_ref() + .map(|share_dist| share_dist.share.clone()) } /// Initializes the Sled Agent on behalf of the RSS, if one has not already @@ -244,7 +231,7 @@ impl Agent { // doesn't match ours. TODO-correctness Need to handle a // partially-initialized rack where we may have a share from a // previously-started-but-not-completed init process. - if request.trust_quorum_share != self.share { + if request.trust_quorum_share != *self.share.lock().await { let err_str = concat!( "Sled Agent already running with", " a different trust quorum share" @@ -270,6 +257,9 @@ impl Agent { maybe_agent.replace(server); info!(&self.log, "Sled Agent loaded; recording configuration"); + // Remember our share, allowing us to respond to `request_share()`. + *self.share.lock().await = request.trust_quorum_share.clone(); + // Record this request so the sled agent can be automatically // initialized on the next boot. let path = get_sled_agent_request_path(); @@ -294,6 +284,7 @@ impl Agent { /// sufficiently unlocked. async fn establish_sled_quorum( &self, + share: &ShareDistribution, ) -> Result { let rack_secret = retry_notify( internal_service_policy(), @@ -304,8 +295,6 @@ impl Agent { "Bootstrap: Communicating with peers: {:?}", other_agents ); - let share = self.share.as_ref().unwrap(); - // "-1" to account for ourselves. if other_agents.len() < share.threshold - 1 { warn!( @@ -322,19 +311,21 @@ impl Agent { ); // Retrieve verified rack_secret shares from a quorum of agents - let other_agents: Vec = other_agents + let other_agents: Vec = other_agents .into_iter() .map(|addr| { let addr = SocketAddrV6::new( addr, - trust_quorum::PORT, + BOOTSTRAP_AGENT_PORT, 0, 0, ); - trust_quorum::Client::new( - &self.log, - share.verifier.clone(), + BootstrapAgentClient::new( addr, + &self.sp, + self.log.new(o!( + "BootstrapAgentClient" => addr.to_string()), + ), ) }) .collect(); @@ -343,10 +334,10 @@ impl Agent { // don't resend. See https://github.com/oxidecomputer/omicron/issues/514 let mut shares = vec![share.share.clone()]; for agent in &other_agents { - let share = agent.get_share().await + let share = agent.request_share().await .map_err(|e| { info!(&self.log, "Bootstrap: failed to retreive share from peer: {:?}", e); - BackoffError::transient(e) + BackoffError::transient(e.into()) })?; info!( &self.log, @@ -390,17 +381,6 @@ impl Agent { Ok(rack_secret) } - async fn run_trust_quorum_server(&self) -> Result<(), BootstrapError> { - let my_share = self.share.as_ref().unwrap().share.clone(); - let mut server = trust_quorum::Server::new(&self.log, my_share) - .map_err(|err| BootstrapError::Io { - message: "Cannot run trust quorum server".to_string(), - err, - })?; - tokio::spawn(async move { server.run().await }); - Ok(()) - } - // Initializes the Rack Setup Service. async fn start_rss(&self, config: &Config) -> Result<(), BootstrapError> { if let Some(rss_config) = &config.rss_config { @@ -427,9 +407,9 @@ impl Agent { ) -> Result<(), BootstrapError> { info!(&self.log, "bootstrap service initializing"); - if self.share.is_some() { - self.run_trust_quorum_server().await?; - self.establish_sled_quorum().await?; + let maybe_share = self.share.lock().await; + if let Some(share) = &*maybe_share { + self.establish_sled_quorum(share).await?; } self.start_rss(config).await?; diff --git a/sled-agent/src/bootstrap/client.rs b/sled-agent/src/bootstrap/client.rs index fb8f54f79f9..54d07d1f3cb 100644 --- a/sled-agent/src/bootstrap/client.rs +++ b/sled-agent/src/bootstrap/client.rs @@ -14,6 +14,7 @@ use crate::bootstrap::views::ResponseEnvelope; use crate::sp::SpHandle; use crate::sp::SprocketsRole; use slog::Logger; +use vsss_rs::Share; use std::borrow::Cow; use std::io; use std::net::SocketAddrV6; @@ -23,7 +24,7 @@ use tokio::io::AsyncWriteExt; use tokio::net::TcpStream; #[derive(Debug, Error)] -pub(crate) enum Error { +pub enum Error { #[error("Could not connect to {addr}: {err}")] Connect { addr: SocketAddrV6, err: io::Error }, @@ -79,6 +80,10 @@ impl<'a> Client<'a> { Self { addr, sp, log } } + pub(crate) fn addr(&self) -> SocketAddrV6 { + self.addr + } + pub(crate) async fn start_sled( &self, request: &SledAgentRequest, @@ -94,6 +99,18 @@ impl<'a> Client<'a> { } } + pub(crate) async fn request_share(&self) -> Result { + let request = Request::ShareRequest; + + match self.request_response(request).await? { + Response::ShareResponse(response) => Ok(response), + Response::SledAgentResponse(_) => Err(Error::InvalidResponse { + expected: "ShareResponse", + received: "SledAgentResponse", + }), + } + } + async fn request_response( &self, request: Request<'_>, diff --git a/sled-agent/src/bootstrap/mod.rs b/sled-agent/src/bootstrap/mod.rs index 27bc0b1c791..cdc331a27ae 100644 --- a/sled-agent/src/bootstrap/mod.rs +++ b/sled-agent/src/bootstrap/mod.rs @@ -8,11 +8,9 @@ pub mod agent; pub mod client; pub mod config; pub mod discovery; -//mod http_entrypoints; pub mod multicast; pub(crate) mod params; pub(crate) mod rss_handle; pub mod server; -mod spdm; pub mod trust_quorum; mod views; diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index 4685fdd910c..d6768eca9c3 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -9,13 +9,6 @@ use omicron_common::address::{Ipv6Subnet, SLED_PREFIX}; use serde::{Deserialize, Serialize}; use std::borrow::Cow; -/// Identity signed by local RoT and Oxide certificate chain. -#[derive(Serialize, Deserialize)] -pub struct ShareRequest { - // TODO-completeness: format TBD; currently opaque. - pub identity: Vec, -} - /// Configuration information for launching a Sled Agent. #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] pub struct SledAgentRequest { diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index c5e663b7eab..1c4616ee044 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -218,7 +218,13 @@ async fn serve_single_request( } } Request::ShareRequest => { - Err("share request currently unsupported".to_string()) + match bootstrap_agent.secret_share().await { + Some(share) => Ok(Response::ShareResponse(share)), + None => { + warn!(log, "Share requested before we have one"); + Err(format!("Share request failed: share unavailable")) + } + } } }; diff --git a/sled-agent/src/bootstrap/spdm/error.rs b/sled-agent/src/bootstrap/spdm/error.rs deleted file mode 100644 index 970d4bb17a2..00000000000 --- a/sled-agent/src/bootstrap/spdm/error.rs +++ /dev/null @@ -1,39 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Wrap errors returned from the `spdm` crate and std::io::Error. - -use spdm::{requester::RequesterError, responder::ResponderError}; -use thiserror::Error; - -/// Describes errors that arise from use of the SPDM protocol library -#[derive(Error, Debug)] -pub enum SpdmError { - #[error("requester error: {0}")] - Requester(RequesterError), - - #[error("responder error: {0}")] - Responder(ResponderError), - - #[error(transparent)] - Io(#[from] std::io::Error), - - #[error("invalid state transition: expected {expected}, got {got}")] - InvalidState { expected: &'static str, got: &'static str }, - - #[error("timeout")] - Timeout(#[from] tokio::time::error::Elapsed), -} - -impl From for SpdmError { - fn from(e: RequesterError) -> Self { - SpdmError::Requester(e) - } -} - -impl From for SpdmError { - fn from(e: ResponderError) -> Self { - SpdmError::Responder(e) - } -} diff --git a/sled-agent/src/bootstrap/spdm/mod.rs b/sled-agent/src/bootstrap/spdm/mod.rs deleted file mode 100644 index 2e907d6678a..00000000000 --- a/sled-agent/src/bootstrap/spdm/mod.rs +++ /dev/null @@ -1,95 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Instantiate a SPDM requester and responder with particular capabilities, -//! algorithms, and credentials. -//! -//! Sled agents run the SPDM protocol over a tokio TCP stream with a 2 byte size -//! header for framing. - -mod error; -pub mod requester; -pub mod responder; - -use std::io::{Error, ErrorKind}; -use std::time::Duration; - -use bytes::{Bytes, BytesMut}; -use futures::{SinkExt, StreamExt}; -use slog::Logger; -use tokio::net::TcpStream; -use tokio::time::timeout; -use tokio_util::codec::{Framed, LengthDelimitedCodec}; - -// 2^16 - 2 bytes for a header -const MAX_BUF_SIZE: usize = 65534; - -const TIMEOUT: Duration = Duration::from_secs(5); - -pub use error::SpdmError; - -pub struct Transport { - framed: Framed, - log: Logger, -} - -impl Transport { - // We use 2-byte size framed headers. - #[allow(dead_code)] - pub const HEADER_LEN: usize = 2; - - #[allow(dead_code)] - pub fn new(sock: TcpStream, log: Logger) -> Transport { - Transport { - framed: LengthDelimitedCodec::builder() - .length_field_length(Self::HEADER_LEN) - .new_framed(sock), - log, - } - } - - pub async fn send(&mut self, data: &[u8]) -> Result<(), SpdmError> { - let data = Bytes::copy_from_slice(data); - timeout(TIMEOUT, self.framed.send(data)).await??; - Ok(()) - } - - pub async fn recv(&mut self) -> Result { - if let Some(rsp) = timeout(TIMEOUT, self.framed.next()).await? { - let rsp = rsp?; - debug!(self.log, "Received {:x?}", &rsp[..]); - Ok(rsp) - } else { - Err(Error::new(ErrorKind::ConnectionAborted, "SPDM channel closed") - .into()) - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::net::SocketAddr; - use tokio::net::TcpListener; - - #[tokio::test] - async fn test_recv_timeout() { - let logctx = - omicron_test_utils::dev::test_setup_log("test_recv_timeout"); - let log = logctx.log.clone(); - let addr: SocketAddr = "127.0.0.1:9898".parse().unwrap(); - let listener = TcpListener::bind(addr.clone()).await.unwrap(); - - let handle = tokio::spawn(async move { - let (sock, _) = listener.accept().await.unwrap(); - let mut transport = Transport::new(sock, log); - transport.recv().await - }); - - let _ = TcpStream::connect(addr).await.unwrap(); - - assert!(handle.await.unwrap().is_err()); - logctx.cleanup_successful(); - } -} diff --git a/sled-agent/src/bootstrap/spdm/requester.rs b/sled-agent/src/bootstrap/spdm/requester.rs deleted file mode 100644 index 83f28e3b8a8..00000000000 --- a/sled-agent/src/bootstrap/spdm/requester.rs +++ /dev/null @@ -1,178 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use slog::Logger; - -use spdm::msgs::algorithms::*; -use spdm::msgs::capabilities::{GetCapabilities, ReqFlags}; -use spdm::requester::{self, algorithms, capabilities, id_auth}; -use spdm::{ - config::{MAX_CERT_CHAIN_SIZE, NUM_SLOTS}, - Transcript, -}; - -use super::{SpdmError, Transport, MAX_BUF_SIZE}; - -// A `Ctx` contains shared types for use by a requester task -struct Ctx { - buf: [u8; MAX_BUF_SIZE], - log: Logger, - transport: Transport, - transcript: Transcript, -} - -impl Ctx { - fn new(log: Logger, transport: Transport) -> Ctx { - Ctx { - buf: [0u8; MAX_BUF_SIZE], - log, - transport, - transcript: Transcript::new(), - } - } - - async fn negotiate_version( - &mut self, - ) -> Result { - let state = requester::start(); - let data = - state.write_get_version(&mut self.buf, &mut self.transcript)?; - - debug!(self.log, "Requester sending GET_VERSION"); - self.transport.send(data).await?; - - let rsp = self.transport.recv().await?; - debug!(self.log, "Requester received VERSION"); - - state.handle_msg(&rsp[..], &mut self.transcript).map_err(|e| e.into()) - } - - async fn negotiate_capabilities( - &mut self, - mut state: capabilities::State, - ) -> Result { - let req = GetCapabilities { - ct_exponent: 12, - flags: ReqFlags::CERT_CAP - | ReqFlags::CHAL_CAP - | ReqFlags::ENCRYPT_CAP - | ReqFlags::MAC_CAP - | ReqFlags::MUT_AUTH_CAP - | ReqFlags::KEY_EX_CAP - | ReqFlags::ENCAP_CAP - | ReqFlags::HBEAT_CAP - | ReqFlags::KEY_UPD_CAP, - }; - - debug!(self.log, "Requester sending GET_CAPABILITIES"); - let data = - state.write_msg(&req, &mut self.buf, &mut self.transcript)?; - self.transport.send(data).await?; - - let rsp = self.transport.recv().await?; - debug!(self.log, "Requester received CAPABILITIES"); - state.handle_msg(&rsp, &mut self.transcript).map_err(|e| e.into()) - } - - async fn negotiate_algorithms( - &mut self, - mut state: algorithms::State, - ) -> Result { - let req = NegotiateAlgorithms { - measurement_spec: MeasurementSpec::DMTF, - base_asym_algo: BaseAsymAlgo::ECDSA_ECC_NIST_P256, - base_hash_algo: BaseHashAlgo::SHA_256, - num_algorithm_requests: 4, - algorithm_requests: [ - AlgorithmRequest::Dhe(DheAlgorithm { - supported: DheFixedAlgorithms::SECP_256_R1, - }), - AlgorithmRequest::Aead(AeadAlgorithm { - supported: AeadFixedAlgorithms::AES_256_GCM, - }), - AlgorithmRequest::ReqBaseAsym(ReqBaseAsymAlgorithm { - supported: ReqBaseAsymFixedAlgorithms::ECDSA_ECC_NIST_P256, - }), - AlgorithmRequest::KeySchedule(KeyScheduleAlgorithm { - supported: KeyScheduleFixedAlgorithms::SPDM, - }), - ], - }; - - debug!(self.log, "Requester sending NEGOTIATE_ALGORITHMS"); - let data = state.write_msg(req, &mut self.buf, &mut self.transcript)?; - self.transport.send(data).await?; - - let rsp = self.transport.recv().await?; - debug!(self.log, "Requester received ALGORITHMS"); - - state - .handle_msg::( - &rsp, - &mut self.transcript, - ) - .map_err(|e| e.into()) - } -} - -/// Run the requester side of the SPDM protocol. -/// -/// The protocol operates over a TCP stream framed with a 2 byte size -/// header. Requesters and Responders are decoupled from whether the endpoint of -/// a socket is a TCP client or server. -#[allow(dead_code)] -pub async fn run( - log: Logger, - transport: Transport, -) -> Result { - let mut ctx = Ctx::new(log, transport); - - info!(ctx.log, "Requester starting version negotiation"); - let state = ctx.negotiate_version().await?; - - info!(ctx.log, "Requester starting capabilities negotiation"); - let state = ctx.negotiate_capabilities(state).await?; - - info!(ctx.log, "Requester starting algorithms negotiation"); - let _state = ctx.negotiate_algorithms(state).await?; - - info!(ctx.log, "Requester completed negotiation phase"); - debug!(ctx.log, "Requester transcript: {:x?}", ctx.transcript.get()); - - Ok(ctx.transport) -} - -#[cfg(test)] -mod tests { - use std::net::SocketAddr; - use tokio::net::{TcpListener, TcpStream}; - - use super::super::responder; - use super::*; - - #[tokio::test] - async fn negotiation() { - let logctx = omicron_test_utils::dev::test_setup_log("negotiation"); - let log = logctx.log.clone(); - let log2 = log.clone(); - let log3 = log.clone(); - - let addr: SocketAddr = "127.0.0.1:9999".parse().unwrap(); - let listener = TcpListener::bind(addr.clone()).await.unwrap(); - - let handle = tokio::spawn(async move { - let (sock, _) = listener.accept().await.unwrap(); - let log2 = log.clone(); - let transport = Transport::new(sock, log); - responder::run(log2, transport).await.unwrap(); - }); - - let sock = TcpStream::connect(addr).await.unwrap(); - let transport = Transport::new(sock, log2); - run(log3, transport).await.unwrap(); - - handle.await.unwrap(); - logctx.cleanup_successful(); - } -} diff --git a/sled-agent/src/bootstrap/spdm/responder.rs b/sled-agent/src/bootstrap/spdm/responder.rs deleted file mode 100644 index bd51723b29f..00000000000 --- a/sled-agent/src/bootstrap/spdm/responder.rs +++ /dev/null @@ -1,142 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use slog::Logger; - -use spdm::msgs::capabilities::{Capabilities, RspFlags}; -use spdm::responder::{self, algorithms, capabilities, id_auth}; -use spdm::Transcript; - -use super::{SpdmError, Transport, MAX_BUF_SIZE}; - -// A `Ctx` contains shared types for use by a responder task -struct Ctx { - buf: [u8; MAX_BUF_SIZE], - log: Logger, - transport: Transport, - transcript: Transcript, -} - -impl Ctx { - fn new(log: Logger, transport: Transport) -> Ctx { - Ctx { - buf: [0u8; MAX_BUF_SIZE], - log, - transport, - transcript: Transcript::new(), - } - } - - async fn negotiate_version( - &mut self, - ) -> Result { - let state = responder::start(); - let req = self.transport.recv().await?; - - let (data, state) = - state.handle_msg(&req[..], &mut self.buf, &mut self.transcript)?; - debug!(self.log, "Responder received GET_VERSION"); - - self.transport.send(data).await?; - debug!(self.log, "Responder sent VERSION"); - Ok(state) - } - - async fn negotiate_capabilities( - &mut self, - state: capabilities::State, - ) -> Result { - let supported = Capabilities { - ct_exponent: 14, - flags: RspFlags::CERT_CAP - | RspFlags::CHAL_CAP - | RspFlags::ENCRYPT_CAP - | RspFlags::MAC_CAP - | RspFlags::MUT_AUTH_CAP - | RspFlags::KEY_EX_CAP - | RspFlags::ENCAP_CAP - | RspFlags::HBEAT_CAP - | RspFlags::KEY_UPD_CAP, - }; - - let req = self.transport.recv().await?; - let (data, transition) = state.handle_msg( - supported, - &req[..], - &mut self.buf, - &mut self.transcript, - )?; - debug!(self.log, "Responder received GET_CAPABILITIES"); - - // We expect to transition to the Algorithms state - // TODO: Handle both states - use responder::capabilities::Transition; - let state = match transition { - Transition::Algorithms(state) => state, - _ => { - return Err(SpdmError::InvalidState { - expected: "Algorithms", - got: "Capabilities", - }) - } - }; - - self.transport.send(data).await?; - debug!(self.log, "Responder sent CAPABILITIES"); - Ok(state) - } - - async fn select_algorithms( - &mut self, - state: algorithms::State, - ) -> Result { - let req = self.transport.recv().await?; - let (data, transition) = - state.handle_msg(&req[..], &mut self.buf, &mut self.transcript)?; - debug!(self.log, "Responder received NEGOTIATE_ALGORITHMS"); - - // We expect to transition to the Algorithms state - // TODO: Handle both states - use responder::algorithms::Transition; - let state = match transition { - Transition::IdAuth(state) => state, - _ => { - return Err(SpdmError::InvalidState { - expected: "IdAuth", - got: "Capabilities", - }) - } - }; - - self.transport.send(data).await?; - debug!(self.log, "Responder sent ALGORITHMS"); - Ok(state) - } -} - -/// Run the responder side of the SPDM protocol. -/// -/// The protocol operates over a TCP stream framed with a 2 byte size -/// header. Requesters and Responders are decoupled from whether the endpoint of -/// a socket is a TCP client or server. -#[allow(dead_code)] -pub async fn run( - log: Logger, - transport: Transport, -) -> Result { - let mut ctx = Ctx::new(log, transport); - - info!(ctx.log, "Responder starting version negotiation"); - let state = ctx.negotiate_version().await?; - - info!(ctx.log, "Responder starting capabilities negotiation"); - let state = ctx.negotiate_capabilities(state).await?; - - info!(ctx.log, "Responder starting algorithms selection"); - let _state = ctx.select_algorithms(state).await?; - - info!(ctx.log, "Responder completed negotiation phase"); - debug!(ctx.log, "Responder transcript: {:x?}\n", ctx.transcript.get()); - Ok(ctx.transport) -} diff --git a/sled-agent/src/bootstrap/trust_quorum/client.rs b/sled-agent/src/bootstrap/trust_quorum/client.rs deleted file mode 100644 index 0d6cdaf2c1d..00000000000 --- a/sled-agent/src/bootstrap/trust_quorum/client.rs +++ /dev/null @@ -1,60 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use std::net::{SocketAddr, SocketAddrV6}; - -use slog::Logger; -use tokio::net::TcpStream; -use vsss_rs::Share; - -use super::msgs::{Request, Response}; -use super::rack_secret::Verifier; -use super::TrustQuorumError; -use crate::bootstrap::spdm; - -pub struct Client { - log: Logger, - verifier: Verifier, - addr: SocketAddrV6, -} - -impl Client { - pub fn new(log: &Logger, verifier: Verifier, addr: SocketAddrV6) -> Client { - Client { log: log.clone(), verifier, addr } - } - - pub fn addr(&self) -> &SocketAddrV6 { - &self.addr - } - - // Connect to a trust quorum server, establish an SPDM channel, and retrieve - // a share. - pub async fn get_share(&self) -> Result { - let sock = TcpStream::connect(&self.addr).await.map_err(|err| { - TrustQuorumError::Io { - message: format!("Connecting to {}", self.addr), - err, - } - })?; - let transport = spdm::Transport::new(sock, self.log.clone()); - - // Complete SPDM negotiation and return a secure transport - let mut transport = - spdm::requester::run(self.log.clone(), transport).await?; - - // Request a share and receive it, validating it's what we expect. - let req = bincode::serialize(&Request::Share)?; - transport.send(&req).await?; - - let rsp = transport.recv().await?; - let rsp: Response = bincode::deserialize(&rsp)?; - - let Response::Share(share) = rsp; - if self.verifier.verify(&share) { - Ok(share) - } else { - Err(TrustQuorumError::InvalidShare(SocketAddr::V6(self.addr))) - } - } -} diff --git a/sled-agent/src/bootstrap/trust_quorum/error.rs b/sled-agent/src/bootstrap/trust_quorum/error.rs index 69c98bc6c31..8ff41df1f47 100644 --- a/sled-agent/src/bootstrap/trust_quorum/error.rs +++ b/sled-agent/src/bootstrap/trust_quorum/error.rs @@ -4,35 +4,17 @@ //! Error type for trust quorum code -use super::super::spdm::SpdmError; - -use std::net::SocketAddr; +use crate::bootstrap; use thiserror::Error; #[derive(Debug, Error)] pub enum TrustQuorumError { - #[error("Error running SPDM protocol: {0}")] - Spdm(#[from] SpdmError), + #[error("Error contacting bootstrap agent: {0}")] + BootstrapClient(#[from] bootstrap::client::Error), #[error("Not enough peers to unlock storage")] NotEnoughPeers, - #[error("Bincode (de)serialization error: {0}")] - Bincode(#[from] Box), - - #[error("JSON (de)serialization error: {0}")] - Json(#[from] serde_json::Error), - - #[error("Invalid secret share received from {0}")] - InvalidShare(SocketAddr), - #[error("Rack secret construction failed: {0:?}")] RackSecretConstructionFailed(vsss_rs::Error), - - #[error("IO error {message}: {err}")] - Io { - message: String, - #[source] - err: std::io::Error, - }, } diff --git a/sled-agent/src/bootstrap/trust_quorum/mod.rs b/sled-agent/src/bootstrap/trust_quorum/mod.rs index fe8b9f7bcc9..75ed9960e4a 100644 --- a/sled-agent/src/bootstrap/trust_quorum/mod.rs +++ b/sled-agent/src/bootstrap/trust_quorum/mod.rs @@ -4,40 +4,16 @@ //! The entry point for the trust quorum code //! -//! The Trust quorum relies on IPv6 multicast discovery, rack secret handling, -//! and the SPDM protocol. -//! -//! Below is the trust quorum protocol for share retrieval over TCP. -//! -//! The following protocol is shown between two sleds only, but multicast -//! discovery and share requests will continue to run until enough shares -//! have been received to recreate the rack secret. -//! -//! Sled1 Sled2 -//! ===== ===== -//! || ------- Multicast Discovery --------- || -//! || || -//! || ---- Connect to TrustQuorum port ---> || -//! || || -//! || --------- SPDM Requests ------------> || -//! || || -//! || <-------- SPDM Responses ------------ || -//! || || -//! || ----- SPDM Channel Established ------ || -//! || || -//! || --------- Request Share ------------> || -//! || || -//! || <----------- Share ------------------ || +//! This module only provides the trust quorum primitives: the rack secret and +//! its associated machinery (splitting into shares, verification, etc.). +//! Distribution and retrieval of shares is the responsibility of the +//! bootstrap-agent, which uses sprockets to secure communications between +//! sleds. -mod client; mod error; -mod msgs; mod rack_secret; -mod server; mod share_distribution; -pub use client::Client; pub use error::TrustQuorumError; pub use rack_secret::RackSecret; -pub use server::{Server, PORT}; pub use share_distribution::ShareDistribution; diff --git a/sled-agent/src/bootstrap/trust_quorum/msgs.rs b/sled-agent/src/bootstrap/trust_quorum/msgs.rs deleted file mode 100644 index 64841da5f3b..00000000000 --- a/sled-agent/src/bootstrap/trust_quorum/msgs.rs +++ /dev/null @@ -1,16 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use serde::{Deserialize, Serialize}; -use vsss_rs::Share; - -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub enum Request { - Share, -} - -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub enum Response { - Share(Share), -} diff --git a/sled-agent/src/bootstrap/trust_quorum/server.rs b/sled-agent/src/bootstrap/trust_quorum/server.rs deleted file mode 100644 index 9016bc7e9e1..00000000000 --- a/sled-agent/src/bootstrap/trust_quorum/server.rs +++ /dev/null @@ -1,137 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use std::io; -use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; - -use slog::Logger; -use tokio::net::{TcpListener, TcpStream}; -use tokio::task::JoinHandle; -use vsss_rs::Share; - -use super::msgs::Response; -use super::TrustQuorumError; -use crate::bootstrap::spdm; - -// TODO: Get port from config -// TODO: Get IpAddr from local router: -// See https://github.com/oxidecomputer/omicron/issues/443 -pub const PORT: u16 = 12347; - -/// A TCP server over which a secure SPDM channel will be established and an -/// application level trust protocol will run. -pub struct Server { - log: Logger, - share: Share, - listener: TcpListener, -} - -impl Server { - pub fn new(log: &Logger, share: Share) -> io::Result { - let addr = SocketAddrV6::new(Ipv6Addr::UNSPECIFIED, PORT, 0, 0); - let sock = socket2::Socket::new( - socket2::Domain::IPV6, - socket2::Type::STREAM, - Some(socket2::Protocol::TCP), - )?; - sock.set_only_v6(true)?; - - // Allow rebinding during TIME_WAIT - sock.set_reuse_address(true)?; - - sock.bind(&addr.into())?; - sock.listen(5)?; - sock.set_nonblocking(true)?; - - Ok(Server { - log: log.clone(), - share, - listener: TcpListener::from_std(sock.into())?, - }) - } - - pub async fn run(&mut self) -> Result<(), TrustQuorumError> { - loop { - // TODO: Track the returned handles in a FuturesUnordered and log any errors? - // Alternatively, maintain some shared state across all - // responders that is accessable to the Server. - // See https://github.com/oxidecomputer/omicron/issues/517 - let _ = self.accept().await?; - } - } - - async fn accept( - &mut self, - ) -> Result>, TrustQuorumError> - { - let (sock, addr) = self.listener.accept().await.map_err(|err| { - TrustQuorumError::Io { - message: "Accepting a connection from TCP listener".to_string(), - err, - } - })?; - debug!(self.log, "Accepted connection from {}", addr); - let share = self.share.clone(); - let log = self.log.clone(); - - Ok(tokio::spawn( - async move { run_responder(log, addr, sock, share).await }, - )) - } -} - -async fn run_responder( - log: Logger, - addr: SocketAddr, - sock: TcpStream, - share: Share, -) -> Result<(), TrustQuorumError> { - let transport = spdm::Transport::new(sock, log.clone()); - - // TODO: Future code will return a secure SPDM session. For now, we just - // return the framed transport so we can send unencrypted messages. - let mut transport = spdm::responder::run(log.clone(), transport).await?; - - info!(log, "Sending share to {}", addr); - - let req = transport.recv().await?; - - // There's only one possible request - let _ = bincode::deserialize(&req)?; - - let rsp = Response::Share(share); - let rsp = bincode::serialize(&rsp)?; - transport.send(&rsp).await?; - - Ok(()) -} - -#[cfg(test)] -mod test { - use super::super::client::Client; - use super::super::rack_secret::RackSecret; - use super::*; - - #[tokio::test] - async fn send_share() { - // Create a rack secret and some shares - let secret = RackSecret::new(); - let (shares, verifier) = secret.split(2, 2).unwrap(); - - // Start a trust quorum server, but only accept one connection - let logctx = - omicron_test_utils::dev::test_setup_log("trust_quorum::send_share"); - let log = logctx.log.clone(); - let mut server = Server::new(&log, shares[0].clone()).unwrap(); - let join_handle = tokio::spawn(async move { server.accept().await }); - - let client = - Client::new(&log, verifier, "[::1]:12347".parse().unwrap()); - let share = client.get_share().await.unwrap(); - assert_eq!(share, shares[0]); - - join_handle.await.unwrap().unwrap(); - logctx.cleanup_successful(); - } -} diff --git a/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs b/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs index cd441b22d29..00c8b3f4f04 100644 --- a/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs +++ b/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs @@ -3,16 +3,10 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use serde::{Deserialize, Serialize}; -use serde_json; use std::fmt; -use std::fs; -use std::path::{Path, PathBuf}; use vsss_rs::Share; use super::rack_secret::Verifier; -use super::TrustQuorumError; - -const FILENAME: &'static str = "share.json"; /// A ShareDistribution is an individual share of a secret along with all the /// metadata required to allow a server in possession of the share to know how @@ -37,70 +31,3 @@ impl fmt::Debug for ShareDistribution { .finish() } } - -impl ShareDistribution { - pub fn write>( - &self, - dir: P, - ) -> Result<(), TrustQuorumError> { - let mut path = PathBuf::from(dir.as_ref()); - path.push(FILENAME); - let json = serde_json::to_string(&self)?; - fs::write(&path, &json).map_err(|err| TrustQuorumError::Io { - message: format!("Writing share to {path:?}"), - err, - })?; - Ok(()) - } - - pub fn read>( - dir: P, - ) -> Result { - let mut path = PathBuf::from(dir.as_ref()); - path.push(FILENAME); - let json = - fs::read_to_string(path.to_str().unwrap()).map_err(|err| { - TrustQuorumError::Io { - message: format!("Reading share from {path:?}"), - err, - } - })?; - serde_json::from_str(&json).map_err(|e| e.into()) - } -} - -#[cfg(test)] -mod tests { - use super::super::RackSecret; - use super::*; - - const THRESHOLD: usize = 3; - const TOTAL: usize = 5; - - fn get_share_and_verifier() -> (Share, Verifier) { - let secret = RackSecret::new(); - let (mut shares, verifier) = secret.split(THRESHOLD, TOTAL).unwrap(); - (shares.pop().unwrap(), verifier) - } - - #[test] - fn write_and_read() { - let dir = std::env::temp_dir(); - - let (share, verifier) = get_share_and_verifier(); - let share_distribution = ShareDistribution { - threshold: THRESHOLD, - total_shares: TOTAL, - verifier, - share, - }; - share_distribution.write(&dir).unwrap(); - - let read = ShareDistribution::read(&dir).unwrap(); - assert_eq!(share_distribution, read); - - let mut file = dir.clone(); - file.push(FILENAME); - std::fs::remove_file(file.as_path()).unwrap(); - } -} diff --git a/sled-agent/src/bootstrap/views.rs b/sled-agent/src/bootstrap/views.rs index 0c749b3b787..3aeea3e1b9f 100644 --- a/sled-agent/src/bootstrap/views.rs +++ b/sled-agent/src/bootstrap/views.rs @@ -4,29 +4,22 @@ //! Response types for the bootstrap agent -use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use uuid::Uuid; - -/// Sent between bootstrap agents to establish trust quorum. -// Note: We intentionally do not derive `Debug` on this type, to avoid -// accidentally debug-logging the secret share. -#[derive(Serialize, Deserialize, JsonSchema, PartialEq)] -pub struct ShareResponse { - // TODO-completeness: format TBD; currently opaque. - pub shared_secret: Vec, -} +use vsss_rs::Share; /// Describes the Sled Agent running on the device. -#[derive(Serialize, Deserialize, JsonSchema, PartialEq)] +#[derive(Serialize, Deserialize, PartialEq)] pub struct SledAgentResponse { pub id: Uuid, } #[derive(Serialize, Deserialize, PartialEq)] +// Note: We intentionally do not derive `Debug` on this type, to avoid +// accidentally debug-logging the secret share. pub enum Response { SledAgentResponse(SledAgentResponse), - ShareResponse(ShareResponse), + ShareResponse(Share), } #[derive(Serialize, Deserialize, PartialEq)] From 70d3c109acd4df3243b8369836e144498f07785c Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 8 Jun 2022 17:23:40 -0400 Subject: [PATCH 07/11] cargo fmt --- sled-agent/src/bootstrap/client.rs | 2 +- sled-agent/src/bootstrap/server.rs | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/sled-agent/src/bootstrap/client.rs b/sled-agent/src/bootstrap/client.rs index 54d07d1f3cb..5360eda21ed 100644 --- a/sled-agent/src/bootstrap/client.rs +++ b/sled-agent/src/bootstrap/client.rs @@ -14,7 +14,6 @@ use crate::bootstrap::views::ResponseEnvelope; use crate::sp::SpHandle; use crate::sp::SprocketsRole; use slog::Logger; -use vsss_rs::Share; use std::borrow::Cow; use std::io; use std::net::SocketAddrV6; @@ -22,6 +21,7 @@ use thiserror::Error; use tokio::io::AsyncReadExt; use tokio::io::AsyncWriteExt; use tokio::net::TcpStream; +use vsss_rs::Share; #[derive(Debug, Error)] pub enum Error { diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 1c4616ee044..bdf98d9f9f4 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -217,15 +217,13 @@ async fn serve_single_request( } } } - Request::ShareRequest => { - match bootstrap_agent.secret_share().await { - Some(share) => Ok(Response::ShareResponse(share)), - None => { - warn!(log, "Share requested before we have one"); - Err(format!("Share request failed: share unavailable")) - } + Request::ShareRequest => match bootstrap_agent.secret_share().await { + Some(share) => Ok(Response::ShareResponse(share)), + None => { + warn!(log, "Share requested before we have one"); + Err(format!("Share request failed: share unavailable")) } - } + }, }; // Build and serialize response. From 652a6c18192469dbbd3504e3a530330d71a49314 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 9 Jun 2022 09:33:48 -0400 Subject: [PATCH 08/11] appease clippy --- sled-agent/src/bootstrap/server.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index bdf98d9f9f4..5741b585aa5 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -221,7 +221,7 @@ async fn serve_single_request( Some(share) => Ok(Response::ShareResponse(share)), None => { warn!(log, "Share requested before we have one"); - Err(format!("Share request failed: share unavailable")) + Err("Share request failed: share unavailable".to_string()) } }, }; From 65aa7081642ff00238d5fe3eae31f75f6b197aa0 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 9 Jun 2022 09:44:31 -0400 Subject: [PATCH 09/11] minor cleanup --- sled-agent/src/bootstrap/agent.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index c891ed49318..3490607d101 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -228,9 +228,10 @@ impl Agent { } // Bail out if this request includes a trust quorum share that - // doesn't match ours. TODO-correctness Need to handle a + // doesn't match ours. TODO-correctness Do we need to handle a // partially-initialized rack where we may have a share from a - // previously-started-but-not-completed init process. + // previously-started-but-not-completed init process? If rerunning + // it produces different shares this check will fail. if request.trust_quorum_share != *self.share.lock().await { let err_str = concat!( "Sled Agent already running with", @@ -284,7 +285,7 @@ impl Agent { /// sufficiently unlocked. async fn establish_sled_quorum( &self, - share: &ShareDistribution, + share: ShareDistribution, ) -> Result { let rack_secret = retry_notify( internal_service_policy(), @@ -407,8 +408,8 @@ impl Agent { ) -> Result<(), BootstrapError> { info!(&self.log, "bootstrap service initializing"); - let maybe_share = self.share.lock().await; - if let Some(share) = &*maybe_share { + let maybe_share = self.share.lock().await.clone(); + if let Some(share) = maybe_share { self.establish_sled_quorum(share).await?; } From 4995b8104541d5039068f88d9abdec5eba32ba4d Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 9 Jun 2022 11:32:06 -0400 Subject: [PATCH 10/11] Extract rack secret creation and add unit tests of it --- sled-agent/src/rack_setup/service.rs | 141 ++++++++++++++++++++------- 1 file changed, 105 insertions(+), 36 deletions(-) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 52b3bc637b6..27c712dc4ce 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -271,38 +271,17 @@ impl ServiceInner { bootstrap_addrs: Vec, ) -> Result, SetupServiceError> { // Create a rack secret, unless we're in the single-sled case. - let mut rack_secret_shares = if bootstrap_addrs.len() > 1 { - let total_shares = bootstrap_addrs.len(); - if config.rack_secret_threshold > 1 { - let secret = RackSecret::new(); - let (shares, verifier) = secret - .split(config.rack_secret_threshold, total_shares) - .map_err(SetupServiceError::SplitRackSecret)?; - - // Sanity check that `split` returned the expected number of - // shares (one per bootstrap agent) - assert_eq!(shares.len(), total_shares); + let mut maybe_rack_secret_shares = generate_rack_secret( + config.rack_secret_threshold, + bootstrap_addrs.len(), + &self.log, + )?; - Some(shares.into_iter().map(move |share| ShareDistribution { - threshold: config.rack_secret_threshold, - total_shares, - verifier: verifier.clone(), - share, - })) - } else { - warn!( - self.log, - concat!( - "Skipping rack secret creation due to config", - " (despite discovery of {} bootstrap agents)" - ), - total_shares, - ); - None - } - } else { - None - }; + // Sanity check that the returned iterator (if we got one) is the length + // we expect. + if let Some(rack_secret_shares) = maybe_rack_secret_shares.as_ref() { + assert_eq!(rack_secret_shares.len(), bootstrap_addrs.len()); + } let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); let reserved_rack_subnet = ReservedRackSubnet::new(config.az_subnet()); @@ -354,15 +333,15 @@ impl ServiceInner { SledAllocation { initialization_request: SledAgentRequest { subnet, - trust_quorum_share: rack_secret_shares.as_mut().map( - |shares_iter| { + trust_quorum_share: maybe_rack_secret_shares + .as_mut() + .map(|shares_iter| { // We asserted when creating - // `rack_secret_shares` that it contained + // `maybe_rack_secret_shares` that it contained // exactly the number of shares as we have // bootstrap addrs, so we can unwrap here. shares_iter.next().unwrap() - }, - ), + }), }, services_request: request, }, @@ -616,3 +595,93 @@ impl ServiceInner { Ok(()) } } + +fn generate_rack_secret( + rack_secret_threshold: usize, + total_shares: usize, + log: &Logger, +) -> Result< + Option>, + SetupServiceError, +> { + // We do not generate a rack secret if we only have a single sled or if our + // config specifies that the threshold for unlock is only a single sled. + if total_shares <= 1 { + info!(log, "Skipping rack secret creation (only one sled present)"); + return Ok(None); + } + + if rack_secret_threshold <= 1 { + warn!( + log, + concat!( + "Skipping rack secret creation due to config", + " (despite discovery of {} bootstrap agents)" + ), + total_shares, + ); + return Ok(None); + } + + let secret = RackSecret::new(); + let (shares, verifier) = secret + .split(rack_secret_threshold, total_shares) + .map_err(SetupServiceError::SplitRackSecret)?; + + Ok(Some(shares.into_iter().map(move |share| ShareDistribution { + threshold: rack_secret_threshold, + total_shares, + verifier: verifier.clone(), + share, + }))) +} + +#[cfg(test)] +mod tests { + use super::*; + use omicron_test_utils::dev::test_setup_log; + + #[test] + fn test_generate_rack_secret() { + let logctx = test_setup_log("test_generate_rack_secret"); + + // No secret generated if total_shares <= 1 + let maybe_shares = generate_rack_secret(10, 1, &logctx.log).unwrap(); + assert!(maybe_shares.is_none()); + + // No secret generated if threshold <= 1 + let maybe_shares = generate_rack_secret(1, 10, &logctx.log).unwrap(); + assert!(maybe_shares.is_none()); + + // Secret generation fails if threshold > total shares + let maybe_shares = generate_rack_secret(10, 5, &logctx.log); + assert!(matches!( + maybe_shares, + Err(SetupServiceError::SplitRackSecret(_)) + )); + + // Secret generation succeeds if threshold <= total shares and both are + // > 1, and the returned iterator satifies: + // + // * total length == total shares + // * each share is distinct + for total_shares in 2..=32 { + for threshold in 2..=total_shares { + let shares = + generate_rack_secret(threshold, total_shares, &logctx.log) + .unwrap() + .unwrap(); + + assert_eq!(shares.len(), total_shares); + + // `Share` doesn't implement `Hash`, but it's a newtype around + // `Vec` (which does). Unwrap the newtype to check that all + // shares are distinct. + let shares_set = shares + .map(|share_dist| share_dist.share.0) + .collect::>(); + assert_eq!(shares_set.len(), total_shares); + } + } + } +} From 4f0403163825647da645210d6b2bc81f989bf77a Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 9 Jun 2022 12:30:53 -0400 Subject: [PATCH 11/11] clean up logfile after test --- sled-agent/src/rack_setup/service.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 27c712dc4ce..99a6ec9534e 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -683,5 +683,7 @@ mod tests { assert_eq!(shares_set.len(), total_shares); } } + + logctx.cleanup_successful(); } }