From 7cb0555ff91fa47209c9abf9ba6b615d7b7d6554 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 5 Apr 2022 10:54:47 -0400 Subject: [PATCH 1/4] [sled-agent] Decouple RSS from Bootstrap Agent, run in distinct task --- sled-agent/src/bin/sled-agent.rs | 4 +- sled-agent/src/bootstrap/agent.rs | 189 ++-------------------- sled-agent/src/bootstrap/config.rs | 46 +----- sled-agent/src/lib.rs | 1 + sled-agent/src/rack_setup/config.rs | 52 ++++++ sled-agent/src/rack_setup/mod.rs | 8 + sled-agent/src/rack_setup/service.rs | 227 +++++++++++++++++++++++++++ 7 files changed, 304 insertions(+), 223 deletions(-) create mode 100644 sled-agent/src/rack_setup/config.rs create mode 100644 sled-agent/src/rack_setup/mod.rs create mode 100644 sled-agent/src/rack_setup/service.rs diff --git a/sled-agent/src/bin/sled-agent.rs b/sled-agent/src/bin/sled-agent.rs index 6c24ed541a4..c2989ee0280 100644 --- a/sled-agent/src/bin/sled-agent.rs +++ b/sled-agent/src/bin/sled-agent.rs @@ -13,9 +13,9 @@ use omicron_common::api::external::Error; use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; use omicron_sled_agent::bootstrap::{ - config::Config as BootstrapConfig, config::SetupServiceConfig as RssConfig, - server as bootstrap_server, + config::Config as BootstrapConfig, server as bootstrap_server, }; +use omicron_sled_agent::rack_setup::config::SetupServiceConfig as RssConfig; use omicron_sled_agent::{config::Config as SledConfig, server as sled_server}; use std::path::PathBuf; use structopt::StructOpt; diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index c0576b3788c..9f85cf6d1c6 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -10,6 +10,7 @@ use super::trust_quorum::{ self, RackSecret, ShareDistribution, TrustQuorumError, }; use super::views::ShareResponse; +use crate::rack_setup::service::Service as RackSetupService; use omicron_common::api::external::Error as ExternalError; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, @@ -19,13 +20,11 @@ use slog::Logger; use std::io; use std::path::Path; use thiserror::Error; +use tokio::sync::Mutex; /// Describes errors which may occur while operating the bootstrap service. #[derive(Error, Debug)] pub enum BootstrapError { - #[error("Cannot deserialize TOML file")] - Toml(#[from] toml::de::Error), - #[error("Error accessing filesystem: {0}")] Io(#[from] std::io::Error), @@ -35,17 +34,8 @@ pub enum BootstrapError { #[error("Error modifying SMF service: {0}")] SmfAdm(#[from] smf::AdmError), - #[error("Error making HTTP request to Sled Agent: {0}")] - SledApi(#[from] sled_agent_client::Error), - - #[error("Error making HTTP request to Nexus: {0}")] - NexusApi(#[from] nexus_client::Error), - #[error(transparent)] TrustQuorum(#[from] TrustQuorumError), - - #[error("Configuration changed")] - Configuration, } impl From for ExternalError { @@ -82,13 +72,15 @@ pub(crate) struct Agent { log: Logger, peer_monitor: discovery::PeerMonitor, share: Option, + + rss: Mutex>, } impl Agent { pub fn new(log: Logger) -> Result { let peer_monitor = discovery::PeerMonitor::new(&log)?; let share = read_key_share()?; - Ok(Agent { log, peer_monitor, share }) + Ok(Agent { log, peer_monitor, share, rss: Mutex::new(None) }) } /// Implements the "request share" API. @@ -207,169 +199,14 @@ impl Agent { Ok(()) } - // In lieu of having an operator send requests to all sleds via an - // initialization service, the sled-agent configuration may allow for the - // automated injection of setup requests from a sled. - async fn inject_rack_setup_service_requests( - &self, - config: &Config, - ) -> Result<(), BootstrapError> { + // Initializes the Rack Setup Service. + async fn start_rss(&self, config: &Config) -> Result<(), BootstrapError> { if let Some(rss_config) = &config.rss_config { - info!(self.log, "Injecting RSS configuration: {:#?}", rss_config); - - let serialized_config = toml::Value::try_from(&config) - .expect("Cannot serialize configuration"); - let config_str = toml::to_string(&serialized_config) - .expect("Cannot turn config to string"); - - // First, check if this request has previously been made. - // - // Normally, the rack setup service is run with a human-in-the-loop, - // but with this automated injection, we need a way to determine the - // (destructive) initialization has occurred. - // - // We do this by storing the configuration at "rss_config_path" - // after successfully performing initialization. - let rss_config_path = - std::path::Path::new(crate::OMICRON_CONFIG_PATH) - .join("config-rss.toml"); - if rss_config_path.exists() { - info!( - self.log, - "RSS configuration already exists at {}", - rss_config_path.to_string_lossy() - ); - let old_config: Config = toml::from_str( - &tokio::fs::read_to_string(&rss_config_path).await?, - )?; - if &old_config == config { - info!( - self.log, - "RSS config already applied from: {}", - rss_config_path.to_string_lossy() - ); - return Ok(()); - } - - // TODO(https://github.com/oxidecomputer/omicron/issues/724): - // We could potentially handle this case by deleting all - // partitions (in preparation for applying the new - // configuration), but at the moment it's an error. - warn!( - self.log, - "Rack Setup Service Config was already applied, but has changed. - This means that you may have partitions set up on this sled, but they - may not match the ones requested by the supplied configuration.\n - To re-initialize this sled: - - Disable all Oxide services - - Delete all partitions within the attached zpool - - Delete the configuration file ({}) - - Restart the sled agent", - rss_config_path.to_string_lossy() - ); - return Err(BootstrapError::Configuration); - } else { - info!( - self.log, - "No RSS configuration found at {}", - rss_config_path.to_string_lossy() - ); - } - - // Issue the dataset initialization requests to all sleds. - futures::future::join_all( - rss_config.requests.iter().map(|request| async move { - info!(self.log, "observing request: {:#?}", request); - let dur = std::time::Duration::from_secs(60); - let client = reqwest::ClientBuilder::new() - .connect_timeout(dur) - .timeout(dur) - .build() - .map_err(|e| nexus_client::Error::::from(e))?; - let client = sled_agent_client::Client::new_with_client( - &format!("http://{}", request.sled_address), - client, - self.log.new(o!("SledAgentClient" => request.sled_address)), - ); - - info!(self.log, "sending partition requests..."); - for partition in &request.partitions { - let filesystem_put = || async { - info!(self.log, "creating new filesystem: {:?}", partition); - client.filesystem_put(&partition.clone().into()) - .await - .map_err(BackoffError::transient)?; - Ok::< - (), - BackoffError< - sled_agent_client::Error, - >, - >(()) - }; - let log_failure = |error, _| { - warn!(self.log, "failed to create filesystem"; "error" => ?error); - }; - retry_notify( - internal_service_policy(), - filesystem_put, - log_failure, - ).await?; - } - Ok(()) - }) - ).await.into_iter().collect::, BootstrapError>>()?; - - // Issue service initialization requests. - // - // Note that this must happen *after* the partition initialization, - // to ensure that CockroachDB has been initialized before Nexus - // starts. - futures::future::join_all( - rss_config.requests.iter().map(|request| async move { - info!(self.log, "observing request: {:#?}", request); - let dur = std::time::Duration::from_secs(60); - let client = reqwest::ClientBuilder::new() - .connect_timeout(dur) - .timeout(dur) - .build() - .map_err(|e| nexus_client::Error::::from(e))?; - let client = sled_agent_client::Client::new_with_client( - &format!("http://{}", request.sled_address), - client, - self.log.new(o!("SledAgentClient" => request.sled_address)), - ); - - info!(self.log, "sending service requests..."); - let services_put = || async { - info!(self.log, "initializing sled services: {:?}", request.services); - client.services_put( - &sled_agent_client::types::ServiceEnsureBody { - services: request.services.iter().map(|s| s.clone().into()).collect() - }) - .await - .map_err(BackoffError::transient)?; - Ok::< - (), - BackoffError< - sled_agent_client::Error, - >, - >(()) - }; - let log_failure = |error, _| { - warn!(self.log, "failed to initialize services"; "error" => ?error); - }; - retry_notify( - internal_service_policy(), - services_put, - log_failure, - ).await?; - Ok::<(), BootstrapError>(()) - }) - ).await.into_iter().collect::, BootstrapError>>()?; - - // Finally, make sure the configuration is saved so we don't inject - // the requests on the next iteration. - tokio::fs::write(rss_config_path, config_str).await?; + let rss = RackSetupService::new( + self.log.new(o!("component" => "RSS")), + rss_config.clone(), + ); + self.rss.lock().await.replace(rss); } Ok(()) } @@ -391,7 +228,7 @@ impl Agent { self.establish_sled_quorum().await?; } - self.inject_rack_setup_service_requests(config).await?; + self.start_rss(config).await?; Ok(()) } diff --git a/sled-agent/src/bootstrap/config.rs b/sled-agent/src/bootstrap/config.rs index 15ab42f8246..1fec659b5b3 100644 --- a/sled-agent/src/bootstrap/config.rs +++ b/sled-agent/src/bootstrap/config.rs @@ -4,14 +4,10 @@ //! Interfaces for working with bootstrap agent configuration -use crate::config::ConfigError; -use crate::params::{DatasetEnsureBody, ServiceRequest}; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; use serde::Deserialize; use serde::Serialize; -use std::net::SocketAddr; -use std::path::Path; use uuid::Uuid; /// Configuration for a bootstrap agent @@ -21,45 +17,5 @@ pub struct Config { pub dropshot: ConfigDropshot, pub log: ConfigLogging, - pub rss_config: Option, -} - -/// Configuration for the "rack setup service", which is controlled during -/// bootstrap. -/// -/// The Rack Setup Service should be responsible for one-time setup actions, -/// such as CockroachDB placement and initialization. Without operator -/// intervention, however, these actions need a way to be automated in our -/// deployment. -/// -/// By injecting this (optional) configuration into the bootstrap agent, it -/// can act as a stand-in initialization service. -#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] -pub struct SetupServiceConfig { - #[serde(default, rename = "request")] - pub requests: Vec, -} - -/// A request to initialize a sled. -#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] -pub struct SledRequest { - /// The Sled Agent address receiving these requests. - pub sled_address: SocketAddr, - - /// Partitions to be created. - #[serde(default, rename = "partition")] - pub partitions: Vec, - - /// Services to be instantiated. - #[serde(default, rename = "service")] - pub services: Vec, -} - -impl SetupServiceConfig { - pub fn from_file>(path: P) -> Result { - let path = path.as_ref(); - let contents = std::fs::read_to_string(path)?; - let config = toml::from_str(&contents)?; - Ok(config) - } + pub rss_config: Option, } diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index 245af13ab21..fb93dfc5fe1 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -27,6 +27,7 @@ mod instance; mod instance_manager; mod nexus; mod params; +pub mod rack_setup; pub mod server; mod services; mod sled_agent; diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs new file mode 100644 index 00000000000..4d284cfed7b --- /dev/null +++ b/sled-agent/src/rack_setup/config.rs @@ -0,0 +1,52 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Interfaces for working with RSS config. + +use crate::config::ConfigError; +use crate::params::{DatasetEnsureBody, ServiceRequest}; +use serde::Deserialize; +use serde::Serialize; +use std::net::SocketAddr; +use std::path::Path; + +/// Configuration for the "rack setup service", which is controlled during +/// bootstrap. +/// +/// The Rack Setup Service should be responsible for one-time setup actions, +/// such as CockroachDB placement and initialization. Without operator +/// intervention, however, these actions need a way to be automated in our +/// deployment. +/// +/// By injecting this (optional) configuration into the bootstrap agent, it +/// can act as a stand-in initialization service. +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] +pub struct SetupServiceConfig { + #[serde(default, rename = "request")] + pub requests: Vec, +} + +/// A request to initialize a sled. +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] +pub struct SledRequest { + /// The Sled Agent address receiving these requests. + pub sled_address: SocketAddr, + + /// Partitions to be created. + #[serde(default, rename = "partition")] + pub partitions: Vec, + + /// Services to be instantiated. + #[serde(default, rename = "service")] + pub services: Vec, +} + +impl SetupServiceConfig { + pub fn from_file>(path: P) -> Result { + let path = path.as_ref(); + let contents = std::fs::read_to_string(path)?; + let config = toml::from_str(&contents)?; + Ok(config) + } +} diff --git a/sled-agent/src/rack_setup/mod.rs b/sled-agent/src/rack_setup/mod.rs new file mode 100644 index 00000000000..e947ff99ef0 --- /dev/null +++ b/sled-agent/src/rack_setup/mod.rs @@ -0,0 +1,8 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Rack Setup Service + +pub mod config; +pub mod service; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs new file mode 100644 index 00000000000..35d46b536df --- /dev/null +++ b/sled-agent/src/rack_setup/service.rs @@ -0,0 +1,227 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Rack Setup Service implementation + +use super::config::SetupServiceConfig as Config; +use omicron_common::backoff::{ + internal_service_policy, retry_notify, BackoffError, +}; +use slog::Logger; +use thiserror::Error; + +/// Describes errors which may occur while operating the setup service. +#[derive(Error, Debug)] +pub enum SetupServiceError { + #[error("Error accessing filesystem: {0}")] + Io(#[from] std::io::Error), + + #[error("Error making HTTP request to Nexus: {0}")] + NexusApi(#[from] nexus_client::Error), + + #[error("Error making HTTP request to Sled Agent: {0}")] + SledApi(#[from] sled_agent_client::Error), + + #[error("Cannot deserialize TOML file")] + Toml(#[from] toml::de::Error), + + #[error("Configuration changed")] + Configuration, +} + +/// The interface to the Rack Setup Service. +pub struct Service { + handle: tokio::task::JoinHandle>, +} + +impl Service { + pub fn new(log: Logger, config: Config) -> Self { + let handle = tokio::task::spawn(async move { + let svc = ServiceInner::new(log); + svc.inject_rack_setup_requests(&config).await + }); + + Service { handle } + } + + /// Awaits the completion of the RSS service. + pub async fn join(self) -> Result<(), SetupServiceError> { + self.handle.await.expect("Rack Setup Service Task panicked") + } +} + +/// The implementation of the Rack Setup Service. +struct ServiceInner { + log: Logger, +} + +impl ServiceInner { + pub fn new(log: Logger) -> Self { + ServiceInner { log } + } + + // In lieu of having an operator send requests to all sleds via an + // initialization service, the sled-agent configuration may allow for the + // automated injection of setup requests from a sled. + async fn inject_rack_setup_requests( + &self, + config: &Config, + ) -> Result<(), SetupServiceError> { + info!(self.log, "Injecting RSS configuration: {:#?}", config); + + let serialized_config = toml::Value::try_from(&config) + .expect("Cannot serialize configuration"); + let config_str = toml::to_string(&serialized_config) + .expect("Cannot turn config to string"); + + // First, check if this request has previously been made. + // + // Normally, the rack setup service is run with a human-in-the-loop, + // but with this automated injection, we need a way to determine the + // (destructive) initialization has occurred. + // + // We do this by storing the configuration at "rss_config_path" + // after successfully performing initialization. + let rss_config_path = std::path::Path::new(crate::OMICRON_CONFIG_PATH) + .join("config-rss.toml"); + if rss_config_path.exists() { + info!( + self.log, + "RSS configuration already exists at {}", + rss_config_path.to_string_lossy() + ); + let old_config: Config = toml::from_str( + &tokio::fs::read_to_string(&rss_config_path).await?, + )?; + if &old_config == config { + info!( + self.log, + "RSS config already applied from: {}", + rss_config_path.to_string_lossy() + ); + return Ok(()); + } + + // TODO(https://github.com/oxidecomputer/omicron/issues/724): + // We could potentially handle this case by deleting all + // partitions (in preparation for applying the new + // configuration), but at the moment it's an error. + warn!( + self.log, + "Rack Setup Service Config was already applied, but has changed. + This means that you may have partitions set up on this sled, but they + may not match the ones requested by the supplied configuration.\n + To re-initialize this sled: + - Disable all Oxide services + - Delete all partitions within the attached zpool + - Delete the configuration file ({}) + - Restart the sled agent", + rss_config_path.to_string_lossy() + ); + return Err(SetupServiceError::Configuration); + } else { + info!( + self.log, + "No RSS configuration found at {}", + rss_config_path.to_string_lossy() + ); + } + + // Issue the dataset initialization requests to all sleds. + futures::future::join_all( + config.requests.iter().map(|request| async move { + info!(self.log, "observing request: {:#?}", request); + let dur = std::time::Duration::from_secs(60); + let client = reqwest::ClientBuilder::new() + .connect_timeout(dur) + .timeout(dur) + .build() + .map_err(|e| nexus_client::Error::::from(e))?; + let client = sled_agent_client::Client::new_with_client( + &format!("http://{}", request.sled_address), + client, + self.log.new(o!("SledAgentClient" => request.sled_address)), + ); + + info!(self.log, "sending partition requests..."); + for partition in &request.partitions { + let filesystem_put = || async { + info!(self.log, "creating new filesystem: {:?}", partition); + client.filesystem_put(&partition.clone().into()) + .await + .map_err(BackoffError::transient)?; + Ok::< + (), + BackoffError< + sled_agent_client::Error, + >, + >(()) + }; + let log_failure = |error, _| { + warn!(self.log, "failed to create filesystem"; "error" => ?error); + }; + retry_notify( + internal_service_policy(), + filesystem_put, + log_failure, + ).await?; + } + Ok(()) + }) + ).await.into_iter().collect::, SetupServiceError>>()?; + + // Issue service initialization requests. + // + // Note that this must happen *after* the partition initialization, + // to ensure that CockroachDB has been initialized before Nexus + // starts. + futures::future::join_all( + config.requests.iter().map(|request| async move { + info!(self.log, "observing request: {:#?}", request); + let dur = std::time::Duration::from_secs(60); + let client = reqwest::ClientBuilder::new() + .connect_timeout(dur) + .timeout(dur) + .build() + .map_err(|e| nexus_client::Error::::from(e))?; + let client = sled_agent_client::Client::new_with_client( + &format!("http://{}", request.sled_address), + client, + self.log.new(o!("SledAgentClient" => request.sled_address)), + ); + + info!(self.log, "sending service requests..."); + let services_put = || async { + info!(self.log, "initializing sled services: {:?}", request.services); + client.services_put( + &sled_agent_client::types::ServiceEnsureBody { + services: request.services.iter().map(|s| s.clone().into()).collect() + }) + .await + .map_err(BackoffError::transient)?; + Ok::< + (), + BackoffError< + sled_agent_client::Error, + >, + >(()) + }; + let log_failure = |error, _| { + warn!(self.log, "failed to initialize services"; "error" => ?error); + }; + retry_notify( + internal_service_policy(), + services_put, + log_failure, + ).await?; + Ok::<(), SetupServiceError>(()) + }) + ).await.into_iter().collect::, SetupServiceError>>()?; + + // Finally, make sure the configuration is saved so we don't inject + // the requests on the next iteration. + tokio::fs::write(rss_config_path, config_str).await?; + Ok(()) + } +} From 0b492fc55d253a264090245e92a6fe2f0ede0871 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 5 Apr 2022 12:10:42 -0400 Subject: [PATCH 2/4] no nexus errors --- sled-agent/src/rack_setup/service.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 35d46b536df..470725b961d 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -17,15 +17,15 @@ pub enum SetupServiceError { #[error("Error accessing filesystem: {0}")] Io(#[from] std::io::Error), - #[error("Error making HTTP request to Nexus: {0}")] - NexusApi(#[from] nexus_client::Error), - #[error("Error making HTTP request to Sled Agent: {0}")] SledApi(#[from] sled_agent_client::Error), #[error("Cannot deserialize TOML file")] Toml(#[from] toml::de::Error), + #[error(transparent)] + Http(#[from] reqwest::Error), + #[error("Configuration changed")] Configuration, } @@ -136,8 +136,7 @@ impl ServiceInner { let client = reqwest::ClientBuilder::new() .connect_timeout(dur) .timeout(dur) - .build() - .map_err(|e| nexus_client::Error::::from(e))?; + .build()?; let client = sled_agent_client::Client::new_with_client( &format!("http://{}", request.sled_address), client, @@ -183,8 +182,7 @@ impl ServiceInner { let client = reqwest::ClientBuilder::new() .connect_timeout(dur) .timeout(dur) - .build() - .map_err(|e| nexus_client::Error::::from(e))?; + .build()?; let client = sled_agent_client::Client::new_with_client( &format!("http://{}", request.sled_address), client, From 8c9fe56994cb2060be081a1ba8f198b67c1e2732 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 5 Apr 2022 12:15:00 -0400 Subject: [PATCH 3/4] [cleanup] Rename partition to dataset --- openapi/sled-agent.json | 10 +++++----- sled-agent/src/http_entrypoints.rs | 2 +- sled-agent/src/params.rs | 14 +++++++------- sled-agent/src/rack_setup/config.rs | 6 +++--- sled-agent/src/rack_setup/service.rs | 16 ++++++++-------- sled-agent/src/sled_agent.rs | 6 +++--- sled-agent/src/storage_manager.rs | 2 +- smf/sled-agent/config-rss.toml | 28 +++++++++++++++++----------- 8 files changed, 45 insertions(+), 39 deletions(-) diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 5d3fe21f20a..d7960bb5ded 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -238,13 +238,13 @@ ] }, "DatasetEnsureBody": { - "description": "Used to request a new partition kind exists within a zpool.\n\nMany partition types are associated with services that will be instantiated when the partition is detected.", + "description": "Used to request a new dataset kind exists within a zpool.\n\nMany dataset types are associated with services that will be instantiated when the dataset is detected.", "type": "object", "properties": { "address": { "type": "string" }, - "partition_kind": { + "dataset_kind": { "$ref": "#/components/schemas/DatasetKind" }, "zpool_uuid": { @@ -254,7 +254,7 @@ }, "required": [ "address", - "partition_kind", + "dataset_kind", "zpool_uuid" ] }, @@ -941,7 +941,7 @@ ] }, "ServiceEnsureBody": { - "description": "Used to request that the Sled initialize certain services on initialization.\n\nThis may be used to record that certain sleds are responsible for launching services which may not be associated with a partition, such as Nexus.", + "description": "Used to request that the Sled initialize certain services on initialization.\n\nThis may be used to record that certain sleds are responsible for launching services which may not be associated with a dataset, such as Nexus.", "type": "object", "properties": { "services": { @@ -1102,4 +1102,4 @@ } } } -} \ No newline at end of file +} diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 11f6d9d2eb3..c097a85fcec 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -68,7 +68,7 @@ async fn filesystem_put( let body_args = body.into_inner(); sa.filesystem_ensure( body_args.zpool_uuid, - body_args.partition_kind, + body_args.dataset_kind, body_args.address, ) .await diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 23c8f739a17..0ff062f00a8 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -193,21 +193,21 @@ impl std::fmt::Display for DatasetKind { } } -/// Used to request a new partition kind exists within a zpool. +/// Used to request a new dataset kind exists within a zpool. /// -/// Many partition types are associated with services that will be -/// instantiated when the partition is detected. +/// Many dataset types are associated with services that will be +/// instantiated when the dataset is detected. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] pub struct DatasetEnsureBody { // The name (and UUID) of the Zpool which we are inserting into. pub zpool_uuid: Uuid, // The type of the filesystem. - pub partition_kind: DatasetKind, + pub dataset_kind: DatasetKind, // The address on which the zone will listen for requests. pub address: SocketAddr, // NOTE: We could insert a UUID here, if we want that to be set by the // caller explicitly? Currently, the lack of a UUID implies that - // "at most one partition type" exists within a zpool. + // "at most one dataset type" exists within a zpool. // // It's unclear if this is actually necessary - making this change // would also require the RSS to query existing datasets before @@ -219,7 +219,7 @@ impl From for sled_agent_client::types::DatasetEnsureBody { fn from(p: DatasetEnsureBody) -> Self { Self { zpool_uuid: p.zpool_uuid, - partition_kind: p.partition_kind.into(), + dataset_kind: p.dataset_kind.into(), address: p.address.to_string(), } } @@ -247,7 +247,7 @@ impl From for sled_agent_client::types::ServiceRequest { /// Used to request that the Sled initialize certain services on initialization. /// /// This may be used to record that certain sleds are responsible for -/// launching services which may not be associated with a partition, such +/// launching services which may not be associated with a dataset, such /// as Nexus. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] pub struct ServiceEnsureBody { diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 4d284cfed7b..3174777a00e 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -33,9 +33,9 @@ pub struct SledRequest { /// The Sled Agent address receiving these requests. pub sled_address: SocketAddr, - /// Partitions to be created. - #[serde(default, rename = "partition")] - pub partitions: Vec, + /// Datasets to be created. + #[serde(default, rename = "dataset")] + pub datasets: Vec, /// Services to be instantiated. #[serde(default, rename = "service")] diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 470725b961d..90b194cafe6 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -105,16 +105,16 @@ impl ServiceInner { // TODO(https://github.com/oxidecomputer/omicron/issues/724): // We could potentially handle this case by deleting all - // partitions (in preparation for applying the new + // datasets (in preparation for applying the new // configuration), but at the moment it's an error. warn!( self.log, "Rack Setup Service Config was already applied, but has changed. - This means that you may have partitions set up on this sled, but they + This means that you may have datasets set up on this sled, but they may not match the ones requested by the supplied configuration.\n To re-initialize this sled: - Disable all Oxide services - - Delete all partitions within the attached zpool + - Delete all datasets within the attached zpool - Delete the configuration file ({}) - Restart the sled agent", rss_config_path.to_string_lossy() @@ -143,11 +143,11 @@ impl ServiceInner { self.log.new(o!("SledAgentClient" => request.sled_address)), ); - info!(self.log, "sending partition requests..."); - for partition in &request.partitions { + info!(self.log, "sending dataset requests..."); + for dataset in &request.datasets { let filesystem_put = || async { - info!(self.log, "creating new filesystem: {:?}", partition); - client.filesystem_put(&partition.clone().into()) + info!(self.log, "creating new filesystem: {:?}", dataset); + client.filesystem_put(&dataset.clone().into()) .await .map_err(BackoffError::transient)?; Ok::< @@ -172,7 +172,7 @@ impl ServiceInner { // Issue service initialization requests. // - // Note that this must happen *after* the partition initialization, + // Note that this must happen *after* the dataset initialization, // to ensure that CockroachDB has been initialized before Nexus // starts. futures::future::join_all( diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index c93074a844e..2ed62264ca6 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -68,7 +68,7 @@ impl From for omicron_common::api::external::Error { /// /// Contains both a connection to the Nexus, as well as managed instances. pub struct SledAgent { - // Component of Sled Agent responsible for storage and partition management. + // Component of Sled Agent responsible for storage and dataset management. storage: StorageManager, // Component of Sled Agent responsible for managing Propolis instances. @@ -182,11 +182,11 @@ impl SledAgent { pub async fn filesystem_ensure( &self, zpool_uuid: Uuid, - partition_kind: DatasetKind, + dataset_kind: DatasetKind, address: SocketAddr, ) -> Result<(), Error> { self.storage - .upsert_filesystem(zpool_uuid, partition_kind, address) + .upsert_filesystem(zpool_uuid, dataset_kind, address) .await?; Ok(()) } diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 0b1cfc83ce7..97d3033c13f 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -513,7 +513,7 @@ impl StorageWorker { // If requested via the `do_format` parameter, may also initialize // these resources. // - // Returns the UUID attached to the underlying ZFS partition. + // Returns the UUID attached to the underlying ZFS dataset. // Returns (was_inserted, Uuid). async fn initialize_dataset_and_zone( &self, diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index c9fd9ff43dc..a2f2116940f 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -3,38 +3,44 @@ [[request]] sled_address = "[fd00:1de::]:12345" -[[request.partition]] +# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus +# should allocate crucible datasets. +[[request.dataset]] zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" address = "[fd00:1de::9]:32345" -partition_kind.type = "crucible" +dataset_kind.type = "crucible" -[[request.partition]] +[[request.dataset]] zpool_uuid = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" address = "[fd00:1de::10]:32345" -partition_kind.type = "crucible" +dataset_kind.type = "crucible" -[[request.partition]] +[[request.dataset]] zpool_uuid = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" address = "[fd00:1de::11]:32345" -partition_kind.type = "crucible" +dataset_kind.type = "crucible" -[[request.partition]] +[[request.dataset]] zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" address = "[fd00:1de::5]:32221" -partition_kind.type = "cockroach_db" -partition_kind.all_addresses = [ +dataset_kind.type = "cockroach_db" +dataset_kind.all_addresses = [ "[fd00:1de::5]:32221", ] -[[request.partition]] +# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus +# should allocate clickhouse datasets. +[[request.dataset]] zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" address = "[fd00:1de::8]:8123" -partition_kind.type = "clickhouse" +dataset_kind.type = "clickhouse" [[request.service]] name = "nexus" addresses = [ "[fd00:1de::7]:12220", "[fd00:1de::7]:12221" ] +# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus +# should allocate Oximeter services. [[request.service]] name = "oximeter" addresses = [ "[fd00:1de::6]:12223" ] From 6e56c21aa063c3111a4cdbe87a3acda9c7dabaf1 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 5 Apr 2022 13:59:28 -0400 Subject: [PATCH 4/4] openapi --- openapi/sled-agent.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index d7960bb5ded..cfaf49594e7 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -1102,4 +1102,4 @@ } } } -} +} \ No newline at end of file