diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index 16218589d9b..0ce10f236a1 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -147,11 +147,11 @@ When we deploy, we're effectively creating a number of different zones for all the components that make up Omicron (Nexus, Clickhouse, Crucible, etc). Since all these services run in different zones they cannot communicate with each other (and Sled Agent in the global zone) via `localhost`. In practice, -we'll assign addresses as per RFD 63 as well as incorporating DNS based +we assign addresses as per RFD 63 as well as incorporating DNS based service discovery. -For the purposes of local development today, we specify some hardcoded IPv6 -unique local addresses in the subnet of the first Sled Agent: `fd00:1122:3344:1::/64`. +For the purposes of local development today, we specify some hardcoded IP +addresses. If you'd like to modify these values to suit your local network, you can modify them within the https://github.com/oxidecomputer/omicron/tree/main/smf[`smf/` subdirectory]. @@ -164,15 +164,6 @@ be set as a default route for the Nexus zone. |=================================================================================================== | Service | Endpoint | Sled Agent: Bootstrap | Derived from MAC address of physical data link. -| Sled Agent: Dropshot API | `[fd00:1122:3344:0101::1]:12345` -| Cockroach DB | `[fd00:1122:3344:0101::2]:32221` -| Nexus: Internal API | `[fd00:1122:3344:0101::3]:12221` -| Oximeter | `[fd00:1122:3344:0101::4]:12223` -| Clickhouse | `[fd00:1122:3344:0101::5]:8123` -| Crucible Downstairs 1 | `[fd00:1122:3344:0101::6]:32345` -| Crucible Downstairs 2 | `[fd00:1122:3344:0101::7]:32345` -| Crucible Downstairs 3 | `[fd00:1122:3344:0101::8]:32345` -| Internal DNS Service | `[fd00:1122:3344:0001::1]:5353` | Nexus: External API | `192.168.1.20:80` | Internet Gateway | None, but can be set in `smf/sled-agent/config.toml` |=================================================================================================== diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 8b81dc61fdb..5abf7b4ea99 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -8,7 +8,7 @@ use crate::authz; use crate::context::OpContext; use crate::db; use crate::db::lookup::LookupPath; -use crate::internal_api::params::ServicePutRequest; +use crate::internal_api::params::RackInitializationRequest; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; @@ -57,12 +57,13 @@ impl super::Nexus { &self, opctx: &OpContext, rack_id: Uuid, - services: Vec, + request: RackInitializationRequest, ) -> Result<(), Error> { opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; // Convert from parameter -> DB type. - let services: Vec<_> = services + let services: Vec<_> = request + .services .into_iter() .map(|svc| { db::model::Service::new( @@ -74,14 +75,51 @@ impl super::Nexus { }) .collect(); - // TODO(https://github.com/oxidecomputer/omicron/pull/1216): - // Actually supply datasets provided from the sled agent. - // - // This requires corresponding changes on the RSS side. 
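
// As context for the change below: `rack_initialize` now takes a `request`
// that bundles services and datasets, using the `RackInitializationRequest`
// type added to nexus/types/src/internal_api/params.rs later in this patch.
// A minimal sketch of what a caller (RSS, or a test harness) constructs;
// this mirrors the empty request used by nexus/test-utils below and is
// illustrative only, not part of the patch:
//
//     use omicron_nexus::internal_api::params::RackInitializationRequest;
//
//     let request = RackInitializationRequest {
//         // One ServicePutRequest per service (Nexus, internal DNS, ...).
//         services: vec![],
//         // One DatasetCreateRequest { zpool_id, dataset_id, request } per
//         // dataset, e.g. the CockroachDB dataset placed by the service plan.
//         datasets: vec![],
//     };
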
+        let datasets: Vec<_> = request
+            .datasets
+            .into_iter()
+            .map(|dataset| {
+                db::model::Dataset::new(
+                    dataset.dataset_id,
+                    dataset.zpool_id,
+                    dataset.request.address,
+                    dataset.request.kind.into(),
+                )
+            })
+            .collect();
 
         self.db_datastore
-            .rack_set_initialized(opctx, rack_id, services, vec![])
+            .rack_set_initialized(opctx, rack_id, services, datasets)
             .await?;
 
         Ok(())
     }
+
+    /// Awaits the initialization of the rack.
+    ///
+    /// This will occur by either:
+    /// 1. RSS invoking the internal API, handing off responsibility, or
+    /// 2. Re-reading a value from the DB, if the rack has already been
+    ///    initialized.
+    ///
+    /// See RFD 278 for additional context.
+    pub async fn await_rack_initialization(&self, opctx: &OpContext) {
+        loop {
+            let result = self.rack_lookup(&opctx, &self.rack_id).await;
+            match result {
+                Ok(rack) => {
+                    if rack.initialized {
+                        return;
+                    }
+                    info!(
+                        self.log,
+                        "Still waiting for rack initialization: {:?}", rack
+                    );
+                }
+                Err(e) => {
+                    warn!(self.log, "Cannot look up rack: {}", e);
+                }
+            }
+            tokio::time::sleep(std::time::Duration::from_secs(2)).await;
+        }
+    }
 }
diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs
index 4bf280c7b70..369f2d28740 100644
--- a/nexus/src/internal_api/http_entrypoints.rs
+++ b/nexus/src/internal_api/http_entrypoints.rs
@@ -7,8 +7,9 @@ use crate::context::OpContext;
 use crate::ServerContext;
 
 use super::params::{
-    DatasetPutRequest, DatasetPutResponse, OximeterInfo, ServicePutRequest,
-    SledAgentStartupInfo, ZpoolPutRequest, ZpoolPutResponse,
+    DatasetPutRequest, DatasetPutResponse, OximeterInfo,
+    RackInitializationRequest, SledAgentStartupInfo, ZpoolPutRequest,
+    ZpoolPutResponse,
 };
 use dropshot::endpoint;
 use dropshot::ApiDescription;
@@ -104,15 +105,15 @@ struct RackPathParam {
 async fn rack_initialization_complete(
     rqctx: Arc<RequestContext<Arc<ServerContext>>>,
     path_params: Path<RackPathParam>,
-    info: TypedBody<Vec<ServicePutRequest>>,
+    info: TypedBody<RackInitializationRequest>,
 ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
     let apictx = rqctx.context();
     let nexus = &apictx.nexus;
     let path = path_params.into_inner();
-    let svcs = info.into_inner();
+    let request = info.into_inner();
     let opctx = OpContext::for_internal_api(&rqctx).await;
-    nexus.rack_initialize(&opctx, path.rack_id, svcs).await?;
+    nexus.rack_initialize(&opctx, path.rack_id, request).await?;
     Ok(HttpResponseUpdatedNoContent())
 }
 
diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs
index fd7b50802e9..40cdcf99fdd 100644
--- a/nexus/src/lib.rs
+++ b/nexus/src/lib.rs
@@ -66,23 +66,27 @@ pub fn run_openapi_internal() -> Result<(), String> {
         .map_err(|e| e.to_string())
 }
 
-/// Packages up a [`Nexus`], running both external and internal HTTP API servers
-/// wired up to Nexus
-pub struct Server {
+/// A partially-initialized Nexus server, which exposes an internal interface,
+/// but is not ready to receive external requests.
+pub struct InternalServer<'a> {
     /// shared state used by API request handlers
     pub apictx: Arc<ServerContext>,
-    /// dropshot server for external API
-    pub http_server_external: dropshot::HttpServer<Arc<ServerContext>>,
     /// dropshot server for internal API
     pub http_server_internal: dropshot::HttpServer<Arc<ServerContext>>,
+
+    config: &'a Config,
+    log: Logger,
 }
 
-impl Server {
-    /// Start a nexus server.
+impl<'a> InternalServer<'a> {
+    /// Creates a Nexus instance with only the internal API exposed.
+    ///
+    /// This is often used as an argument when creating a [`Server`],
+    /// which also exposes the external API.
     pub async fn start(
-        config: &Config,
+        config: &'a Config,
         log: &Logger,
-    ) -> Result<Server, String> {
+    ) -> Result<InternalServer<'a>, String> {
         let log = log.new(o!("name" => config.deployment.id.to_string()));
         info!(log, "setting up nexus server");
 
@@ -92,14 +96,6 @@
             ServerContext::new(config.deployment.rack_id, ctxlog, &config)
                 .await?;
 
-        let http_server_starter_external = dropshot::HttpServerStarter::new(
-            &config.deployment.dropshot_external,
-            external_api(),
-            Arc::clone(&apictx),
-            &log.new(o!("component" => "dropshot_external")),
-        )
-        .map_err(|error| format!("initializing external server: {}", error))?;
-
         let http_server_starter_internal = dropshot::HttpServerStarter::new(
             &config.deployment.dropshot_internal,
             internal_api(),
@@ -107,9 +103,48 @@
             &log.new(o!("component" => "dropshot_internal")),
         )
         .map_err(|error| format!("initializing internal server: {}", error))?;
+        let http_server_internal = http_server_starter_internal.start();
+
+        Ok(Self { apictx, http_server_internal, config, log })
+    }
+}
+
+/// Packages up a [`Nexus`], running both external and internal HTTP API servers
+/// wired up to Nexus
+pub struct Server {
+    /// shared state used by API request handlers
+    pub apictx: Arc<ServerContext>,
+    /// dropshot server for external API
+    pub http_server_external: dropshot::HttpServer<Arc<ServerContext>>,
+    /// dropshot server for internal API
+    pub http_server_internal: dropshot::HttpServer<Arc<ServerContext>>,
+}
+
+impl Server {
+    pub async fn start(internal: InternalServer<'_>) -> Result<Self, String> {
+        let apictx = internal.apictx;
+        let http_server_internal = internal.http_server_internal;
+        let log = internal.log;
+        let config = internal.config;
+
+        // Wait until RSS handoff completes.
+        let opctx = apictx.nexus.opctx_for_service_balancer();
+        apictx.nexus.await_rack_initialization(&opctx).await;
+
+        // With the exception of integration test environments,
+        // we expect background tasks to be enabled.
+        if config.pkg.tunables.enable_background_tasks {
+            apictx.nexus.start_background_tasks().map_err(|e| e.to_string())?;
+        }
+
+        let http_server_starter_external = dropshot::HttpServerStarter::new(
+            &config.deployment.dropshot_external,
+            external_api(),
+            Arc::clone(&apictx),
+            &log.new(o!("component" => "dropshot_external")),
+        )
+        .map_err(|error| format!("initializing external server: {}", error))?;
         let http_server_external = http_server_starter_external.start();
-        let http_server_internal = http_server_starter_internal.start();
 
         Ok(Server { apictx, http_server_external, http_server_internal })
     }
@@ -167,7 +202,8 @@ pub async fn run_server(config: &Config) -> Result<(), String> {
     } else {
         debug!(log, "registered DTrace probes");
     }
-    let server = Server::start(config, &log).await?;
+    let internal_server = InternalServer::start(config, &log).await?;
+    let server = Server::start(internal_server).await?;
     server.register_as_producer().await;
     server.wait_for_finish().await
 }
diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs
index 73279124b53..81a206ef486 100644
--- a/nexus/test-utils/src/lib.rs
+++ b/nexus/test-utils/src/lib.rs
@@ -109,15 +109,42 @@ pub async fn test_setup_with_config(
         .expect("Tests expect to set a port of Clickhouse")
         .set_port(clickhouse.port());
 
-    let server =
-        omicron_nexus::Server::start(&config, &logctx.log).await.unwrap();
-    server
+    // Start the Nexus internal API.
+    let internal_server =
+        omicron_nexus::InternalServer::start(&config, &logctx.log)
+            .await
+            .unwrap();
+    internal_server
         .apictx
         .nexus
         .wait_for_populate()
         .await
         .expect("Nexus never loaded users");
 
+    // Perform the "handoff from RSS".
+    //
+    // However, RSS isn't running, so we'll do the handoff ourselves.
+    let opctx = internal_server.apictx.nexus.opctx_for_service_balancer();
+    internal_server
+        .apictx
+        .nexus
+        .rack_initialize(
+            &opctx,
+            config.deployment.rack_id,
+            // NOTE: In the context of this test utility, we arguably do have an
+            // instance of CRDB and Nexus running. However, as this info isn't
+            // necessary for most tests, we pass no information here.
+            omicron_nexus::internal_api::params::RackInitializationRequest {
+                services: vec![],
+                datasets: vec![],
+            },
+        )
+        .await
+        .expect("Could not initialize rack");
+
+    // Start the Nexus external API.
+    let server = omicron_nexus::Server::start(internal_server).await.unwrap();
+
     let testctx_external = ClientTestContext::new(
         server.http_server_external.local_addr(),
         logctx.log.new(o!("component" => "external client test context")),
diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml
index 1462ae11814..c451a341a5e 100644
--- a/nexus/tests/config.test.toml
+++ b/nexus/tests/config.test.toml
@@ -34,6 +34,7 @@ address = "[::1]:0"
 [tunables]
 # Allow small subnets, so we can test IP address exhaustion easily / quickly
 max_vpc_ipv4_subnet_prefix = 29
+# Disable background tasks to help with test determinism
 enable_background_tasks = false
 
 [deployment]
diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs
index 038e74afc72..34be0cb58fb 100644
--- a/nexus/types/src/internal_api/params.rs
+++ b/nexus/types/src/internal_api/params.rs
@@ -152,6 +152,19 @@ pub struct ServicePutRequest {
     pub kind: ServiceKind,
 }
 
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+pub struct DatasetCreateRequest {
+    pub zpool_id: Uuid,
+    pub dataset_id: Uuid,
+    pub request: DatasetPutRequest,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+pub struct RackInitializationRequest {
+    pub services: Vec<ServicePutRequest>,
+    pub datasets: Vec<DatasetCreateRequest>,
+}
+
 /// Message used to notify Nexus that this oximeter instance is up and running.
#[derive(Debug, Clone, Copy, JsonSchema, Serialize, Deserialize)] pub struct OximeterInfo { diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 47183aaa3fa..ce838d204f7 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -255,11 +255,7 @@ "content": { "application/json": { "schema": { - "title": "Array_of_ServicePutRequest", - "type": "array", - "items": { - "$ref": "#/components/schemas/ServicePutRequest" - } + "$ref": "#/components/schemas/RackInitializationRequest" } } }, @@ -674,6 +670,27 @@ "value" ] }, + "DatasetCreateRequest": { + "type": "object", + "properties": { + "dataset_id": { + "type": "string", + "format": "uuid" + }, + "request": { + "$ref": "#/components/schemas/DatasetPutRequest" + }, + "zpool_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "dataset_id", + "request", + "zpool_id" + ] + }, "DatasetKind": { "description": "Describes the purpose of the dataset.", "type": "string", @@ -1711,6 +1728,27 @@ } ] }, + "RackInitializationRequest": { + "type": "object", + "properties": { + "datasets": { + "type": "array", + "items": { + "$ref": "#/components/schemas/DatasetCreateRequest" + } + }, + "services": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ServicePutRequest" + } + } + }, + "required": [ + "datasets", + "services" + ] + }, "Sample": { "description": "A concrete type representing a single, timestamped measurement from a timeseries.", "type": "object", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index ceb8c4ac342..aaad667fb8f 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -248,6 +248,33 @@ } } } + }, + "/zpools": { + "get": { + "operationId": "zpools_get", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_Zpool", + "type": "array", + "items": { + "$ref": "#/components/schemas/Zpool" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } } }, "components": { @@ -1410,6 +1437,18 @@ ] } ] + }, + "Zpool": { + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "id" + ] } } } diff --git a/sled-agent/src/bin/sled-agent.rs b/sled-agent/src/bin/sled-agent.rs index ffa88dbcda3..368d5be1057 100644 --- a/sled-agent/src/bin/sled-agent.rs +++ b/sled-agent/src/bin/sled-agent.rs @@ -15,6 +15,7 @@ use omicron_sled_agent::rack_setup::config::SetupServiceConfig as RssConfig; use omicron_sled_agent::sp::SimSpConfig; use omicron_sled_agent::{config::Config as SledConfig, server as sled_server}; use std::path::PathBuf; +use uuid::Uuid; #[derive(Debug, Parser)] #[clap( @@ -98,7 +99,9 @@ async fn do_run() -> Result<(), CmdError> { // Configure and run the Bootstrap server. let bootstrap_config = BootstrapConfig { - id: config.id, + // NOTE: The UUID of this bootstrap server is not stable across + // reboots. 
+ id: Uuid::new_v4(), bind_address: bootstrap_address, log: config.log.clone(), rss_config, diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index 165e3fe782b..70aed33c358 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -136,7 +136,6 @@ impl Agent { ) -> Result<(Self, TrustQuorumMembership), BootstrapError> { let ba_log = log.new(o!( "component" => "BootstrapAgent", - "server" => sled_config.id.to_string(), )); // We expect this directory to exist - ensure that it does, before any @@ -238,7 +237,14 @@ impl Agent { // Server already exists, return it. info!(&self.log, "Sled Agent already loaded"); - if &server.address().ip() != sled_address.ip() { + if server.id() != request.id { + let err_str = format!( + "Sled Agent already running with UUID {}, but {} was requested", + server.id(), + request.id, + ); + return Err(BootstrapError::SledError(err_str)); + } else if &server.address().ip() != sled_address.ip() { let err_str = format!( "Sled Agent already running on address {}, but {} was requested", server.address().ip(), @@ -267,6 +273,7 @@ impl Agent { let server = SledServer::start( &self.sled_config, self.parent_log.clone(), + request.id, sled_address, request.rack_id, ) @@ -314,7 +321,7 @@ impl Agent { // indicating which kind of address we're advertising). self.ddmd_client.advertise_prefix(request.subnet); - Ok(SledAgentResponse { id: self.sled_config.id }) + Ok(SledAgentResponse { id: request.id }) } /// Communicates with peers, sharing secrets, until the rack has been diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index a24b65dbf24..974c2fe83e7 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -86,7 +86,6 @@ impl Server { info!(log, "detecting (real or simulated) SP"); let sp = SpHandle::detect( config.sp_config.as_ref().map(|c| &c.local_sp), - &sled_config, &log, ) .await diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index dce3d124f98..3c454fdb637 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -13,18 +13,13 @@ use serde::Deserialize; use serde_with::serde_as; use serde_with::DisplayFromStr; use serde_with::PickFirst; -use std::net::{Ipv4Addr, SocketAddr}; +use std::net::Ipv4Addr; use std::path::{Path, PathBuf}; -use uuid::Uuid; /// Configuration for a sled agent #[serde_as] #[derive(Clone, Debug, Deserialize)] pub struct Config { - /// Unique id for the sled - pub id: Uuid, - /// Address of Nexus instance - pub nexus_address: SocketAddr, /// Configuration for the sled agent debug log pub log: ConfigLogging, /// Optional VLAN ID to be used for tagging guest VNICs. 
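
// A note on the two config changes above: with `id` and `nexus_address` gone
// from the static sled config, a sled's identity is now supplied at runtime by
// the RSS-generated sled plan. A rough sketch of the flow, using the types and
// signatures as they stand after this patch (values illustrative):
//
//     // RSS side (rack_setup/plan/sled.rs): mint a UUID per discovered sled.
//     let request = SledAgentRequest { id: Uuid::new_v4(), subnet, rack_id };
//
//     // Bootstrap agent side: thread the requested UUID into the sled server,
//     // rather than reading it from the sled-agent config file.
//     let server = SledServer::start(
//         &self.sled_config,
//         self.parent_log.clone(),
//         request.id,
//         sled_address,
//         request.rack_id,
//     )
//     .await; // error mapping elided
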
diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 607c3828f37..943e4535e90 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -7,6 +7,7 @@ use crate::params::{ DatasetEnsureBody, DiskEnsureBody, InstanceEnsureBody, InstanceSerialConsoleData, InstanceSerialConsoleRequest, ServiceEnsureBody, + Zpool, }; use crate::serial::ByteOffset; use dropshot::{ @@ -30,6 +31,7 @@ type SledApiDescription = ApiDescription; pub fn api() -> SledApiDescription { fn register_endpoints(api: &mut SledApiDescription) -> Result<(), String> { api.register(services_put)?; + api.register(zpools_get)?; api.register(filesystem_put)?; api.register(instance_put)?; api.register(disk_put)?; @@ -59,6 +61,17 @@ async fn services_put( Ok(HttpResponseUpdatedNoContent()) } +#[endpoint { + method = GET, + path = "/zpools", +}] +async fn zpools_get( + rqctx: Arc>, +) -> Result>, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.zpools_get().await.map_err(|e| Error::from(e))?)) +} + #[endpoint { method = PUT, path = "/filesystem", diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 494a901be5d..67159b14599 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -203,6 +203,11 @@ pub struct InstanceSerialConsoleData { pub last_byte_offset: u64, } +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct Zpool { + pub id: Uuid, +} + // The type of networking 'ASIC' the Dendrite service is expected to manage #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Copy, Hash, diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 1006514b3a9..60687a16764 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -5,13 +5,12 @@ //! Interfaces for working with RSS config. use crate::config::ConfigError; -use crate::params::{DatasetEnsureBody, ServiceRequest}; use omicron_common::address::{ get_64_subnet, Ipv6Subnet, AZ_PREFIX, RACK_PREFIX, SLED_PREFIX, }; use serde::Deserialize; use serde::Serialize; -use std::net::Ipv6Addr; +use std::net::{IpAddr, Ipv6Addr}; use std::path::Path; /// Configuration for the "rack setup service", which is controlled during @@ -28,30 +27,15 @@ use std::path::Path; pub struct SetupServiceConfig { pub rack_subnet: Ipv6Addr, - #[serde(default, rename = "request")] - pub requests: Vec, - /// The minimum number of sleds required to unlock the rack secret. /// /// If this value is less than 2, no rack secret will be created on startup; /// this is the typical case for single-server test/development. pub rack_secret_threshold: usize, -} - -/// A request to initialize a sled. -#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] -pub struct HardcodedSledRequest { - /// Datasets to be created. - #[serde(default, rename = "dataset")] - pub datasets: Vec, - - /// Services to be instantiated. - #[serde(default, rename = "service")] - pub services: Vec, - /// DNS Services to be instantiated. - #[serde(default, rename = "dns_service")] - pub dns_services: Vec, + /// The address on which Nexus should serve an external interface. + // TODO: Eventually, this should be pulled from a pool of addresses. 
+ pub nexus_external_address: IpAddr, } impl SetupServiceConfig { @@ -87,8 +71,8 @@ mod test { fn test_subnets() { let cfg = SetupServiceConfig { rack_subnet: "fd00:1122:3344:0100::".parse().unwrap(), - requests: vec![], rack_secret_threshold: 0, + nexus_external_address: "192.168.1.20".parse().unwrap(), }; assert_eq!( diff --git a/sled-agent/src/rack_setup/mod.rs b/sled-agent/src/rack_setup/mod.rs index e947ff99ef0..4df85a7727f 100644 --- a/sled-agent/src/rack_setup/mod.rs +++ b/sled-agent/src/rack_setup/mod.rs @@ -5,4 +5,5 @@ //! Rack Setup Service pub mod config; +mod plan; pub mod service; diff --git a/sled-agent/src/rack_setup/plan/mod.rs b/sled-agent/src/rack_setup/plan/mod.rs new file mode 100644 index 00000000000..2343a3be2e6 --- /dev/null +++ b/sled-agent/src/rack_setup/plan/mod.rs @@ -0,0 +1,8 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Rack Setup Service plan generation + +pub mod service; +pub mod sled; diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs new file mode 100644 index 00000000000..429d612d62e --- /dev/null +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -0,0 +1,302 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Plan generation for "where should services be initialized". + +use crate::params::{DatasetEnsureBody, ServiceRequest, ServiceType}; +use crate::rack_setup::config::SetupServiceConfig as Config; +use omicron_common::address::{ + get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, + NEXUS_EXTERNAL_PORT, NEXUS_INTERNAL_PORT, RSS_RESERVED_ADDRESSES, +}; +use omicron_common::backoff::{ + internal_service_policy, retry_notify, BackoffError, +}; +use serde::{Deserialize, Serialize}; +use sled_agent_client::{ + types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, +}; +use slog::Logger; +use std::collections::HashMap; +use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::path::{Path, PathBuf}; +use thiserror::Error; +use uuid::Uuid; + +// The number of Nexus instances to create from RSS. +const NEXUS_COUNT: usize = 1; + +// The number of CRDB instances to create from RSS. +const CRDB_COUNT: usize = 1; + +fn rss_service_plan_path() -> PathBuf { + Path::new(omicron_common::OMICRON_CONFIG_PATH).join("rss-service-plan.toml") +} + +/// Describes errors which may occur while generating a plan for services. +#[derive(Error, Debug)] +pub enum PlanError { + #[error("I/O error while {message}: {err}")] + Io { + message: String, + #[source] + err: std::io::Error, + }, + + #[error("Cannot deserialize TOML file at {path}: {err}")] + Toml { path: PathBuf, err: toml::de::Error }, + + #[error("Error making HTTP request to Sled Agent: {0}")] + SledApi(#[from] SledAgentError), + + #[error("Error initializing sled via sled-agent: {0}")] + SledInitialization(String), + + #[error("Failed to construct an HTTP client: {0}")] + HttpClient(reqwest::Error), +} + +#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] +pub struct SledRequest { + /// Datasets to be created. + #[serde(default, rename = "dataset")] + pub datasets: Vec, + + /// Services to be instantiated. 
+ #[serde(default, rename = "service")] + pub services: Vec, + + /// DNS Services to be instantiated. + #[serde(default, rename = "dns_service")] + pub dns_services: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct Plan { + pub services: HashMap, +} + +impl Plan { + pub async fn load(log: &Logger) -> Result, PlanError> { + // If we already created a plan for this RSS to allocate + // services to sleds, re-use that existing plan. + let rss_service_plan_path = rss_service_plan_path(); + if rss_service_plan_path.exists() { + info!(log, "RSS plan already created, loading from file"); + + let plan: Self = toml::from_str( + &tokio::fs::read_to_string(&rss_service_plan_path) + .await + .map_err(|err| PlanError::Io { + message: format!( + "Loading RSS plan {rss_service_plan_path:?}" + ), + err, + })?, + ) + .map_err(|err| PlanError::Toml { + path: rss_service_plan_path, + err, + })?; + Ok(Some(plan)) + } else { + Ok(None) + } + } + + // Gets a zpool UUID from the sled. + async fn get_a_zpool_from_sled( + log: &Logger, + address: SocketAddrV6, + ) -> Result { + let dur = std::time::Duration::from_secs(60); + let client = reqwest::ClientBuilder::new() + .connect_timeout(dur) + .timeout(dur) + .build() + .map_err(PlanError::HttpClient)?; + let client = SledAgentClient::new_with_client( + &format!("http://{}", address), + client, + log.new(o!("SledAgentClient" => address.to_string())), + ); + + let get_zpools = || async { + let zpools: Vec = client + .zpools_get() + .await + .map(|response| { + response + .into_inner() + .into_iter() + .map(|zpool| zpool.id) + .collect() + }) + .map_err(|err| { + BackoffError::transient(PlanError::SledApi(err)) + })?; + + if zpools.is_empty() { + return Err(BackoffError::transient( + PlanError::SledInitialization( + "Awaiting zpools".to_string(), + ), + )); + } + + Ok(zpools) + }; + let log_failure = |error, _| { + warn!(log, "failed to get zpools"; "error" => ?error); + }; + let zpools = + retry_notify(internal_service_policy(), get_zpools, log_failure) + .await?; + + Ok(zpools[0]) + } + + pub async fn create( + log: &Logger, + config: &Config, + sled_addrs: &Vec, + ) -> Result { + let reserved_rack_subnet = ReservedRackSubnet::new(config.az_subnet()); + let dns_subnets = reserved_rack_subnet.get_dns_subnets(); + + let mut allocations = vec![]; + + for idx in 0..sled_addrs.len() { + let sled_address = sled_addrs[idx]; + let sled_subnet_index = + u8::try_from(idx + 1).expect("Too many peers!"); + let subnet = config.sled_subnet(sled_subnet_index); + let mut addr_alloc = + AddressBumpAllocator::new(*get_sled_address(subnet).ip()); + + let mut request = SledRequest::default(); + + // The first enumerated sleds get assigned the responsibility + // of hosting Nexus. + if idx < NEXUS_COUNT { + let address = addr_alloc.next().expect("Not enough addrs"); + request.services.push(ServiceRequest { + id: Uuid::new_v4(), + name: "nexus".to_string(), + addresses: vec![address], + gz_addresses: vec![], + service_type: ServiceType::Nexus { + internal_address: SocketAddrV6::new( + address, + NEXUS_INTERNAL_PORT, + 0, + 0, + ), + external_address: SocketAddr::new( + config.nexus_external_address, + NEXUS_EXTERNAL_PORT, + ), + }, + }) + } + + // The first enumerated sleds host the CRDB datasets, using + // zpools described from the underlying config file. 
+ if idx < CRDB_COUNT { + let zpool_id = + Self::get_a_zpool_from_sled(log, sled_address).await?; + + let address = SocketAddrV6::new( + addr_alloc.next().expect("Not enough addrs"), + omicron_common::address::COCKROACH_PORT, + 0, + 0, + ); + request.datasets.push(DatasetEnsureBody { + id: Uuid::new_v4(), + zpool_id, + dataset_kind: crate::params::DatasetKind::CockroachDb { + all_addresses: vec![address], + }, + address, + }); + } + + // The first enumerated sleds get assigned the additional + // responsibility of being internal DNS servers. + if idx < dns_subnets.len() { + let dns_subnet = &dns_subnets[idx]; + let dns_addr = dns_subnet.dns_address().ip(); + request.dns_services.push(ServiceRequest { + id: Uuid::new_v4(), + name: "internal-dns".to_string(), + addresses: vec![dns_addr], + gz_addresses: vec![dns_subnet.gz_address().ip()], + service_type: ServiceType::InternalDns { + server_address: SocketAddrV6::new( + dns_addr, + DNS_SERVER_PORT, + 0, + 0, + ), + dns_address: SocketAddrV6::new( + dns_addr, DNS_PORT, 0, 0, + ), + }, + }); + } + + allocations.push((sled_address, request)); + } + + let mut services = std::collections::HashMap::new(); + for (addr, allocation) in allocations { + services.insert(addr, allocation); + } + + let plan = Self { services }; + + // Once we've constructed a plan, write it down to durable storage. + let serialized_plan = + toml::Value::try_from(&plan).unwrap_or_else(|e| { + panic!("Cannot serialize configuration: {:#?}: {}", plan, e) + }); + let plan_str = toml::to_string(&serialized_plan) + .expect("Cannot turn config to string"); + + info!(log, "Plan serialized as: {}", plan_str); + let path = rss_service_plan_path(); + tokio::fs::write(&path, plan_str).await.map_err(|err| { + PlanError::Io { + message: format!("Storing RSS service plan to {path:?}"), + err, + } + })?; + info!(log, "Service plan written to storage"); + + Ok(plan) + } +} + +struct AddressBumpAllocator { + last_addr: Ipv6Addr, +} + +// TODO: Testable? +// TODO: Could exist in another file? +impl AddressBumpAllocator { + fn new(sled_addr: Ipv6Addr) -> Self { + Self { last_addr: sled_addr } + } + + fn next(&mut self) -> Option { + let mut segments: [u16; 8] = self.last_addr.segments(); + segments[7] = segments[7].checked_add(1)?; + if segments[7] > RSS_RESERVED_ADDRESSES { + return None; + } + self.last_addr = Ipv6Addr::from(segments); + Some(self.last_addr) + } +} diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs new file mode 100644 index 00000000000..9e77a401360 --- /dev/null +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -0,0 +1,240 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Plan generation for "how should sleds be initialized". 
+ +use crate::bootstrap::{ + config::BOOTSTRAP_AGENT_PORT, + params::SledAgentRequest, + trust_quorum::{RackSecret, ShareDistribution}, +}; +use crate::rack_setup::config::SetupServiceConfig as Config; +use serde::{Deserialize, Serialize}; +use slog::Logger; +use sprockets_host::Ed25519Certificate; +use std::collections::HashMap; +use std::net::{Ipv6Addr, SocketAddrV6}; +use std::path::{Path, PathBuf}; +use thiserror::Error; +use uuid::Uuid; + +fn rss_sled_plan_path() -> PathBuf { + Path::new(omicron_common::OMICRON_CONFIG_PATH).join("rss-sled-plan.toml") +} + +pub fn generate_rack_secret<'a>( + rack_secret_threshold: usize, + member_device_id_certs: &'a [Ed25519Certificate], + log: &Logger, +) -> Result< + Option + 'a>, + PlanError, +> { + // We do not generate a rack secret if we only have a single sled or if our + // config specifies that the threshold for unlock is only a single sled. + let total_shares = member_device_id_certs.len(); + if total_shares <= 1 { + info!(log, "Skipping rack secret creation (only one sled present)"); + return Ok(None); + } + + if rack_secret_threshold <= 1 { + warn!( + log, + concat!( + "Skipping rack secret creation due to config", + " (despite discovery of {} bootstrap agents)" + ), + total_shares, + ); + return Ok(None); + } + + let secret = RackSecret::new(); + let (shares, verifier) = secret + .split(rack_secret_threshold, total_shares) + .map_err(PlanError::SplitRackSecret)?; + + Ok(Some(shares.into_iter().map(move |share| ShareDistribution { + threshold: rack_secret_threshold, + verifier: verifier.clone(), + share, + member_device_id_certs: member_device_id_certs.to_vec(), + }))) +} + +/// Describes errors which may occur while generating a plan for sleds. +#[derive(Error, Debug)] +pub enum PlanError { + #[error("I/O error while {message}: {err}")] + Io { + message: String, + #[source] + err: std::io::Error, + }, + + #[error("Cannot deserialize TOML file at {path}: {err}")] + Toml { path: PathBuf, err: toml::de::Error }, + + #[error("Failed to split rack secret: {0:?}")] + SplitRackSecret(vsss_rs::Error), +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct Plan { + pub rack_id: Uuid, + pub sleds: HashMap, + // TODO: Consider putting the rack subnet here? This may be operator-driven + // in the future, so it should exist in the "plan". + // + // TL;DR: The more we decouple rom "rss-config.toml", the easier it'll be to + // switch to an operator-driven interface. +} + +impl Plan { + pub async fn load(log: &Logger) -> Result, PlanError> { + // If we already created a plan for this RSS to allocate + // subnets/requests to sleds, re-use that existing plan. 
+ let rss_sled_plan_path = rss_sled_plan_path(); + if rss_sled_plan_path.exists() { + info!(log, "RSS plan already created, loading from file"); + + let plan: Self = toml::from_str( + &tokio::fs::read_to_string(&rss_sled_plan_path).await.map_err( + |err| PlanError::Io { + message: format!( + "Loading RSS plan {rss_sled_plan_path:?}" + ), + err, + }, + )?, + ) + .map_err(|err| PlanError::Toml { path: rss_sled_plan_path, err })?; + Ok(Some(plan)) + } else { + Ok(None) + } + } + + pub async fn create( + log: &Logger, + config: &Config, + bootstrap_addrs: Vec, + ) -> Result { + let rack_id = Uuid::new_v4(); + + let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); + let allocations = bootstrap_addrs.map(|(idx, bootstrap_addr)| { + info!(log, "Creating plan for the sled at {:?}", bootstrap_addr); + let bootstrap_addr = + SocketAddrV6::new(bootstrap_addr, BOOTSTRAP_AGENT_PORT, 0, 0); + let sled_subnet_index = + u8::try_from(idx + 1).expect("Too many peers!"); + let subnet = config.sled_subnet(sled_subnet_index); + + ( + bootstrap_addr, + SledAgentRequest { id: Uuid::new_v4(), subnet, rack_id }, + ) + }); + + info!(log, "Serializing plan"); + + let mut sleds = std::collections::HashMap::new(); + for (addr, allocation) in allocations { + sleds.insert(addr, allocation); + } + + let plan = Self { rack_id, sleds }; + + // Once we've constructed a plan, write it down to durable storage. + let serialized_plan = + toml::Value::try_from(&plan).unwrap_or_else(|e| { + panic!("Cannot serialize configuration: {:#?}: {}", plan, e) + }); + let plan_str = toml::to_string(&serialized_plan) + .expect("Cannot turn config to string"); + + info!(log, "Plan serialized as: {}", plan_str); + let path = rss_sled_plan_path(); + tokio::fs::write(&path, plan_str).await.map_err(|err| { + PlanError::Io { + message: format!("Storing RSS sled plan to {path:?}"), + err, + } + })?; + info!(log, "Sled plan written to storage"); + + Ok(plan) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use omicron_test_utils::dev::test_setup_log; + use sprockets_common::certificates::Ed25519Signature; + use sprockets_common::certificates::KeyType; + use std::collections::HashSet; + + fn dummy_certs(n: usize) -> Vec { + vec![ + Ed25519Certificate { + subject_key_type: KeyType::DeviceId, + subject_public_key: sprockets_host::Ed25519PublicKey([0; 32]), + signer_key_type: KeyType::Manufacturing, + signature: Ed25519Signature([0; 64]), + }; + n + ] + } + + #[test] + fn test_generate_rack_secret() { + let logctx = test_setup_log("test_generate_rack_secret"); + + // No secret generated if we have <= 1 sled + assert!(generate_rack_secret(10, &dummy_certs(1), &logctx.log) + .unwrap() + .is_none()); + + // No secret generated if threshold <= 1 + assert!(generate_rack_secret(1, &dummy_certs(10), &logctx.log) + .unwrap() + .is_none()); + + // Secret generation fails if threshold > total sleds + assert!(matches!( + generate_rack_secret(10, &dummy_certs(5), &logctx.log), + Err(PlanError::SplitRackSecret(_)) + )); + + // Secret generation succeeds if threshold <= total shares and both are + // > 1, and the returned iterator satifies: + // + // * total length == total shares + // * each share is distinct + for total_shares in 2..=32 { + for threshold in 2..=total_shares { + let certs = dummy_certs(total_shares); + let shares = + generate_rack_secret(threshold, &certs, &logctx.log) + .unwrap() + .unwrap(); + + assert_eq!(shares.len(), total_shares); + + // `Share` doesn't implement `Hash`, but it's a newtype around + // `Vec` (which does). 
Unwrap the newtype to check that all + // shares are distinct. + let shares_set = shares + .map(|share_dist| share_dist.share.0) + .collect::>(); + assert_eq!(shares_set.len(), total_shares); + } + } + + logctx.cleanup_successful(); + } +} diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 45f133a5eb6..1cb245a6314 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -4,30 +4,44 @@ //! Rack Setup Service implementation -use super::config::{HardcodedSledRequest, SetupServiceConfig as Config}; -use crate::bootstrap::config::BOOTSTRAP_AGENT_PORT; -use crate::bootstrap::ddm_admin_client::{DdmAdminClient, DdmError}; -use crate::bootstrap::params::SledAgentRequest; -use crate::bootstrap::rss_handle::BootstrapAgentHandle; -use crate::bootstrap::trust_quorum::{RackSecret, ShareDistribution}; -use crate::params::{ServiceRequest, ServiceType}; -use internal_dns_client::multiclient::{DnsError, Updater as DnsUpdater}; -use omicron_common::address::{ - get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, +use super::config::SetupServiceConfig as Config; +use crate::bootstrap::{ + ddm_admin_client::{DdmAdminClient, DdmError}, + rss_handle::BootstrapAgentHandle, }; +use crate::params::{DatasetEnsureBody, ServiceRequest, ServiceType}; +use crate::rack_setup::plan::service::{ + Plan as ServicePlan, PlanError as ServicePlanError, +}; +use crate::rack_setup::plan::sled::{ + generate_rack_secret, Plan as SledPlan, PlanError as SledPlanError, +}; +use internal_dns_client::multiclient::{ + DnsError, Resolver as DnsResolver, Updater as DnsUpdater, +}; +use internal_dns_client::names::{ServiceName, SRV}; +use nexus_client::{ + types as NexusTypes, Client as NexusClient, Error as NexusError, +}; +use omicron_common::address::{get_sled_address, NEXUS_INTERNAL_PORT}; use omicron_common::backoff::{ - internal_service_policy, retry_notify, BackoffError, + internal_service_policy, internal_service_policy_with_max, retry_notify, + BackoffError, +}; +use sled_agent_client::{ + types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; -use serde::{Deserialize, Serialize}; use slog::Logger; use sprockets_host::Ed25519Certificate; use std::collections::{HashMap, HashSet}; use std::iter; use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use thiserror::Error; use tokio::sync::OnceCell; -use uuid::Uuid; + +// The minimum number of sleds to initialize the rack. +const MINIMUM_SLED_COUNT: usize = 1; /// Describes errors which may occur while operating the setup service. 
 #[derive(Error, Debug)]
 pub enum SetupServiceError {
@@ -39,38 +53,37 @@ pub enum SetupServiceError {
         err: std::io::Error,
     },
 
+    #[error("Cannot create plan for sled services: {0}")]
+    ServicePlan(#[from] ServicePlanError),
+
+    #[error("Cannot create plan for sled setup: {0}")]
+    SledPlan(#[from] SledPlanError),
+
+    #[error("Bad configuration for setting up rack: {0}")]
+    BadConfig(String),
+
     #[error("Error initializing sled via sled-agent: {0}")]
     SledInitialization(String),
 
     #[error("Error making HTTP request to Sled Agent: {0}")]
-    SledApi(#[from] sled_agent_client::Error<sled_agent_client::types::Error>),
+    SledApi(#[from] SledAgentError),
+
+    #[error("Error making HTTP request to Nexus: {0}")]
+    NexusApi(#[from] NexusError),
 
     #[error("Error contacting ddmd: {0}")]
     DdmError(#[from] DdmError),
 
-    #[error("Cannot deserialize TOML file at {path}: {err}")]
-    Toml { path: PathBuf, err: toml::de::Error },
-
     #[error("Failed to monitor for peers: {0}")]
     PeerMonitor(#[from] tokio::sync::broadcast::error::RecvError),
 
     #[error("Failed to construct an HTTP client: {0}")]
     HttpClient(reqwest::Error),
 
-    #[error("Failed to split rack secret: {0:?}")]
-    SplitRackSecret(vsss_rs::Error),
-
-    #[error("Failed to access DNS servers: {0}")]
+    #[error("Failed to access DNS server: {0}")]
     Dns(#[from] DnsError),
 }
 
-// The workload / information allocated to a single sled.
-#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]
-struct SledAllocation {
-    initialization_request: SledAgentRequest,
-    services_request: HardcodedSledRequest,
-}
-
 /// The interface to the Rack Setup Service.
 pub struct Service {
     handle: tokio::task::JoinHandle<Result<(), SetupServiceError>>,
@@ -123,14 +136,9 @@ impl Service {
     }
 }
 
-fn rss_plan_path() -> std::path::PathBuf {
-    std::path::Path::new(omicron_common::OMICRON_CONFIG_PATH)
-        .join("rss-plan.toml")
-}
-
-fn rss_completed_plan_path() -> std::path::PathBuf {
-    std::path::Path::new(omicron_common::OMICRON_CONFIG_PATH)
-        .join("rss-plan-completed.toml")
+fn rss_completed_plan_path() -> PathBuf {
+    Path::new(omicron_common::OMICRON_CONFIG_PATH)
+        .join("rss-plan-completed.marker")
 }
 
 // Describes the options when awaiting for peers.
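
// With the plans now durably recorded by the `plan` module, the completion
// marker can be an empty file instead of a renamed copy of the plan. For
// reference, this is the restart path it enables; the snippet condenses the
// check added to inject_rack_setup_requests() further below:
//
//     if rss_completed_plan_path().exists() {
//         // Plans were fully executed on a previous boot; reload them and
//         // redo only the (idempotent) handoff to Nexus.
//         let sled_plan = SledPlan::load(&self.log)
//             .await?
//             .expect("Sled plan should exist if completed marker exists");
//         let service_plan = ServicePlan::load(&self.log)
//             .await?
//             .expect("Service plan should exist if completed marker exists");
//         self.handoff_to_nexus(&config, &sled_plan, &service_plan).await?;
//         return Ok(());
//     }
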
@@ -164,20 +172,19 @@ impl ServiceInner { async fn initialize_datasets( &self, - sled_address: SocketAddr, - datasets: &Vec, + sled_address: SocketAddrV6, + datasets: &Vec, ) -> Result<(), SetupServiceError> { let dur = std::time::Duration::from_secs(60); - let client = reqwest::ClientBuilder::new() .connect_timeout(dur) .timeout(dur) .build() .map_err(SetupServiceError::HttpClient)?; - let client = sled_agent_client::Client::new_with_client( + let client = SledAgentClient::new_with_client( &format!("http://{}", sled_address), client, - self.log.new(o!("SledAgentClient" => sled_address)), + self.log.new(o!("SledAgentClient" => sled_address.to_string())), ); info!(self.log, "sending dataset requests..."); @@ -188,14 +195,7 @@ impl ServiceInner { .filesystem_put(&dataset.clone().into()) .await .map_err(BackoffError::transient)?; - Ok::< - (), - BackoffError< - sled_agent_client::Error< - sled_agent_client::types::Error, - >, - >, - >(()) + Ok::<(), BackoffError>>(()) }; let log_failure = |error, _| { warn!(self.log, "failed to create filesystem"; "error" => ?error); @@ -207,12 +207,35 @@ impl ServiceInner { ) .await?; } + + let mut records = HashMap::new(); + for dataset in datasets { + records + .entry(dataset.srv()) + .or_insert_with(Vec::new) + .push((dataset.aaaa(), dataset.address())); + } + let records_put = || async { + self.dns_servers + .get() + .expect("DNS servers must be initialized first") + .insert_dns_records(&records) + .await + .map_err(BackoffError::transient)?; + Ok::<(), BackoffError>(()) + }; + let log_failure = |error, _| { + warn!(self.log, "failed to set DNS records"; "error" => ?error); + }; + retry_notify(internal_service_policy(), records_put, log_failure) + .await?; + Ok(()) } async fn initialize_services( &self, - sled_address: SocketAddr, + sled_address: SocketAddrV6, services: &Vec, ) -> Result<(), SetupServiceError> { let dur = std::time::Duration::from_secs(60); @@ -221,17 +244,17 @@ impl ServiceInner { .timeout(dur) .build() .map_err(SetupServiceError::HttpClient)?; - let client = sled_agent_client::Client::new_with_client( + let client = SledAgentClient::new_with_client( &format!("http://{}", sled_address), client, - self.log.new(o!("SledAgentClient" => sled_address)), + self.log.new(o!("SledAgentClient" => sled_address.to_string())), ); info!(self.log, "sending service requests..."); let services_put = || async { info!(self.log, "initializing sled services: {:?}", services); client - .services_put(&sled_agent_client::types::ServiceEnsureBody { + .services_put(&SledAgentTypes::ServiceEnsureBody { services: services .iter() .map(|s| s.clone().into()) @@ -239,155 +262,38 @@ impl ServiceInner { }) .await .map_err(BackoffError::transient)?; - Ok::< - (), - BackoffError< - sled_agent_client::Error, - >, - >(()) + Ok::<(), BackoffError>>(()) }; let log_failure = |error, _| { warn!(self.log, "failed to initialize services"; "error" => ?error); }; retry_notify(internal_service_policy(), services_put, log_failure) .await?; - Ok(()) - } - - async fn load_plan( - &self, - ) -> Result>, SetupServiceError> - { - // If we already created a plan for this RSS to allocate - // subnets/requests to sleds, re-use that existing plan. 
- let rss_plan_path = rss_plan_path(); - if rss_plan_path.exists() { - info!(self.log, "RSS plan already created, loading from file"); - - let plan: std::collections::HashMap = - toml::from_str( - &tokio::fs::read_to_string(&rss_plan_path).await.map_err( - |err| SetupServiceError::Io { - message: format!( - "Loading RSS plan {rss_plan_path:?}" - ), - err, - }, - )?, - ) - .map_err(|err| SetupServiceError::Toml { - path: rss_plan_path, - err, - })?; - Ok(Some(plan)) - } else { - Ok(None) - } - } - - async fn create_plan( - &self, - config: &Config, - bootstrap_addrs: Vec, - ) -> Result, SetupServiceError> { - let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); - let reserved_rack_subnet = ReservedRackSubnet::new(config.az_subnet()); - let dns_subnets = reserved_rack_subnet.get_dns_subnets(); - - info!(self.log, "dns_subnets: {:#?}", dns_subnets); - - let requests_and_sleds = - bootstrap_addrs.map(|(idx, bootstrap_addr)| { - // If a sled was explicitly requested from the RSS configuration, - // use that. Otherwise, just give it a "default" (empty) set of - // services. - let mut request = { - if idx < config.requests.len() { - config.requests[idx].clone() - } else { - HardcodedSledRequest::default() - } - }; - - // The first enumerated sleds get assigned the additional - // responsibility of being internal DNS servers. - if idx < dns_subnets.len() { - let dns_subnet = &dns_subnets[idx]; - let dns_addr = dns_subnet.dns_address().ip(); - request.dns_services.push(ServiceRequest { - id: Uuid::new_v4(), - name: "internal-dns".to_string(), - addresses: vec![dns_addr], - gz_addresses: vec![dns_subnet.gz_address().ip()], - service_type: ServiceType::InternalDns { - server_address: SocketAddrV6::new( - dns_addr, - DNS_SERVER_PORT, - 0, - 0, - ), - dns_address: SocketAddrV6::new( - dns_addr, DNS_PORT, 0, 0, - ), - }, - }); - } - - (request, (idx, bootstrap_addr)) - }); - - let rack_id = Uuid::new_v4(); - let allocations = requests_and_sleds.map(|(request, sled)| { - let (idx, bootstrap_addr) = sled; - info!( - self.log, - "Creating plan for the sled at {:?}", bootstrap_addr - ); - let bootstrap_addr = - SocketAddrV6::new(bootstrap_addr, BOOTSTRAP_AGENT_PORT, 0, 0); - let sled_subnet_index = - u8::try_from(idx + 1).expect("Too many peers!"); - let subnet = config.sled_subnet(sled_subnet_index); - - ( - bootstrap_addr, - SledAllocation { - initialization_request: SledAgentRequest { - id: Uuid::new_v4(), - subnet, - rack_id, - }, - services_request: request, - }, - ) - }); - info!(self.log, "Serializing plan"); - - let mut plan = std::collections::HashMap::new(); - for (addr, allocation) in allocations { - plan.insert(addr, allocation); - } - - // Once we've constructed a plan, write it down to durable storage. 
- let serialized_plan = - toml::Value::try_from(&plan).unwrap_or_else(|e| { - panic!("Cannot serialize configuration: {:#?}: {}", plan, e) - }); - let plan_str = toml::to_string(&serialized_plan) - .expect("Cannot turn config to string"); - - info!(self.log, "Plan serialized as: {}", plan_str); - let path = rss_plan_path(); - tokio::fs::write(&path, plan_str).await.map_err(|err| { - SetupServiceError::Io { - message: format!("Storing RSS plan to {path:?}"), - err, + // Insert DNS records, if the DNS servers have been initialized + if let Some(dns_servers) = self.dns_servers.get() { + let mut records = HashMap::new(); + for service in services { + records + .entry(service.srv()) + .or_insert_with(Vec::new) + .push((service.aaaa(), service.address())); } - })?; - info!(self.log, "Plan written to storage"); + let records_put = || async { + dns_servers + .insert_dns_records(&records) + .await + .map_err(BackoffError::transient)?; + Ok::<(), BackoffError>(()) + }; + let log_failure = |error, _| { + warn!(self.log, "failed to set DNS records"; "error" => ?error); + }; + retry_notify(internal_service_policy(), records_put, log_failure) + .await?; + } - Ok(plan) + Ok(()) } // Waits for sufficient neighbors to exist so the initial set of requests @@ -458,6 +364,108 @@ impl ServiceInner { Ok(addrs) } + async fn handoff_to_nexus( + &self, + config: &Config, + sled_plan: &SledPlan, + service_plan: &ServicePlan, + ) -> Result<(), SetupServiceError> { + info!(self.log, "Handing off control to Nexus"); + + let resolver = DnsResolver::new(&config.az_subnet()) + .expect("Failed to create DNS resolver"); + let ip = resolver + .lookup_ip(SRV::Service(ServiceName::Nexus)) + .await + .expect("Failed to lookup IP"); + let nexus_address = SocketAddr::new(ip, NEXUS_INTERNAL_PORT); + + info!(self.log, "Nexus address: {}", nexus_address.to_string()); + + let nexus_client = NexusClient::new( + &format!("http://{}", nexus_address), + self.log.new(o!("component" => "NexusClient")), + ); + + // Ensure we can quickly look up "Sled Agent Address" -> "UUID of sled". + // + // We need the ID when passing info to Nexus. + let mut id_map = HashMap::new(); + for (_, sled_request) in sled_plan.sleds.iter() { + id_map + .insert(get_sled_address(sled_request.subnet), sled_request.id); + } + + // Convert all the information we have about services and datasets into + // a format which can be processed by Nexus. + let mut services: Vec = vec![]; + let mut datasets: Vec = vec![]; + for (addr, service_request) in service_plan.services.iter() { + let sled_id = *id_map + .get(addr) + .expect("Sled address in service plan, but not sled plan"); + + for svc in service_request + .services + .iter() + .chain(service_request.dns_services.iter()) + { + let kind = match svc.service_type { + ServiceType::Nexus { .. } => NexusTypes::ServiceKind::Nexus, + ServiceType::InternalDns { .. } => { + NexusTypes::ServiceKind::InternalDNS + } + ServiceType::Oximeter => NexusTypes::ServiceKind::Oximeter, + ServiceType::Dendrite { .. } => { + NexusTypes::ServiceKind::Dendrite + } + }; + + services.push(NexusTypes::ServicePutRequest { + service_id: svc.id, + sled_id, + // TODO: Should this be a vec, or a single value? 
+                    address: svc.addresses[0],
+                    kind,
+                })
+            }
+
+            for dataset in service_request.datasets.iter() {
+                datasets.push(NexusTypes::DatasetCreateRequest {
+                    zpool_id: dataset.zpool_id,
+                    dataset_id: dataset.id,
+                    request: NexusTypes::DatasetPutRequest {
+                        address: dataset.address.to_string(),
+                        kind: dataset.dataset_kind.clone().into(),
+                    },
+                })
+            }
+        }
+
+        let request =
+            NexusTypes::RackInitializationRequest { services, datasets };
+
+        let notify_nexus = || async {
+            nexus_client
+                .rack_initialization_complete(&sled_plan.rack_id, &request)
+                .await
+                .map_err(BackoffError::transient)
+        };
+        let log_failure = |err, _| {
+            info!(self.log, "Failed to handoff to nexus: {err}");
+        };
+
+        retry_notify(
+            internal_service_policy_with_max(std::time::Duration::from_secs(1)),
+            notify_nexus,
+            log_failure,
+        )
+        .await?;
+
+        info!(self.log, "Handoff to Nexus is complete");
+        Ok(())
+    }
+
     // In lieu of having an operator send requests to all sleds via an
     // initialization service, the sled-agent configuration may allow for the
     // automated injection of setup requests from a sled.
@@ -465,19 +473,23 @@
     // This method has a few distinct phases, identified by files in durable
     // storage:
     //
-    // 1. ALLOCATION PLAN CREATION. When the RSS starts up for the first time,
-    //    it creates an allocation plan to provision subnets and services
-    //    to an initial set of sleds.
-    //
-    //    This plan is stored at "rss_plan_path()".
+    // 1. SLED ALLOCATION PLAN CREATION. When the RSS starts up for the first
+    //    time, it creates an allocation plan to provision subnets to an initial
+    //    set of sleds.
     //
-    // 2. ALLOCATION PLAN EXECUTION. The RSS then carries out this plan, making
+    // 2. SLED ALLOCATION PLAN EXECUTION. The RSS then carries out this plan, making
     //    requests to the sleds enumerated within the "allocation plan".
     //
-    // 3. MARKING SETUP COMPLETE. Once the RSS has successfully initialized the
-    //    rack, the "rss_plan_path()" file is renamed to
-    //    "rss_completed_plan_path()". This indicates that the plan executed
-    //    successfully, and no work remains.
+    // 3. SERVICE ALLOCATION PLAN CREATION. Now that Sled Agents are executing
+    //    on their respective subnets, they can be queried to create an
+    //    allocation plan for services.
+    //
+    // 4. SERVICE ALLOCATION PLAN EXECUTION. RSS requests that the services
+    //    outlined in the aforementioned step are created.
+    //
+    // 5. MARKING SETUP COMPLETE. Once the RSS has successfully initialized the
+    //    rack, a marker file is created at "rss_completed_plan_path()". This
+    //    indicates that the plan executed successfully, and no work remains.
     async fn inject_rack_setup_requests(
         &self,
         config: &Config,
@@ -498,6 +510,15 @@
             self.log,
             "RSS configuration looks like it has already been applied",
         );
+
+        let sled_plan = SledPlan::load(&self.log)
+            .await?
+            .expect("Sled plan should exist if completed marker exists");
+        let service_plan = ServicePlan::load(&self.log)
+            .await?
+ .expect("Service plan should exist if completed marker exists"); + self.handoff_to_nexus(&config, &sled_plan, &service_plan).await?; + return Ok(()); } else { info!(self.log, "RSS configuration has not been fully applied yet",); @@ -506,11 +527,13 @@ impl ServiceInner { // Wait for either: // - All the peers to re-load an old plan (if one exists) // - Enough peers to create a new plan (if one does not exist) - let maybe_plan = self.load_plan().await?; - let expectation = if let Some(plan) = &maybe_plan { - PeerExpectation::LoadOldPlan(plan.keys().map(|a| *a.ip()).collect()) + let maybe_sled_plan = SledPlan::load(&self.log).await?; + let expectation = if let Some(plan) = &maybe_sled_plan { + PeerExpectation::LoadOldPlan( + plan.sleds.keys().map(|a| *a.ip()).collect(), + ) } else { - PeerExpectation::CreateNewPlan(config.requests.len()) + PeerExpectation::CreateNewPlan(MINIMUM_SLED_COUNT) }; let addrs = self .wait_for_peers(expectation, local_bootstrap_agent.our_address()) @@ -521,14 +544,14 @@ impl ServiceInner { // // NOTE: This is a "point-of-no-return" -- before sending any requests // to neighboring sleds, the plan must be recorded to durable storage. - // This way, if the RSS power-cycles, it can idempotently execute the - // same allocation plan. - let plan = if let Some(plan) = maybe_plan { + // This way, if the RSS power-cycles, it can idempotently provide the + // same subnets to the same sleds. + let plan = if let Some(plan) = maybe_sled_plan { info!(self.log, "Re-using existing allocation plan"); plan } else { info!(self.log, "Creating new allocation plan"); - self.create_plan(config, addrs).await? + SledPlan::create(&self.log, &config, addrs).await? }; // Generate our rack secret, unless we're in the single-sled case. @@ -548,7 +571,7 @@ impl ServiceInner { // addrs, which would remove the need for this assertion. assert_eq!( rack_secret_shares.len(), - plan.len(), + plan.sleds.len(), concat!( "Number of trust quorum members does not match ", "number of sleds in the plan" @@ -559,11 +582,12 @@ impl ServiceInner { // Forward the sled initialization requests to our sled-agent. local_bootstrap_agent .initialize_sleds( - plan.iter() - .map(move |(bootstrap_addr, allocation)| { + plan.sleds + .iter() + .map(move |(bootstrap_addr, initialization_request)| { ( *bootstrap_addr, - allocation.initialization_request.clone(), + initialization_request.clone(), maybe_rack_secret_shares .as_mut() .map(|shares| shares.next().unwrap()), @@ -574,22 +598,37 @@ impl ServiceInner { .await .map_err(SetupServiceError::SledInitialization)?; + let sled_addresses: Vec<_> = plan + .sleds + .iter() + .map(|(_, initialization_request)| { + get_sled_address(initialization_request.subnet) + }) + .collect(); + + // Now that sled agents have been initialized, we can create + // a service allocation plan. + let service_plan = + if let Some(plan) = ServicePlan::load(&self.log).await? { + plan + } else { + ServicePlan::create(&self.log, &config, &sled_addresses).await? + }; + // Set up internal DNS services. futures::future::join_all( - plan.iter() - .filter(|(_, allocation)| { + service_plan + .services + .iter() + .filter(|(_, service_request)| { // Only send requests to sleds that are supposed to be running // DNS services. 
- !allocation.services_request.dns_services.is_empty()
+ !service_request.dns_services.is_empty()
})
- .map(|(_, allocation)| async move {
- let sled_address = SocketAddr::V6(get_sled_address(
- allocation.initialization_request.subnet,
- ));
-
+ .map(|(sled_address, services_request)| async move {
self.initialize_services(
- sled_address,
- &allocation.services_request.dns_services,
+ *sled_address,
+ &services_request.dns_services,
)
.await?;
Ok(())
@@ -606,32 +645,16 @@ impl ServiceInner {
self.dns_servers
.set(dns_servers)
.map_err(|_| ())
- .expect("DNS servers should only be set once");
-
- // Issue the dataset initialization requests to all sleds.
- futures::future::join_all(plan.iter().map(
- |(_, allocation)| async move {
- let sled_address = SocketAddr::V6(get_sled_address(
- allocation.initialization_request.subnet,
- ));
+ .expect("Already set DNS servers");
+
+ // Issue the CRDB initialization requests to all sleds.
+ futures::future::join_all(service_plan.services.iter().map(
+ |(sled_address, services_request)| async move {
self.initialize_datasets(
- sled_address,
- &allocation.services_request.datasets,
+ *sled_address,
+ &services_request.datasets,
)
.await?;
-
- let mut records = HashMap::new();
- for dataset in &allocation.services_request.datasets {
- records
- .entry(dataset.srv())
- .or_insert_with(Vec::new)
- .push((dataset.aaaa(), dataset.address()));
- }
- self.dns_servers
- .get()
- .expect("DNS servers must be initialized first")
- .insert_dns_records(&records)
- .await?;
Ok(())
},
))
@@ -643,37 +666,29 @@ impl ServiceInner {
// Issue service initialization requests.
//
- // Note that this must happen *after* the dataset initialization,
+ // NOTE: This must happen *after* the dataset initialization,
// to ensure that CockroachDB has been initialized before Nexus
// starts.
- futures::future::join_all(plan.iter().map(
- |(_, allocation)| async move {
- let sled_address = SocketAddr::V6(get_sled_address(
- allocation.initialization_request.subnet,
- ));
-
- let all_services = allocation
- .services_request
+ //
+ // If Nexus was more resilient to concurrent initialization
+ // of CRDB, this requirement could be relaxed.
+ futures::future::join_all(service_plan.services.iter().map(
+ |(sled_address, services_request)| async move {
+ // With the current implementation of "initialize_services",
+ // we must provide the set of *all* services that should be
+ // executing on a sled.
+ //
+ // This means re-requesting the DNS service, even if it is
+ // already running - this is fine, however, as the receiving
+ // sled agent doesn't modify the already-running service.
+ let all_services = services_request
.services
.iter()
- .chain(allocation.services_request.dns_services.iter())
+ .chain(services_request.dns_services.iter())
.map(|s| s.clone())
.collect::<Vec<_>>();
- self.initialize_services(sled_address, &all_services).await?;
-
- let mut records = HashMap::new();
- for service in &all_services {
- records
- .entry(service.srv())
- .or_insert_with(Vec::new)
- .push((service.aaaa(), service.address()));
- }
- self.dns_servers
- .get()
- .expect("DNS servers must be initialized first")
- .insert_dns_records(&records)
- .await?;
+ self.initialize_services(*sled_address, &all_services).await?;
Ok(())
},
))
@@ -685,16 +700,18 @@ impl ServiceInner {
// Finally, make sure the configuration is saved so we don't inject
// the requests on the next iteration.
- let plan_path = rss_plan_path();
- tokio::fs::rename(&plan_path, &rss_completed_plan_path).await.map_err(
+ tokio::fs::File::create(&rss_completed_plan_path).await.map_err(
|err| SetupServiceError::Io {
- message: format!(
- "renaming {plan_path:?} to {rss_completed_plan_path:?}"
- ),
+ message: format!("creating {rss_completed_plan_path:?}"),
err,
},
)?;
+ // At this point, even if we reboot, we must not try to manage sleds,
+ // services, or DNS records.
+
+ self.handoff_to_nexus(&config, &plan, &service_plan).await?;
+
// TODO Questions to consider:
// - What if a sled comes online *right after* this setup? How does
// it get a /64?
@@ -702,112 +719,3 @@ impl ServiceInner {
Ok(())
}
}
-
-fn generate_rack_secret<'a>(
- rack_secret_threshold: usize,
- member_device_id_certs: &'a [Ed25519Certificate],
- log: &Logger,
-) -> Result<
- Option<impl ExactSizeIterator<Item = ShareDistribution> + 'a>,
- SetupServiceError,
-> {
- // We do not generate a rack secret if we only have a single sled or if our
- // config specifies that the threshold for unlock is only a single sled.
- let total_shares = member_device_id_certs.len();
- if total_shares <= 1 {
- info!(log, "Skipping rack secret creation (only one sled present)");
- return Ok(None);
- }
-
- if rack_secret_threshold <= 1 {
- warn!(
- log,
- concat!(
- "Skipping rack secret creation due to config",
- " (despite discovery of {} bootstrap agents)"
- ),
- total_shares,
- );
- return Ok(None);
- }
-
- let secret = RackSecret::new();
- let (shares, verifier) = secret
- .split(rack_secret_threshold, total_shares)
- .map_err(SetupServiceError::SplitRackSecret)?;
-
- Ok(Some(shares.into_iter().map(move |share| ShareDistribution {
- threshold: rack_secret_threshold,
- verifier: verifier.clone(),
- share,
- member_device_id_certs: member_device_id_certs.to_vec(),
- })))
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
- use omicron_test_utils::dev::test_setup_log;
- use sprockets_common::certificates::Ed25519Signature;
- use sprockets_common::certificates::KeyType;
-
- fn dummy_certs(n: usize) -> Vec<Ed25519Certificate> {
- vec![
- Ed25519Certificate {
- subject_key_type: KeyType::DeviceId,
- subject_public_key: sprockets_host::Ed25519PublicKey([0; 32]),
- signer_key_type: KeyType::Manufacturing,
- signature: Ed25519Signature([0; 64]),
- };
- n
- ]
- }
-
- #[test]
- fn test_generate_rack_secret() {
- let logctx = test_setup_log("test_generate_rack_secret");
-
- // No secret generated if we have <= 1 sled
- assert!(generate_rack_secret(10, &dummy_certs(1), &logctx.log)
- .unwrap()
- .is_none());
-
- // No secret generated if threshold <= 1
- assert!(generate_rack_secret(1, &dummy_certs(10), &logctx.log)
- .unwrap()
- .is_none());
-
- // Secret generation fails if threshold > total sleds
- assert!(matches!(
- generate_rack_secret(10, &dummy_certs(5), &logctx.log),
- Err(SetupServiceError::SplitRackSecret(_))
- ));
-
- // Secret generation succeeds if threshold <= total shares and both are
- // > 1, and the returned iterator satisfies:
- //
- // * total length == total shares
- // * each share is distinct
- for total_shares in 2..=32 {
- for threshold in 2..=total_shares {
- let certs = dummy_certs(total_shares);
- let shares =
- generate_rack_secret(threshold, &certs, &logctx.log)
- .unwrap()
- .unwrap();
-
- assert_eq!(shares.len(), total_shares);
-
- // `Share` doesn't implement `Hash`, but it's a newtype around
- // `Vec<u8>` (which does). Unwrap the newtype to check that all
- // shares are distinct.
- let shares_set = shares
- .map(|share_dist| share_dist.share.0)
- .collect::<HashSet<_>>();
- assert_eq!(shares_set.len(), total_shares);
- }
- }
-
- logctx.cleanup_successful();
- }
-}
diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs
index cff044fda86..bd73c6e4169 100644
--- a/sled-agent/src/server.rs
+++ b/sled-agent/src/server.rs
@@ -36,6 +36,7 @@ impl Server {
pub async fn start(
config: &Config,
log: Logger,
+ sled_id: Uuid,
addr: SocketAddrV6,
rack_id: Uuid,
) -> Result<Server, String> {
@@ -50,6 +51,7 @@ impl Server {
&config,
log.clone(),
lazy_nexus_client.clone(),
+ sled_id,
addr,
rack_id,
)
@@ -70,7 +72,6 @@ impl Server {
.start();
let sled_address = http_server.local_addr();
- let sled_id = config.id;
let nexus_notifier_handle = tokio::task::spawn(async move {
// Notify the control plane that we're up, and continue trying this
// until it succeeds. We retry with a randomized, capped exponential
diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs
index 8404965ec26..63bc58ad709 100644
--- a/sled-agent/src/services.rs
+++ b/sled-agent/src/services.rs
@@ -21,13 +21,11 @@ use omicron_common::address::SLED_PREFIX;
use omicron_common::nexus_config::{
self, DeploymentConfig as NexusDeploymentConfig,
};
-use omicron_common::postgres_config::PostgresConfigWithUrl;
use slog::Logger;
use std::collections::HashSet;
use std::iter::FromIterator;
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};
use std::path::{Path, PathBuf};
-use std::str::FromStr;
use tokio::io::AsyncWriteExt;
use tokio::sync::Mutex;
use uuid::Uuid;
@@ -419,12 +417,7 @@ impl ServiceManager {
subnet: Ipv6Subnet::<SLED_PREFIX>::new(
self.underlay_address,
),
- // TODO: Switch to inferring this URL by DNS.
- database: nexus_config::Database::FromUrl {
- url: PostgresConfigWithUrl::from_str(
- "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable"
- ).unwrap(),
- }
+ database: nexus_config::Database::FromDns,
};
// Copy the partial config file to the expected location.
diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs
index d9040618fae..2d1662b6ed1 100644
--- a/sled-agent/src/sled_agent.rs
+++ b/sled-agent/src/sled_agent.rs
@@ -16,7 +16,7 @@ use crate::nexus::LazyNexusClient;
use crate::params::{
DatasetKind, DiskStateRequested, InstanceHardware, InstanceMigrateParams,
InstanceRuntimeStateRequested, InstanceSerialConsoleData,
- ServiceEnsureBody,
+ ServiceEnsureBody, Zpool,
};
use crate::services::{self, ServiceManager};
use crate::storage_manager::StorageManager;
@@ -125,11 +125,10 @@ impl SledAgent {
config: &Config,
log: Logger,
lazy_nexus_client: LazyNexusClient,
+ id: Uuid,
sled_address: SocketAddrV6,
rack_id: Uuid,
) -> Result<SledAgent, Error> {
- let id = config.id;
-
// Pass the "parent_log" to all subcomponents that want to set their own
// "component" value.
let parent_log = log.clone();
@@ -291,6 +290,11 @@ impl SledAgent {
Ok(())
}
+ pub async fn zpools_get(&self) -> Result<Vec<Zpool>, Error> {
+ let zpools = self.storage.get_zpools().await?;
+ Ok(zpools)
+ }
+
/// Ensures that a filesystem type exists within the zpool.
pub async fn filesystem_ensure(
&self,
diff --git a/sled-agent/src/sp/mod.rs b/sled-agent/src/sp/mod.rs
index 902d3bd25ca..b43c40979c6 100644
--- a/sled-agent/src/sp/mod.rs
+++ b/sled-agent/src/sp/mod.rs
@@ -4,7 +4,6 @@
//! Interface to a (simulated or real) SP / RoT.
-use crate::config::Config as SledConfig;
use crate::config::ConfigError;
use crate::illumos;
use crate::illumos::dladm::CreateVnicError;
@@ -86,11 +85,10 @@ impl SpHandle {
/// A return value of `Ok(None)` means no SP is available.
pub async fn detect(
sp_config: Option<&GimletConfig>,
- sled_config: &SledConfig,
log: &Logger,
) -> Result<Option<Self>, SpError> {
- let inner = if let Some(config) = sp_config {
- let sim_sp = SimulatedSp::start(config, sled_config, log).await?;
+ let inner = if let Some(config) = sp_config.as_ref() {
+ let sim_sp = SimulatedSp::start(config, log).await?;
Some(Inner::SimulatedSp(sim_sp))
} else {
None
diff --git a/sled-agent/src/sp/simulated.rs b/sled-agent/src/sp/simulated.rs
index bdf31ffb9f3..fe3b3cbac2a 100644
--- a/sled-agent/src/sp/simulated.rs
+++ b/sled-agent/src/sp/simulated.rs
@@ -5,7 +5,6 @@
//! Implementation of a simulated SP / RoT.
use super::SpError;
-use crate::config::Config as SledConfig;
use crate::illumos::dladm::Dladm;
use crate::zone::Zones;
use slog::Logger;
@@ -36,7 +35,6 @@ pub(super) struct SimulatedSp {
impl SimulatedSp {
pub(super) async fn start(
sp_config: &GimletConfig,
- sled_config: &SledConfig,
log: &Logger,
) -> Result<Self, SpError> {
// Is our simulated SP going to bind to addresses (acting like
@@ -79,7 +77,6 @@ impl SimulatedSp {
info!(log, "starting simulated gimlet SP");
let sp_log = log.new(o!(
"component" => "sp-sim",
- "server" => sled_config.id.clone().to_string(),
));
let sp = Arc::new(
sp_sim::Gimlet::spawn(&sp_config, sp_log)
@@ -91,7 +88,6 @@ impl SimulatedSp {
info!(log, "starting simulated gimlet RoT");
let rot_log = log.new(o!(
"component" => "rot-sim",
- "server" => sled_config.id.clone().to_string(),
));
let transport = SimRotTransport { sp: Arc::clone(&sp), responses: VecDeque::new() };
diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs
index 3ec65c6427d..59f46a9e6b8 100644
--- a/sled-agent/src/storage_manager.rs
+++ b/sled-agent/src/storage_manager.rs
@@ -15,7 +15,7 @@ use crate::params::DatasetKind;
use futures::stream::FuturesOrdered;
use futures::FutureExt;
use futures::StreamExt;
-use nexus_client::types::{DatasetPutRequest, ZpoolPutRequest};
+use nexus_client::types::ZpoolPutRequest;
use omicron_common::api::external::{ByteCount, ByteCountRangeError};
use omicron_common::backoff;
use schemars::JsonSchema;
@@ -241,6 +241,9 @@ impl DatasetInfo {
address: SocketAddrV6,
do_format: bool,
) -> Result<(), Error> {
+ // TODO: Related to
+ // https://github.com/oxidecomputer/omicron/pull/1124, should we
+ // avoid importing these manifests?
match self.kind {
DatasetKind::CockroachDb { .. } => {
info!(log, "start_zone: Loading CRDB manifest");
@@ -317,7 +320,9 @@ impl DatasetInfo {
warn!(log, "cockroachdb not yet alive");
};
backoff::retry_notify(
- backoff::internal_service_policy(),
+ backoff::internal_service_policy_with_max(
+ std::time::Duration::from_secs(1),
+ ),
check_health,
log_failure,
)
@@ -653,58 +658,14 @@ impl StorageWorker {
let log_post_failure = move |_, delay| {
warn!(
log,
- "failed to notify nexus, will retry in {:?}", delay;
+ "failed to notify nexus about zpool, will retry in {:?}", delay;
);
};
nexus_notifications.push(
backoff::retry_notify(
- backoff::internal_service_policy(),
- notify_nexus,
- log_post_failure,
- )
- .boxed(),
- );
- }
-
- // Adds a "notification to nexus" to `nexus_notifications`,
- // informing it about the addition of `datasets` to `pool_id`.
- fn add_datasets_notify(
- &self,
- nexus_notifications: &mut FuturesOrdered<BoxFuture<'static, Result<(), String>>>,
- datasets: Vec<(Uuid, SocketAddrV6, DatasetKind)>,
- pool_id: Uuid,
- ) {
- let lazy_nexus_client = self.lazy_nexus_client.clone();
- let notify_nexus = move || {
- let lazy_nexus_client = lazy_nexus_client.clone();
- let datasets = datasets.clone();
- async move {
- let nexus = lazy_nexus_client.get().await.map_err(|e| {
- backoff::BackoffError::transient(e.to_string())
- })?;
-
- for (id, address, kind) in datasets {
- let request = DatasetPutRequest {
- address: address.to_string(),
- kind: kind.into(),
- };
- nexus.dataset_put(&pool_id, &id, &request).await.map_err(
- |e| backoff::BackoffError::transient(e.to_string()),
- )?;
- }
- Ok(())
- }
- };
- let log = self.log.clone();
- let log_post_failure = move |_, delay| {
- warn!(
- log,
- "failed to notify nexus about datasets, will retry in {:?}", delay;
- );
- };
- nexus_notifications.push(
- backoff::retry_notify(
- backoff::internal_service_policy(),
+ backoff::internal_service_policy_with_max(
+ std::time::Duration::from_secs(1),
+ ),
notify_nexus,
log_post_failure,
)
.boxed(),
)
@@ -718,7 +679,6 @@ impl StorageWorker {
// Attempts to add a dataset within a zpool, according to `request`.
async fn add_dataset(
&self,
- nexus_notifications: &mut FuturesOrdered<BoxFuture<'static, Result<(), String>>>,
request: &NewFilesystemRequest,
) -> Result<(), Error> {
info!(self.log, "add_dataset: {:?}", request);
@@ -765,12 +725,6 @@ impl StorageWorker {
err,
})?;
-
- self.add_datasets_notify(
- nexus_notifications,
- vec![(id, dataset_info.address, dataset_info.kind)],
- pool.id(),
- );
-
Ok(())
}
@@ -864,21 +818,16 @@ impl StorageWorker {
}
}
- // Notify Nexus of the zpool and all datasets within.
+ // Notify Nexus of the zpool.
self.add_zpool_notify(
&mut nexus_notifications,
pool.id(),
size,
);
- self.add_datasets_notify(
- &mut nexus_notifications,
- datasets,
- pool.id(),
- );
},
Some(request) = self.new_filesystems_rx.recv() => {
- let result = self.add_dataset(&mut nexus_notifications, &request).await;
+ let result = self.add_dataset(&request).await;
let _ = request.responder.send(result);
}
}
@@ -955,6 +904,14 @@ impl StorageManager {
Ok(())
}
+ pub async fn get_zpools(&self) -> Result<Vec<crate::params::Zpool>, Error> {
+ let pools = self.pools.lock().await;
+ Ok(pools
+ .keys()
+ .map(|zpool| crate::params::Zpool { id: zpool.id() })
+ .collect())
+ }
+
pub async fn upsert_filesystem(
&self,
zpool_id: Uuid,
diff --git a/smf/nexus/config-partial.toml b/smf/nexus/config-partial.toml
index 66cb2bd24eb..dd38aed438e 100644
--- a/smf/nexus/config-partial.toml
+++ b/smf/nexus/config-partial.toml
@@ -19,11 +19,3 @@ level = "info"
mode = "file"
path = "/dev/stdout"
if_exists = "append"
-
-# Configuration for interacting with the timeseries database
-[timeseries_db]
-address = "[fd00:1122:3344:0101::5]:8123"
-
-[tunables]
-# TODO: Remove when RSS transfer to Nexus is fully fleshed out
-enable_background_tasks = false
diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml
index fbb5214c556..0adcc276f67 100644
--- a/smf/sled-agent/config-rss.toml
+++ b/smf/sled-agent/config-rss.toml
@@ -11,69 +11,5 @@ rack_subnet = "fd00:1122:3344:0100::"
# For values less than 2, no rack secret will be generated.
rack_secret_threshold = 1
-[[request]]
-
-# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus
-# should allocate crucible datasets.
-[[request.dataset]] -id = "09a9a25f-2602-4e2f-9630-31af9c492c3e" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::6]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "2713b37a-3043-4ed5-aaff-f38200e45cfb" -zpool_id = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::7]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "ffd16cad-e5d5-495e-9c59-4312a3857d91" -zpool_id = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::8]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "4d08fc19-3d5f-4f6b-9c48-925f8eac7255" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::2]:32221" -dataset_kind.type = "cockroach_db" -dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate clickhouse datasets. -[[request.dataset]] -id = "a3505b41-a592-420b-84f2-3d76bf0e0a81" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::5]:8123" -dataset_kind.type = "clickhouse" - -[[request.service]] -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" -name = "nexus" -addresses = [ "fd00:1122:3344:0101::3" ] -gz_addresses = [] -[request.service.service_type] -type = "nexus" -internal_address = "[fd00:1122:3344:0101::3]:12221" # NOTE: In the lab, use "172.20.15.226" -external_address = "192.168.1.20:80" - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate Oximeter services. -[[request.service]] -id = "1da65e5b-210c-4859-a7d7-200c1e659972" -name = "oximeter" -addresses = [ "fd00:1122:3344:0101::4" ] -gz_addresses = [] -[request.service.service_type] -type = "oximeter" - -[[request.service]] -id = "a0fe5ebc-9261-6f77-acc1-972481755789" -name = "dendrite" -addresses = [ "fd00:1122:3344:0101::9" ] -gz_addresses = [] -[request.service.service_type] -type = "dendrite" -asic = "tofino_stub" +nexus_external_address = "192.168.1.20" diff --git a/smf/sled-agent/config.toml b/smf/sled-agent/config.toml index 17072d7f865..51d061c3ba9 100644 --- a/smf/sled-agent/config.toml +++ b/smf/sled-agent/config.toml @@ -1,12 +1,5 @@ # Sled Agent Configuration -id = "fb0f7546-4d46-40ca-9d56-cbb810684ca7" - -# TODO: Remove this address - -# Internal address of Nexus -nexus_address = "[fd00:1122:3344:0101::3]:12221" - # A file-backed zpool can be manually created with the following: # # truncate -s 10GB testpool.vdev # # zpool create oxp_d462a7f7-b628-40fe-80ff-4e4189e2d62b "$PWD/testpool.vdev"
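
A note on how the pieces above compose: with the `[[request.dataset]]` stanzas gone from `config-rss.toml`, RSS derives dataset placement from live sled inventory instead. Once sled agents are running, each can be queried for its zpools via the `zpools_get` path added above, and the service allocation plan is built from the result. Below is a minimal sketch of that shape, using stub types in place of the generated sled-agent client; the `SledAgentClient` type and its method signature here are illustrative assumptions, not the real client API.

use uuid::Uuid;

// Stand-ins for the sled-agent client and its response types; the real
// generated client and `crate::params` types differ in detail.
struct Zpool { id: Uuid }
struct DatasetCreateRequest { zpool_id: Uuid, dataset_id: Uuid }
struct SledAgentClient;

impl SledAgentClient {
    // Mirrors `SledAgent::zpools_get` above: report this sled's zpools.
    async fn zpools_get(&self) -> Result<Vec<Zpool>, String> {
        Ok(vec![Zpool { id: Uuid::new_v4() }]) // stubbed inventory
    }
}

// Allocate one dataset per reported zpool, minting fresh dataset IDs,
// which is roughly what service plan creation needs before the handoff.
async fn plan_datasets(
    sleds: &[SledAgentClient],
) -> Result<Vec<DatasetCreateRequest>, String> {
    let mut requests = Vec::new();
    for sled in sleds {
        for zpool in sled.zpools_get().await? {
            requests.push(DatasetCreateRequest {
                zpool_id: zpool.id,
                dataset_id: Uuid::new_v4(),
            });
        }
    }
    Ok(requests)
}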
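
The `rack_secret_threshold` knob that survives in `config-rss.toml` interacts with the sled count exactly as `generate_rack_secret` (deleted from the RSS file above and described by its unit tests) enforces. The following is an illustrative restatement of those rules, not code from the tree:

/// Outcome of rack secret generation for a given threshold and sled count.
#[derive(Debug, PartialEq)]
enum RackSecretDecision {
    /// A single sled, or a threshold below 2: skip secret creation entirely.
    Skip,
    /// The threshold exceeds the number of participating sleds: an error.
    InvalidThreshold,
    /// Otherwise, split the secret into one share per sled.
    Generate { threshold: usize, total_shares: usize },
}

fn decide(threshold: usize, total_shares: usize) -> RackSecretDecision {
    if total_shares <= 1 || threshold <= 1 {
        RackSecretDecision::Skip
    } else if threshold > total_shares {
        RackSecretDecision::InvalidThreshold
    } else {
        RackSecretDecision::Generate { threshold, total_shares }
    }
}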
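
Several hunks above also swap `internal_service_policy()` for `internal_service_policy_with_max(Duration::from_secs(1))`, capping the exponential backoff so that notifications to Nexus (zpool upserts, the CockroachDB health check, the RSS handoff) keep retrying at roughly one-second intervals rather than backing off without bound. Here is a rough sketch of that pattern using the `backoff` crate directly rather than Omicron's wrappers; the policy values are assumptions:

use std::time::Duration;

use backoff::ExponentialBackoff;

// A policy resembling `internal_service_policy_with_max`: exponential
// backoff that never waits longer than `max` between attempts and never
// gives up (no maximum elapsed time).
fn capped_policy(max: Duration) -> ExponentialBackoff {
    ExponentialBackoff {
        initial_interval: Duration::from_millis(250),
        max_interval: max,
        max_elapsed_time: None,
        ..Default::default()
    }
}

async fn notify_nexus() -> Result<(), backoff::Error<String>> {
    // A real caller would invoke the Nexus client here, mapping failures
    // to transient errors so that `retry_notify` tries again.
    Err(backoff::Error::transient("nexus not reachable yet".to_string()))
}

async fn handoff() -> Result<(), String> {
    backoff::future::retry_notify(
        capped_policy(Duration::from_secs(1)),
        notify_nexus,
        |err, delay| eprintln!("handoff failed ({err}); retrying in {delay:?}"),
    )
    .await
}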