From 7f1087d3f550030e1ecfd634dd903977e55a0bf8 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 20:43:50 -0400 Subject: [PATCH 01/35] Pull in RSS changes from 'use-dns' branch --- Cargo.lock | 1 + common/src/address.rs | 5 + sled-agent/Cargo.toml | 1 + sled-agent/src/rack_setup/config.rs | 1 + sled-agent/src/rack_setup/service.rs | 132 ++++++++++++++++++++++++--- sled-agent/src/services.rs | 9 +- smf/sled-agent/config-rss.toml | 92 +++++++++---------- 7 files changed, 174 insertions(+), 67 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ca290e03a05..6d004285e09 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3104,6 +3104,7 @@ dependencies = [ "expectorate", "futures", "http", + "internal-dns-client", "ipnetwork", "libc", "macaddr", diff --git a/common/src/address.rs b/common/src/address.rs index 226dc9ea655..b105588b587 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -33,6 +33,11 @@ pub const SLED_AGENT_PORT: u16 = 12345; /// The port propolis-server listens on inside the propolis zone. pub const PROPOLIS_PORT: u16 = 12400; +pub const COCKROACH_PORT: u16 = 32221; +pub const CRUCIBLE_PORT: u16 = 32345; + +pub const NEXUS_EXTERNAL_PORT: u16 = 12220; +pub const NEXUS_INTERNAL_PORT: u16 = 12221; // Anycast is a mechanism in which a single IP address is shared by multiple // devices, and the destination is located based on routing distance. diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 18c9514ba94..a0c157e74b1 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -16,6 +16,7 @@ chrono = { version = "0.4", features = [ "serde" ] } crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "cd74a23ea42ce5e673923a00faf31b0a920191cc" } dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } futures = "0.3.21" +internal-dns-client = { path = "../internal-dns-client" } ipnetwork = "0.18" libc = "0.2.126" macaddr = { version = "1.0.1", features = [ "serde_std" ] } diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index d9f8324535d..6786312a009 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -28,6 +28,7 @@ use std::path::Path; pub struct SetupServiceConfig { pub rack_subnet: Ipv6Addr, + // TODO: REMOVE! #[serde(default, rename = "request")] pub requests: Vec, } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 6c1610e1983..0fd19706908 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -4,14 +4,15 @@ //! 
Rack Setup Service implementation -use super::config::{HardcodedSledRequest, SetupServiceConfig as Config}; +use super::config::SetupServiceConfig as Config; use crate::bootstrap::{ config::BOOTSTRAP_AGENT_PORT, discovery::PeerMonitorObserver, params::SledAgentRequest, rss_handle::BootstrapAgentHandle, }; -use crate::params::{ServiceRequest, ServiceType}; +use crate::params::{DatasetEnsureBody, ServiceRequest, ServiceType}; use omicron_common::address::{ get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, + NEXUS_EXTERNAL_PORT, NEXUS_INTERNAL_PORT, RSS_RESERVED_ADDRESSES, }; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, @@ -22,9 +23,12 @@ use std::collections::{HashMap, HashSet}; use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::path::PathBuf; use thiserror::Error; -use tokio::sync::Mutex; +use tokio::sync::{Mutex, OnceCell}; use uuid::Uuid; +// The number of Nexus instances to create from RSS. +const NEXUS_COUNT: usize = 1; + /// Describes errors which may occur while operating the setup service. #[derive(Error, Debug)] pub enum SetupServiceError { @@ -49,13 +53,32 @@ pub enum SetupServiceError { #[error("Failed to construct an HTTP client: {0}")] HttpClient(reqwest::Error), + + // XXX CLEAN UP + #[error(transparent)] + Dns(#[from] internal_dns_client::Error), +} + +#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] +pub struct SledRequest { + /// Datasets to be created. + #[serde(default, rename = "dataset")] + pub datasets: Vec, + + /// Services to be instantiated. + #[serde(default, rename = "service")] + pub services: Vec, + + /// DNS Services to be instantiated. + #[serde(default, rename = "dns_service")] + pub dns_services: Vec, } // The workload / information allocated to a single sled. #[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] struct SledAllocation { initialization_request: SledAgentRequest, - services_request: HardcodedSledRequest, + services_request: SledRequest, } /// The interface to the Rack Setup Service. @@ -130,15 +153,42 @@ enum PeerExpectation { CreateNewPlan(usize), } +struct AddressBumpAllocator { + last_addr: Ipv6Addr, +} + +// TODO: Testable? +// TODO: Could exist in another file? +impl AddressBumpAllocator { + fn new(sled_addr: Ipv6Addr) -> Self { + Self { last_addr: sled_addr } + } + + fn next(&mut self) -> Option { + let mut segments: [u16; 8] = self.last_addr.segments(); + segments[7] = segments[7].checked_add(1)?; + if segments[7] > RSS_RESERVED_ADDRESSES { + return None; + } + self.last_addr = Ipv6Addr::from(segments); + Some(self.last_addr) + } +} + /// The implementation of the Rack Setup Service. struct ServiceInner { log: Logger, peer_monitor: Mutex, + dns_servers: OnceCell, } impl ServiceInner { fn new(log: Logger, peer_monitor: PeerMonitorObserver) -> Self { - ServiceInner { log, peer_monitor: Mutex::new(peer_monitor) } + ServiceInner { + log, + peer_monitor: Mutex::new(peer_monitor), + dns_servers: OnceCell::new(), + } } async fn initialize_datasets( @@ -277,16 +327,61 @@ impl ServiceInner { let requests_and_sleds = bootstrap_addrs.map(|(idx, bootstrap_addr)| { - // If a sled was explicitly requested from the RSS configuration, - // use that. Otherwise, just give it a "default" (empty) set of - // services. 
- let mut request = { - if idx < config.requests.len() { - config.requests[idx].clone() - } else { - HardcodedSledRequest::default() + let sled_subnet_index = + u8::try_from(idx + 1).expect("Too many peers!"); + let subnet = config.sled_subnet(sled_subnet_index); + let mut addr_alloc = + AddressBumpAllocator::new(*get_sled_address(subnet).ip()); + + let mut request = SledRequest::default(); + + // The first enumerated sleds get assigned the responsibility + // of hosting Nexus. + if idx < NEXUS_COUNT { + let address = addr_alloc.next().expect("Not enough addrs"); + request.services.push(ServiceRequest { + id: Uuid::new_v4(), + name: "nexus".to_string(), + addresses: vec![address], + gz_addresses: vec![], + service_type: ServiceType::Nexus { + internal_address: SocketAddrV6::new( + address, + NEXUS_INTERNAL_PORT, + 0, + 0, + ), + external_address: SocketAddrV6::new( + address, + NEXUS_EXTERNAL_PORT, + 0, + 0, + ), + }, + }) + } + + // The first enumerated sleds host the CRDB datasets, using + // zpools described from the underlying config file. + if idx < config.requests.len() { + for dataset in &config.requests[idx].datasets { + let address = SocketAddrV6::new( + addr_alloc.next().expect("Not enough addrs"), + omicron_common::address::COCKROACH_PORT, + 0, + 0, + ); + request.datasets.push(DatasetEnsureBody { + id: Uuid::new_v4(), + zpool_id: dataset.zpool_id, + dataset_kind: + crate::params::DatasetKind::CockroachDb { + all_addresses: vec![address], + }, + address, + }); } - }; + } // The first enumerated sleds get assigned the additional // responsibility of being internal DNS servers. @@ -517,6 +612,15 @@ impl ServiceInner { .into_iter() .collect::>()?; + let dns_servers = internal_dns_client::multiclient::Updater::new( + config.az_subnet(), + self.log.new(o!("client" => "DNS")), + ); + self.dns_servers + .set(dns_servers) + .map_err(|_| ()) + .expect("Already set DNS servers"); + // Issue the dataset initialization requests to all sleds. futures::future::join_all(plan.iter().map( |(_, allocation)| async move { diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 4edd18a3fa7..946a6a8bc88 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -14,13 +14,11 @@ use crate::zone::Zones; use dropshot::ConfigDropshot; use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; use omicron_common::nexus_config::{self, RuntimeConfig as NexusRuntimeConfig}; -use omicron_common::postgres_config::PostgresConfigWithUrl; use slog::Logger; use std::collections::HashSet; use std::iter::FromIterator; use std::net::{IpAddr, Ipv6Addr, SocketAddr}; use std::path::{Path, PathBuf}; -use std::str::FromStr; use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; @@ -328,11 +326,8 @@ impl ServiceManager { self.underlay_address, ), // TODO: Switch to inferring this URL by DNS. - database: nexus_config::Database::FromUrl { - url: PostgresConfigWithUrl::from_str( - "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" - ).unwrap() - } + // "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" + database: nexus_config::Database::FromDns, }; // Copy the partial config file to the expected location. 
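(Aside, not part of the patch: with the hard-coded Postgres URL gone, Nexus has to
recover the CockroachDB address itself. A rough sketch of that lookup, mirroring the
`ServerContext::new` change introduced later in this series; `config` and the
internal-dns client types are assumed to be in scope, and errors are mapped to
strings as that code does:)

    let az_subnet =
        Ipv6Subnet::<AZ_PREFIX>::new(config.runtime.subnet.net().ip());
    let resolver =
        internal_dns_client::multiclient::create_resolver(az_subnet)
            .map_err(|e| format!("Failed to create DNS resolver: {}", e))?;
    // Ask the internal DNS zone for the "cockroachdb" service record.
    let response = resolver
        .lookup_ip(&SRV::Service("cockroachdb".to_string()).to_string())
        .await
        .map_err(|e| format!("Failed to lookup IP: {}", e))?;
    let address = response.iter().next().ok_or_else(|| {
        "no addresses returned from DNS resolver".to_string()
    })?;
    // Any returned address will do; the CockroachDB nodes are interchangeable.
    let url = PostgresConfigWithUrl::from_str(&format!(
        "postgresql://root@[{}]:{}/omicron?sslmode=disable",
        address, COCKROACH_PORT
    ))
    .map_err(|e| format!("Cannot parse Postgres URL: {}", e))?;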
diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index 698d5b112fc..18a1a3d8597 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -6,59 +6,59 @@ # |...............| <- This /56 is the Rack Subnet rack_subnet = "fd00:1122:3344:0100::" -[[request]] - +# [[request]] +# # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate crucible datasets. -[[request.dataset]] -id = "09a9a25f-2602-4e2f-9630-31af9c492c3e" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::6]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "2713b37a-3043-4ed5-aaff-f38200e45cfb" -zpool_id = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::7]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "ffd16cad-e5d5-495e-9c59-4312a3857d91" -zpool_id = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::8]:32345" -dataset_kind.type = "crucible" +# [[request.dataset]] +# id = "09a9a25f-2602-4e2f-9630-31af9c492c3e" +# zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" +# address = "[fd00:1122:3344:0101::6]:32345" +# dataset_kind.type = "crucible" +# +# [[request.dataset]] +# id = "2713b37a-3043-4ed5-aaff-f38200e45cfb" +# zpool_id = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" +# address = "[fd00:1122:3344:0101::7]:32345" +# dataset_kind.type = "crucible" +# +# [[request.dataset]] +# id = "ffd16cad-e5d5-495e-9c59-4312a3857d91" +# zpool_id = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" +# address = "[fd00:1122:3344:0101::8]:32345" +# dataset_kind.type = "crucible" -[[request.dataset]] -id = "4d08fc19-3d5f-4f6b-9c48-925f8eac7255" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::2]:32221" -dataset_kind.type = "cockroach_db" -dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] +# [[request.dataset]] +# id = "4d08fc19-3d5f-4f6b-9c48-925f8eac7255" +# zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" +# address = "[fd00:1122:3344:0101::2]:32221" +# dataset_kind.type = "cockroach_db" +# dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate clickhouse datasets. -[[request.dataset]] -id = "a3505b41-a592-420b-84f2-3d76bf0e0a81" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::5]:8123" -dataset_kind.type = "clickhouse" +# [[request.dataset]] +# id = "a3505b41-a592-420b-84f2-3d76bf0e0a81" +# zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" +# address = "[fd00:1122:3344:0101::5]:8123" +# dataset_kind.type = "clickhouse" -[[request.service]] -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" -name = "nexus" -addresses = [ "fd00:1122:3344:0101::3" ] -gz_addresses = [] -[request.service.service_type] -type = "nexus" -internal_address = "[fd00:1122:3344:0101::3]:12221" -external_address = "[fd00:1122:3344:0101::3]:12220" +# [[request.service]] +# id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" +# name = "nexus" +# addresses = [ "fd00:1122:3344:0101::3" ] +# gz_addresses = [] +# [request.service.service_type] +# type = "nexus" +# internal_address = "[fd00:1122:3344:0101::3]:12221" +# external_address = "[fd00:1122:3344:0101::3]:12220" # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate Oximeter services. 
-[[request.service]] -id = "1da65e5b-210c-4859-a7d7-200c1e659972" -name = "oximeter" -addresses = [ "fd00:1122:3344:0101::4" ] -gz_addresses = [] -[request.service.service_type] -type = "oximeter" +# [[request.service]] +# id = "1da65e5b-210c-4859-a7d7-200c1e659972" +# name = "oximeter" +# addresses = [ "fd00:1122:3344:0101::4" ] +# gz_addresses = [] +# [request.service.service_type] +# type = "oximeter" From eca54846ca99f0034ad12aaf114d76c5409e08ed Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 9 Jun 2022 14:17:43 -0400 Subject: [PATCH 02/35] RSS performs config by itself, mostly --- Cargo.lock | 1 + nexus/Cargo.toml | 1 + nexus/src/context.rs | 31 +- nexus/src/lib.rs | 2 +- openapi/sled-agent.json | 39 ++ sled-agent/src/http_entrypoints.rs | 17 + sled-agent/src/params.rs | 7 +- sled-agent/src/rack_setup/config.rs | 22 -- sled-agent/src/rack_setup/service.rs | 534 ++++++++++++++++++--------- sled-agent/src/sled_agent.rs | 11 +- sled-agent/src/storage_manager.rs | 9 + smf/sled-agent/config-rss.toml | 4 +- 12 files changed, 474 insertions(+), 204 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6d004285e09..094e0feb651 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3005,6 +3005,7 @@ dependencies = [ "http", "httptest", "hyper", + "internal-dns-client", "ipnetwork", "lazy_static", "libc", diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 85cbbaa72b9..20eceacc788 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -25,6 +25,7 @@ hex = "0.4.3" http = "0.2.7" hyper = "0.14" db-macros = { path = "src/db/db-macros" } +internal-dns-client = { path = "../internal-dns-client" } ipnetwork = "0.18" lazy_static = "1.4.0" libc = "0.2.126" diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 2ad6a93553a..e0ed637aef3 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -18,8 +18,13 @@ use authn::external::session_cookie::HttpAuthnSessionCookie; use authn::external::spoof::HttpAuthnSpoof; use authn::external::HttpAuthnScheme; use chrono::{DateTime, Duration, Utc}; +use internal_dns_client::names::SRV; +use omicron_common::address::{ + Ipv6Subnet, AZ_PREFIX, COCKROACH_PORT, +}; use omicron_common::api::external::Error; use omicron_common::nexus_config; +use omicron_common::postgres_config::PostgresConfigWithUrl; use oximeter::types::ProducerRegistry; use oximeter_instruments::http::{HttpService, LatencyTracker}; use slog::Logger; @@ -27,6 +32,7 @@ use std::collections::BTreeMap; use std::env; use std::fmt::Debug; use std::path::PathBuf; +use std::str::FromStr; use std::sync::Arc; use std::time::Instant; use std::time::SystemTime; @@ -68,7 +74,7 @@ pub struct ConsoleConfig { impl ServerContext { /// Create a new context with the given rack id and log. This creates the /// underlying nexus as well. - pub fn new( + pub async fn new( rack_id: Uuid, log: Logger, config: &config::Config, @@ -134,11 +140,32 @@ impl ServerContext { // like console index.html. 
leaving that out for now so we don't break // nexus in dev for everyone + // Set up DNS Client + let az_subnet = + Ipv6Subnet::::new(config.runtime.subnet.net().ip()); + info!(log, "Setting up resolver on subnet: {:?}", az_subnet); + let resolver = + internal_dns_client::multiclient::create_resolver(az_subnet) + .map_err(|e| format!("Failed to create DNS resolver: {}", e))?; + // Set up DB pool let url = match &config.runtime.database { nexus_config::Database::FromUrl { url } => url.clone(), nexus_config::Database::FromDns => { - todo!("Not yet implemented"); + info!(log, "Accessing DB url from DNS"); + let response = resolver + .lookup_ip(&SRV::Service("cockroachdb".to_string()).to_string()) + .await + .map_err(|e| format!("Failed to lookup IP: {}", e))?; + let address = response.iter().next().ok_or_else(|| { + "no addresses returned from DNS resolver".to_string() + })?; + info!(log, "DB addreess: {}", address); + PostgresConfigWithUrl::from_str(&format!( + "postgresql://root@[{}]:{}/omicron?sslmode=disable", + address, COCKROACH_PORT + )) + .map_err(|e| format!("Cannot parse Postgres URL: {}", e))? } }; let pool = db::Pool::new(&db::Config { url }); diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index c13fc3de3c8..61abe04b1ba 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -90,7 +90,7 @@ impl Server { let ctxlog = log.new(o!("component" => "ServerContext")); - let apictx = ServerContext::new(rack_id, ctxlog, &config)?; + let apictx = ServerContext::new(rack_id, ctxlog, &config).await?; let http_server_starter_external = dropshot::HttpServerStarter::new( &config.runtime.dropshot_external, diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 839e8ba9a76..0d9daf0ccb5 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -177,6 +177,33 @@ } } } + }, + "/zpools": { + "get": { + "operationId": "zpools_get", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_Zpool", + "type": "array", + "items": { + "$ref": "#/components/schemas/Zpool" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } } }, "components": { @@ -1219,6 +1246,18 @@ ] } ] + }, + "Zpool": { + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "id" + ] } } } diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 9f1d167f85c..72a8c3c3f74 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -6,6 +6,7 @@ use crate::params::{ DatasetEnsureBody, DiskEnsureBody, InstanceEnsureBody, ServiceEnsureBody, + Zpool, }; use dropshot::{ endpoint, ApiDescription, HttpError, HttpResponseOk, @@ -28,6 +29,7 @@ type SledApiDescription = ApiDescription; pub fn api() -> SledApiDescription { fn register_endpoints(api: &mut SledApiDescription) -> Result<(), String> { api.register(services_put)?; + api.register(zpools_get)?; api.register(filesystem_put)?; api.register(instance_put)?; api.register(disk_put)?; @@ -56,6 +58,21 @@ async fn services_put( Ok(HttpResponseUpdatedNoContent()) } +#[endpoint { + method = GET, + path = "/zpools", +}] +async fn zpools_get( + rqctx: Arc>, +) -> Result>, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk( + sa.zpools_get() + .await + .map_err(|e| Error::from(e))? 
+ )) +} + #[endpoint { method = PUT, path = "/filesystem", diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index d003bbe785e..cc2e18a1062 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -156,6 +156,11 @@ pub struct InstanceRuntimeStateRequested { pub migration_params: Option, } +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct Zpool { + pub id: Uuid, +} + /// The type of a dataset, and an auxiliary information necessary /// to successfully launch a zone managing the associated data. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] @@ -198,7 +203,7 @@ impl std::fmt::Display for DatasetKind { use DatasetKind::*; let s = match self { Crucible => "crucible", - CockroachDb { .. } => "cockroach", + CockroachDb { .. } => "cockroachdb", Clickhouse => "clickhouse", }; write!(f, "{}", s) diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 6786312a009..ad53cdb8a04 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -5,7 +5,6 @@ //! Interfaces for working with RSS config. use crate::config::ConfigError; -use crate::params::{DatasetEnsureBody, ServiceRequest}; use omicron_common::address::{ get_64_subnet, Ipv6Subnet, AZ_PREFIX, RACK_PREFIX, SLED_PREFIX, }; @@ -27,26 +26,6 @@ use std::path::Path; #[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] pub struct SetupServiceConfig { pub rack_subnet: Ipv6Addr, - - // TODO: REMOVE! - #[serde(default, rename = "request")] - pub requests: Vec, -} - -/// A request to initialize a sled. -#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] -pub struct HardcodedSledRequest { - /// Datasets to be created. - #[serde(default, rename = "dataset")] - pub datasets: Vec, - - /// Services to be instantiated. - #[serde(default, rename = "service")] - pub services: Vec, - - /// DNS Services to be instantiated. - #[serde(default, rename = "dns_service")] - pub dns_services: Vec, } impl SetupServiceConfig { @@ -82,7 +61,6 @@ mod test { fn test_subnets() { let cfg = SetupServiceConfig { rack_subnet: "fd00:1122:3344:0100::".parse().unwrap(), - requests: vec![], }; assert_eq!( diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 0fd19706908..00f7230a3a0 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -10,6 +10,7 @@ use crate::bootstrap::{ params::SledAgentRequest, rss_handle::BootstrapAgentHandle, }; use crate::params::{DatasetEnsureBody, ServiceRequest, ServiceType}; +use internal_dns_client::names::{AAAA, SRV}; use omicron_common::address::{ get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, NEXUS_EXTERNAL_PORT, NEXUS_INTERNAL_PORT, RSS_RESERVED_ADDRESSES, @@ -18,9 +19,14 @@ use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, }; use serde::{Deserialize, Serialize}; +use sled_agent_client::{ + Client as SledAgentClient, + Error as SledAgentError, + types as SledAgentTypes, +}; use slog::Logger; use std::collections::{HashMap, HashSet}; -use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::net::{Ipv6Addr, SocketAddrV6}; use std::path::PathBuf; use thiserror::Error; use tokio::sync::{Mutex, OnceCell}; @@ -29,6 +35,12 @@ use uuid::Uuid; // The number of Nexus instances to create from RSS. const NEXUS_COUNT: usize = 1; +// The number of CRDB instances to create from RSS. 
+const CRDB_COUNT: usize = 1; + +// The minimum number of sleds to initialize the rack. +const MINIMUM_SLED_COUNT: usize = 1; + /// Describes errors which may occur while operating the setup service. #[derive(Error, Debug)] pub enum SetupServiceError { @@ -39,11 +51,14 @@ pub enum SetupServiceError { err: std::io::Error, }, + #[error("Bad configuration for setting up rack: {0}")] + BadConfig(String), + #[error("Error initializing sled via sled-agent: {0}")] SledInitialization(String), #[error("Error making HTTP request to Sled Agent: {0}")] - SledApi(#[from] sled_agent_client::Error), + SledApi(#[from] SledAgentError), #[error("Cannot deserialize TOML file at {path}: {err}")] Toml { path: PathBuf, err: toml::de::Error }, @@ -125,14 +140,19 @@ impl Service { } } -fn rss_plan_path() -> std::path::PathBuf { +fn rss_sled_plan_path() -> std::path::PathBuf { + std::path::Path::new(omicron_common::OMICRON_CONFIG_PATH) + .join("rss-sled-plan.toml") +} + +fn rss_service_plan_path() -> std::path::PathBuf { std::path::Path::new(omicron_common::OMICRON_CONFIG_PATH) - .join("rss-plan.toml") + .join("rss-service-plan.toml") } fn rss_completed_plan_path() -> std::path::PathBuf { std::path::Path::new(omicron_common::OMICRON_CONFIG_PATH) - .join("rss-plan-completed.toml") + .join("rss-plan-completed.marker") } // Describes the options when awaiting for peers. @@ -191,22 +211,30 @@ impl ServiceInner { } } - async fn initialize_datasets( + async fn initialize_crdb( &self, - sled_address: SocketAddr, + sled_address: SocketAddrV6, datasets: &Vec, ) -> Result<(), SetupServiceError> { - let dur = std::time::Duration::from_secs(60); + if datasets.iter().any(|dataset| { + !matches!( + dataset.dataset_kind, + crate::params::DatasetKind::CockroachDb { .. } + ) + }) { + return Err(SetupServiceError::BadConfig("RSS should only initialize CRDB services".into())); + } + let dur = std::time::Duration::from_secs(60); let client = reqwest::ClientBuilder::new() .connect_timeout(dur) .timeout(dur) .build() .map_err(SetupServiceError::HttpClient)?; - let client = sled_agent_client::Client::new_with_client( + let client = SledAgentClient::new_with_client( &format!("http://{}", sled_address), client, - self.log.new(o!("SledAgentClient" => sled_address)), + self.log.new(o!("SledAgentClient" => sled_address.to_string())), ); info!(self.log, "sending dataset requests..."); @@ -220,8 +248,8 @@ impl ServiceInner { Ok::< (), BackoffError< - sled_agent_client::Error< - sled_agent_client::types::Error, + SledAgentError< + SledAgentTypes::Error, >, >, >(()) @@ -236,12 +264,34 @@ impl ServiceInner { ) .await?; } + + // Initialize DNS records for these datasets. + // + // CRDB is treated as a service, since they are interchangeable. 
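+        // (Concretely: one AAAA record is created per dataset zone below, and
+        // all of them are registered under a single "cockroachdb" SRV record,
+        // so a client may use whichever address the lookup returns.)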
+ + let aaaa = datasets + .iter() + .map(|dataset| { + ( + AAAA::Zone(dataset.id), + dataset.address, + ) + }) + .collect::>(); + let srv_key = SRV::Service("cockroachdb".into()); + + self.dns_servers + .get() + .expect("DNS servers must be initialized first") + .insert_dns_records(&self.log, aaaa, srv_key) + .await?; + Ok(()) } async fn initialize_services( &self, - sled_address: SocketAddr, + sled_address: SocketAddrV6, services: &Vec, ) -> Result<(), SetupServiceError> { let dur = std::time::Duration::from_secs(60); @@ -250,17 +300,17 @@ impl ServiceInner { .timeout(dur) .build() .map_err(SetupServiceError::HttpClient)?; - let client = sled_agent_client::Client::new_with_client( + let client = SledAgentClient::new_with_client( &format!("http://{}", sled_address), client, - self.log.new(o!("SledAgentClient" => sled_address)), + self.log.new(o!("SledAgentClient" => sled_address.to_string())), ); info!(self.log, "sending service requests..."); let services_put = || async { info!(self.log, "initializing sled services: {:?}", services); client - .services_put(&sled_agent_client::types::ServiceEnsureBody { + .services_put(&SledAgentTypes::ServiceEnsureBody { services: services .iter() .map(|s| s.clone().into()) @@ -271,7 +321,7 @@ impl ServiceInner { Ok::< (), BackoffError< - sled_agent_client::Error, + SledAgentError, >, >(()) }; @@ -283,29 +333,29 @@ impl ServiceInner { Ok(()) } - async fn load_plan( + async fn load_sled_plan( &self, - ) -> Result>, SetupServiceError> + ) -> Result>, SetupServiceError> { // If we already created a plan for this RSS to allocate // subnets/requests to sleds, re-use that existing plan. - let rss_plan_path = rss_plan_path(); - if rss_plan_path.exists() { + let rss_sled_plan_path = rss_sled_plan_path(); + if rss_sled_plan_path.exists() { info!(self.log, "RSS plan already created, loading from file"); - let plan: std::collections::HashMap = + let plan: std::collections::HashMap = toml::from_str( - &tokio::fs::read_to_string(&rss_plan_path).await.map_err( + &tokio::fs::read_to_string(&rss_sled_plan_path).await.map_err( |err| SetupServiceError::Io { message: format!( - "Loading RSS plan {rss_plan_path:?}" + "Loading RSS plan {rss_sled_plan_path:?}" ), err, }, )?, ) .map_err(|err| SetupServiceError::Toml { - path: rss_plan_path, + path: rss_sled_plan_path, err, })?; Ok(Some(plan)) @@ -314,104 +364,14 @@ impl ServiceInner { } } - async fn create_plan( + async fn create_sled_plan( &self, config: &Config, bootstrap_addrs: impl IntoIterator, - ) -> Result, SetupServiceError> { + ) -> Result, SetupServiceError> { let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); - let reserved_rack_subnet = ReservedRackSubnet::new(config.az_subnet()); - let dns_subnets = reserved_rack_subnet.get_dns_subnets(); - - info!(self.log, "dns_subnets: {:#?}", dns_subnets); - - let requests_and_sleds = - bootstrap_addrs.map(|(idx, bootstrap_addr)| { - let sled_subnet_index = - u8::try_from(idx + 1).expect("Too many peers!"); - let subnet = config.sled_subnet(sled_subnet_index); - let mut addr_alloc = - AddressBumpAllocator::new(*get_sled_address(subnet).ip()); - - let mut request = SledRequest::default(); - - // The first enumerated sleds get assigned the responsibility - // of hosting Nexus. 
- if idx < NEXUS_COUNT { - let address = addr_alloc.next().expect("Not enough addrs"); - request.services.push(ServiceRequest { - id: Uuid::new_v4(), - name: "nexus".to_string(), - addresses: vec![address], - gz_addresses: vec![], - service_type: ServiceType::Nexus { - internal_address: SocketAddrV6::new( - address, - NEXUS_INTERNAL_PORT, - 0, - 0, - ), - external_address: SocketAddrV6::new( - address, - NEXUS_EXTERNAL_PORT, - 0, - 0, - ), - }, - }) - } - - // The first enumerated sleds host the CRDB datasets, using - // zpools described from the underlying config file. - if idx < config.requests.len() { - for dataset in &config.requests[idx].datasets { - let address = SocketAddrV6::new( - addr_alloc.next().expect("Not enough addrs"), - omicron_common::address::COCKROACH_PORT, - 0, - 0, - ); - request.datasets.push(DatasetEnsureBody { - id: Uuid::new_v4(), - zpool_id: dataset.zpool_id, - dataset_kind: - crate::params::DatasetKind::CockroachDb { - all_addresses: vec![address], - }, - address, - }); - } - } - - // The first enumerated sleds get assigned the additional - // responsibility of being internal DNS servers. - if idx < dns_subnets.len() { - let dns_subnet = &dns_subnets[idx]; - let dns_addr = dns_subnet.dns_address().ip(); - request.dns_services.push(ServiceRequest { - id: Uuid::new_v4(), - name: "internal-dns".to_string(), - addresses: vec![dns_addr], - gz_addresses: vec![dns_subnet.gz_address().ip()], - service_type: ServiceType::InternalDns { - server_address: SocketAddrV6::new( - dns_addr, - DNS_SERVER_PORT, - 0, - 0, - ), - dns_address: SocketAddrV6::new( - dns_addr, DNS_PORT, 0, 0, - ), - }, - }); - } - (request, (idx, bootstrap_addr)) - }); - - let allocations = requests_and_sleds.map(|(request, sled)| { - let (idx, bootstrap_addr) = sled; + let allocations = bootstrap_addrs.map(|(idx, bootstrap_addr)| { info!( self.log, "Creating plan for the sled at {:?}", bootstrap_addr @@ -424,10 +384,7 @@ impl ServiceInner { ( bootstrap_addr, - SledAllocation { - initialization_request: SledAgentRequest { subnet }, - services_request: request, - }, + SledAgentRequest { subnet }, ) }); @@ -447,14 +404,222 @@ impl ServiceInner { .expect("Cannot turn config to string"); info!(self.log, "Plan serialized as: {}", plan_str); - let path = rss_plan_path(); + let path = rss_sled_plan_path(); tokio::fs::write(&path, plan_str).await.map_err(|err| { SetupServiceError::Io { - message: format!("Storing RSS plan to {path:?}"), + message: format!("Storing RSS sled plan to {path:?}"), err, } })?; - info!(self.log, "Plan written to storage"); + info!(self.log, "Sled plan written to storage"); + + Ok(plan) + } + + // Gets a zpool UUID from the sled. 
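+    //
+    // This polls the sled agent's `/zpools` endpoint with the standard
+    // internal retry policy until at least one zpool is reported, then
+    // returns the first ID; RSS uses it to pick a home for the CockroachDB
+    // dataset.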
+ async fn get_a_zpool_from_sled( + &self, + address: SocketAddrV6, + ) -> Result { + let dur = std::time::Duration::from_secs(60); + let client = reqwest::ClientBuilder::new() + .connect_timeout(dur) + .timeout(dur) + .build() + .map_err(SetupServiceError::HttpClient)?; + let client = SledAgentClient::new_with_client( + &format!("http://{}", address), + client, + self.log.new(o!("SledAgentClient" => address.to_string())), + ); + + let get_zpools = || async { + let zpools: Vec = client + .zpools_get() + .await + .map(|response| { + response.into_inner() + .into_iter() + .map(|zpool| zpool.id) + .collect() + }) + .map_err(|err| { + BackoffError::transient( + SetupServiceError::SledApi(err) + ) + })?; + + if zpools.is_empty() { + return Err(BackoffError::transient( + SetupServiceError::SledInitialization("Awaiting zpools".to_string()) + )); + } + + Ok(zpools) + }; + let log_failure = |error, _| { + warn!(self.log, "failed to get zpools"; "error" => ?error); + }; + let zpools = retry_notify( + internal_service_policy(), + get_zpools, + log_failure, + ) + .await?; + + Ok(zpools[0]) + } + + async fn load_service_plan( + &self, + ) -> Result>, SetupServiceError> + { + // If we already created a plan for this RSS to allocate + // services to sleds, re-use that existing plan. + let rss_service_plan_path = rss_service_plan_path(); + if rss_service_plan_path.exists() { + info!(self.log, "RSS plan already created, loading from file"); + + let plan: std::collections::HashMap = + toml::from_str( + &tokio::fs::read_to_string(&rss_service_plan_path).await.map_err( + |err| SetupServiceError::Io { + message: format!( + "Loading RSS plan {rss_service_plan_path:?}" + ), + err, + }, + )?, + ) + .map_err(|err| SetupServiceError::Toml { + path: rss_service_plan_path, + err, + })?; + Ok(Some(plan)) + } else { + Ok(None) + } + } + async fn create_service_plan( + &self, + config: &Config, + sled_addrs: &Vec, + ) -> Result, SetupServiceError> { + let reserved_rack_subnet = ReservedRackSubnet::new(config.az_subnet()); + let dns_subnets = reserved_rack_subnet.get_dns_subnets(); + + let mut allocations = vec![]; + + for idx in 0..sled_addrs.len() { + let sled_address = sled_addrs[idx]; + let sled_subnet_index = + u8::try_from(idx + 1).expect("Too many peers!"); + let subnet = config.sled_subnet(sled_subnet_index); + let mut addr_alloc = + AddressBumpAllocator::new(*get_sled_address(subnet).ip()); + + let mut request = SledRequest::default(); + + // The first enumerated sleds get assigned the responsibility + // of hosting Nexus. + if idx < NEXUS_COUNT { + let address = addr_alloc.next().expect("Not enough addrs"); + request.services.push(ServiceRequest { + id: Uuid::new_v4(), + name: "nexus".to_string(), + addresses: vec![address], + gz_addresses: vec![], + service_type: ServiceType::Nexus { + internal_address: SocketAddrV6::new( + address, + NEXUS_INTERNAL_PORT, + 0, + 0, + ), + external_address: SocketAddrV6::new( + address, + NEXUS_EXTERNAL_PORT, + 0, + 0, + ), + }, + }) + } + + // The first enumerated sleds host the CRDB datasets, using + // zpools described from the underlying config file. 
+ if idx < CRDB_COUNT { + let zpool_id = self.get_a_zpool_from_sled(sled_address).await?; + + let address = SocketAddrV6::new( + addr_alloc.next().expect("Not enough addrs"), + omicron_common::address::COCKROACH_PORT, + 0, + 0, + ); + request.datasets.push(DatasetEnsureBody { + id: Uuid::new_v4(), + zpool_id, + dataset_kind: + crate::params::DatasetKind::CockroachDb { + all_addresses: vec![address], + }, + address, + }); + } + + // The first enumerated sleds get assigned the additional + // responsibility of being internal DNS servers. + if idx < dns_subnets.len() { + let dns_subnet = &dns_subnets[idx]; + let dns_addr = dns_subnet.dns_address().ip(); + request.dns_services.push(ServiceRequest { + id: Uuid::new_v4(), + name: "internal-dns".to_string(), + addresses: vec![dns_addr], + gz_addresses: vec![dns_subnet.gz_address().ip()], + service_type: ServiceType::InternalDns { + server_address: SocketAddrV6::new( + dns_addr, + DNS_SERVER_PORT, + 0, + 0, + ), + dns_address: SocketAddrV6::new( + dns_addr, DNS_PORT, 0, 0, + ), + }, + }); + } + + allocations.push(( + sled_address, + request + )); + } + + let mut plan = std::collections::HashMap::new(); + for (addr, allocation) in allocations { + plan.insert(addr, allocation); + } + + // Once we've constructed a plan, write it down to durable storage. + let serialized_plan = + toml::Value::try_from(&plan).unwrap_or_else(|e| { + panic!("Cannot serialize configuration: {:#?}: {}", plan, e) + }); + let plan_str = toml::to_string(&serialized_plan) + .expect("Cannot turn config to string"); + + info!(self.log, "Plan serialized as: {}", plan_str); + let path = rss_service_plan_path(); + tokio::fs::write(&path, plan_str).await.map_err(|err| { + SetupServiceError::Io { + message: format!("Storing RSS service plan to {path:?}"), + err, + } + })?; + info!(self.log, "Service plan written to storage"); Ok(plan) } @@ -509,19 +674,28 @@ impl ServiceInner { // This method has a few distinct phases, identified by files in durable // storage: // - // 1. ALLOCATION PLAN CREATION. When the RSS starts up for the first time, - // it creates an allocation plan to provision subnets and services - // to an initial set of sleds. + // 1. SLED ALLOCATION PLAN CREATION. When the RSS starts up for the first + // time, it creates an allocation plan to provision subnets to an initial + // set of sleds. // - // This plan is stored at "rss_plan_path()". + // This plan is stored at "rss_sled_plan_path()". // - // 2. ALLOCATION PLAN EXECUTION. The RSS then carries out this plan, making + // 2. SLED ALLOCATION PLAN EXECUTION. The RSS then carries out this plan, making // requests to the sleds enumerated within the "allocation plan". // - // 3. MARKING SETUP COMPLETE. Once the RSS has successfully initialized the - // rack, the "rss_plan_path()" file is renamed to - // "rss_completed_plan_path()". This indicates that the plan executed - // successfully, and no work remains. + // 3. SERVICE ALLOCATION PLAN CREATION. Now that Sled Agents are executing + // on their respsective subnets, they can be queried to create an + // allocation plan for services. + // + // This plan - for what services go where - is stored at + // "rss_service_plan_path()". + // + // 4. SERVICE ALLOCATION PLAN EXECUTION. RSS requests that the services + // outlined in the aforementioned step are created. + // + // 5. MARKING SETUP COMPLETE. Once the RSS has successfully initialized the + // rack, a marker file is created at "rss_completed_plan_path()". 
This + // indicates that the plan executed successfully, and no work remains. async fn inject_rack_setup_requests( &self, config: &Config, @@ -549,11 +723,11 @@ impl ServiceInner { // Wait for either: // - All the peers to re-load an old plan (if one exists) // - Enough peers to create a new plan (if one does not exist) - let maybe_plan = self.load_plan().await?; - let expectation = if let Some(plan) = &maybe_plan { + let maybe_sled_plan = self.load_sled_plan().await?; + let expectation = if let Some(plan) = &maybe_sled_plan { PeerExpectation::LoadOldPlan(plan.keys().map(|a| *a.ip()).collect()) } else { - PeerExpectation::CreateNewPlan(config.requests.len()) + PeerExpectation::CreateNewPlan(MINIMUM_SLED_COUNT) }; let addrs = self.wait_for_peers(expectation).await?; info!(self.log, "Enough peers exist to enact RSS plan"); @@ -562,24 +736,24 @@ impl ServiceInner { // // NOTE: This is a "point-of-no-return" -- before sending any requests // to neighboring sleds, the plan must be recorded to durable storage. - // This way, if the RSS power-cycles, it can idempotently execute the - // same allocation plan. - let plan = if let Some(plan) = maybe_plan { + // This way, if the RSS power-cycles, it can idempotently provide the + // same subnets to the same sleds. + let plan = if let Some(plan) = maybe_sled_plan { info!(self.log, "Re-using existing allocation plan"); plan } else { info!(self.log, "Creating new allocation plan"); - self.create_plan(config, addrs).await? + self.create_sled_plan(config, addrs).await? }; // Forward the sled initialization requests to our sled-agent. local_bootstrap_agent .initialize_sleds( plan.iter() - .map(|(bootstrap_addr, allocation)| { + .map(|(bootstrap_addr, initialization_request)| { ( *bootstrap_addr, - allocation.initialization_request.clone(), + initialization_request.clone(), ) }) .collect(), @@ -587,22 +761,34 @@ impl ServiceInner { .await .map_err(SetupServiceError::SledInitialization)?; + let sled_addresses: Vec<_> = plan.iter() + .map(|(_, initialization_request)| { + get_sled_address( + initialization_request.subnet, + ) + }) + .collect(); + + // Now that sled agents have been initialized, we can create + // a service allocation plan. + let service_plan = if let Some(plan) = self.load_service_plan().await? { + plan + } else { + self.create_service_plan(&config, &sled_addresses).await? + }; + // Set up internal DNS services. futures::future::join_all( - plan.iter() - .filter(|(_, allocation)| { + service_plan.iter() + .filter(|(_, service_request)| { // Only send requests to sleds that are supposed to be running // DNS services. - !allocation.services_request.dns_services.is_empty() + !service_request.dns_services.is_empty() }) - .map(|(_, allocation)| async move { - let sled_address = SocketAddr::V6(get_sled_address( - allocation.initialization_request.subnet, - )); - + .map(|(sled_address, services_request)| async move { self.initialize_services( - sled_address, - &allocation.services_request.dns_services, + *sled_address, + &services_request.dns_services, ) .await?; Ok(()) @@ -621,15 +807,12 @@ impl ServiceInner { .map_err(|_| ()) .expect("Already set DNS servers"); - // Issue the dataset initialization requests to all sleds. - futures::future::join_all(plan.iter().map( - |(_, allocation)| async move { - let sled_address = SocketAddr::V6(get_sled_address( - allocation.initialization_request.subnet, - )); - self.initialize_datasets( - sled_address, - &allocation.services_request.datasets, + // Issue the crdb initialization requests to all sleds. 
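+        //
+        // (The requests are issued to the sleds concurrently; `initialize_crdb`
+        // also registers the AAAA/SRV records for the new datasets in internal
+        // DNS once they have been created.)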
+ futures::future::join_all(service_plan.iter().map( + |(sled_address, services_request)| async move { + self.initialize_crdb( + *sled_address, + &services_request.datasets, ) .await?; Ok(()) @@ -646,21 +829,23 @@ impl ServiceInner { // Note that this must happen *after* the dataset initialization, // to ensure that CockroachDB has been initialized before Nexus // starts. - futures::future::join_all(plan.iter().map( - |(_, allocation)| async move { - let sled_address = SocketAddr::V6(get_sled_address( - allocation.initialization_request.subnet, - )); - - let all_services = allocation - .services_request + futures::future::join_all(service_plan.iter().map( + |(sled_address, services_request)| async move { + // With the current implementation of "initialize_services", + // we must provide the set of *all* services that should be + // executing on a sled. + // + // This means re-requesting the DNS service, even if it is + // already running - this is fine, however, as the receiving + // sled agent doesn't modify the already-running service. + let all_services = services_request .services .iter() - .chain(allocation.services_request.dns_services.iter()) + .chain(services_request.dns_services.iter()) .map(|s| s.clone()) .collect::>(); - self.initialize_services(sled_address, &all_services).await?; + self.initialize_services(*sled_address, &all_services).await?; Ok(()) }, )) @@ -672,11 +857,10 @@ impl ServiceInner { // Finally, make sure the configuration is saved so we don't inject // the requests on the next iteration. - let plan_path = rss_plan_path(); - tokio::fs::rename(&plan_path, &rss_completed_plan_path).await.map_err( + tokio::fs::File::create(&rss_completed_plan_path).await.map_err( |err| SetupServiceError::Io { message: format!( - "renaming {plan_path:?} to {rss_completed_plan_path:?}" + "creating {rss_completed_plan_path:?}" ), err, }, diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 5f8f1e500ab..6260191f58b 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -14,7 +14,7 @@ use crate::instance_manager::InstanceManager; use crate::nexus::NexusClient; use crate::params::{ DatasetKind, DiskStateRequested, InstanceHardware, InstanceMigrateParams, - InstanceRuntimeStateRequested, ServiceEnsureBody, + InstanceRuntimeStateRequested, ServiceEnsureBody, Zpool }; use crate::services::{self, ServiceManager}; use crate::storage_manager::StorageManager; @@ -274,6 +274,15 @@ impl SledAgent { Ok(()) } + pub async fn zpools_get( + &self + ) -> Result, Error> { + let zpools = self.storage + .get_zpools() + .await?; + Ok(zpools) + } + /// Ensures that a filesystem type exists within the zpool. 
pub async fn filesystem_ensure( &self, diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index a02e68baae9..467bb70b3d8 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -957,6 +957,15 @@ impl StorageManager { Ok(()) } + pub async fn get_zpools(&self) -> Result, Error> { + let pools = self.pools.lock().await; + Ok(pools.keys().map(|zpool| { + crate::params::Zpool { + id: zpool.id() + } + }).collect()) + } + pub async fn upsert_filesystem( &self, zpool_id: Uuid, diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index 18a1a3d8597..5640bc69c81 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -6,8 +6,8 @@ # |...............| <- This /56 is the Rack Subnet rack_subnet = "fd00:1122:3344:0100::" -# [[request]] -# +[[request]] + # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate crucible datasets. # [[request.dataset]] From 565862e988c597f17f1b8b27c4d0b6a15d02aa70 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 9 Jun 2022 18:54:01 -0400 Subject: [PATCH 03/35] RSS side of handoff to Nexus mostly complete --- nexus/src/app/rack.rs | 19 +- nexus/src/db/datastore.rs | 71 ++- nexus/src/internal_api/http_entrypoints.rs | 11 +- nexus/src/internal_api/params.rs | 13 + openapi/nexus-internal.json | 48 +- sled-agent/src/bin/sled-agent.rs | 5 +- sled-agent/src/bootstrap/agent.rs | 13 +- sled-agent/src/bootstrap/params.rs | 4 + sled-agent/src/bootstrap/server.rs | 2 +- sled-agent/src/config.rs | 3 - sled-agent/src/rack_setup/mod.rs | 1 + sled-agent/src/rack_setup/plan/mod.rs | 8 + sled-agent/src/rack_setup/plan/service.rs | 319 ++++++++++++ sled-agent/src/rack_setup/plan/sled.rs | 140 ++++++ sled-agent/src/rack_setup/service.rs | 533 +++++++-------------- sled-agent/src/server.rs | 4 +- sled-agent/src/sled_agent.rs | 7 +- sled-agent/src/sp.rs | 7 +- smf/sled-agent/config-rss.toml | 3 +- smf/sled-agent/config.toml | 3 - 20 files changed, 815 insertions(+), 399 deletions(-) create mode 100644 sled-agent/src/rack_setup/plan/mod.rs create mode 100644 sled-agent/src/rack_setup/plan/service.rs create mode 100644 sled-agent/src/rack_setup/plan/sled.rs diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index a9a10a616aa..8b9728c7f77 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -7,7 +7,7 @@ use crate::authz; use crate::context::OpContext; use crate::db; -use crate::internal_api::params::ServicePutRequest; +use crate::internal_api::params::RackInitializationRequest; use futures::future::ready; use futures::StreamExt; use omicron_common::api::external::DataPageParams; @@ -69,12 +69,12 @@ impl super::Nexus { &self, opctx: &OpContext, rack_id: Uuid, - services: Vec, + request: RackInitializationRequest, ) -> Result<(), Error> { opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; // Convert from parameter -> DB type. 
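        // Both the services and the datasets reported by RSS are converted to
        // their db models here and handed to a single `rack_set_initialized`
        // call below.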
- let services: Vec<_> = services + let services: Vec<_> = request.services .into_iter() .map(|svc| { db::model::Service::new( @@ -86,8 +86,19 @@ impl super::Nexus { }) .collect(); + let datasets: Vec<_> = request.datasets + .into_iter() + .map(|dataset| { + db::model::Dataset::new( + dataset.dataset_id, + dataset.zpool_id, + dataset.request.address, + dataset.request.kind.into(), + ) + }) + .collect(); self.db_datastore - .rack_set_initialized(opctx, rack_id, services) + .rack_set_initialized(opctx, rack_id, services, datasets) .await?; Ok(()) diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 9083b45eca0..a386c098eb6 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -181,13 +181,14 @@ impl DataStore { opctx: &OpContext, rack_id: Uuid, services: Vec, + datasets: Vec, ) -> UpdateResult { use db::schema::rack::dsl as rack_dsl; - use db::schema::service::dsl as service_dsl; #[derive(Debug)] enum RackInitError { ServiceInsert { err: SyncInsertError, sled_id: Uuid, svc_id: Uuid }, + DatasetInsert { err: SyncInsertError, zpool_id: Uuid, dataset_id: Uuid }, RackUpdate(diesel::result::Error), } type TxnError = TransactionError; @@ -209,22 +210,21 @@ impl DataStore { return Ok(rack); } - // Otherwise, insert services and set rack.initialized = true. + // Otherwise, insert services and datasets for svc in services { + use db::schema::service::dsl; let sled_id = svc.sled_id; >::insert_resource( sled_id, - diesel::insert_into(service_dsl::service) + diesel::insert_into(dsl::service) .values(svc.clone()) - .on_conflict(service_dsl::id) + .on_conflict(dsl::id) .do_update() .set(( - service_dsl::time_modified.eq(Utc::now()), - service_dsl::sled_id - .eq(excluded(service_dsl::sled_id)), - service_dsl::ip.eq(excluded(service_dsl::ip)), - service_dsl::kind - .eq(excluded(service_dsl::kind)), + dsl::time_modified.eq(Utc::now()), + dsl::sled_id.eq(excluded(dsl::sled_id)), + dsl::ip.eq(excluded(dsl::ip)), + dsl::kind.eq(excluded(dsl::kind)), )), ) .insert_and_get_result(conn) @@ -236,6 +236,34 @@ impl DataStore { }) })?; } + for dataset in datasets { + use db::schema::dataset::dsl; + let zpool_id = dataset.pool_id; + >::insert_resource( + zpool_id, + diesel::insert_into(dsl::dataset) + .values(dataset.clone()) + .on_conflict(dsl::id) + .do_update() + .set(( + dsl::time_modified.eq(Utc::now()), + dsl::pool_id.eq(excluded(dsl::pool_id)), + dsl::ip.eq(excluded(dsl::ip)), + dsl::port.eq(excluded(dsl::port)), + dsl::kind.eq(excluded(dsl::kind)), + )), + ) + .insert_and_get_result(conn) + .map_err(|err| { + TxnError::CustomError(RackInitError::DatasetInsert { + err, + zpool_id, + dataset_id: dataset.id(), + }) + })?; + } + + // Set the rack to "initialized" once the handoff is complete diesel::update(rack_dsl::rack) .filter(rack_dsl::id.eq(rack_id)) .set(( @@ -250,6 +278,25 @@ impl DataStore { }) .await .map_err(|e| match e { + TxnError::CustomError(RackInitError::DatasetInsert { + err, + zpool_id, + dataset_id, + }) => match err { + SyncInsertError::CollectionNotFound => { + Error::ObjectNotFound { + type_name: ResourceType::Zpool, + lookup_type: LookupType::ById(zpool_id), + } + } + SyncInsertError::DatabaseError(e) => { + public_error_from_diesel_create( + e, + ResourceType::Dataset, + &dataset_id.to_string(), + ) + } + }, TxnError::CustomError(RackInitError::ServiceInsert { err, sled_id, @@ -4433,14 +4480,14 @@ mod test { // Initialize the Rack. 
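        // (Empty service and dataset lists: this test only exercises the
        // `initialized` flag and the idempotency of the call.)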
let result = datastore - .rack_set_initialized(&opctx, rack.id(), vec![]) + .rack_set_initialized(&opctx, rack.id(), vec![], vec![]) .await .unwrap(); assert!(result.initialized); // Re-initialize the rack (check for idempotency) let result = datastore - .rack_set_initialized(&opctx, rack.id(), vec![]) + .rack_set_initialized(&opctx, rack.id(), vec![], vec![]) .await .unwrap(); assert!(result.initialized); diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 83c0c3baec8..2ead37db565 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -7,8 +7,9 @@ use crate::context::OpContext; use crate::ServerContext; use super::params::{ - DatasetPutRequest, DatasetPutResponse, OximeterInfo, ServicePutRequest, - SledAgentStartupInfo, ZpoolPutRequest, ZpoolPutResponse, + DatasetPutRequest, DatasetPutResponse, OximeterInfo, + RackInitializationRequest, SledAgentStartupInfo, ZpoolPutRequest, + ZpoolPutResponse, }; use dropshot::endpoint; use dropshot::ApiDescription; @@ -104,15 +105,15 @@ struct RackPathParam { async fn rack_initialization_complete( rqctx: Arc>>, path_params: Path, - info: TypedBody>, + info: TypedBody, ) -> Result { let apictx = rqctx.context(); let nexus = &apictx.nexus; let path = path_params.into_inner(); - let svcs = info.into_inner(); + let request = info.into_inner(); let opctx = OpContext::for_internal_api(&rqctx).await; - nexus.rack_initialize(&opctx, path.rack_id, svcs).await?; + nexus.rack_initialize(&opctx, path.rack_id, request).await?; Ok(HttpResponseUpdatedNoContent()) } diff --git a/nexus/src/internal_api/params.rs b/nexus/src/internal_api/params.rs index 8b83138c2b5..7dda7610573 100644 --- a/nexus/src/internal_api/params.rs +++ b/nexus/src/internal_api/params.rs @@ -149,6 +149,19 @@ pub struct ServicePutRequest { pub kind: ServiceKind, } +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct DatasetCreateRequest { + pub zpool_id: Uuid, + pub dataset_id: Uuid, + pub request: DatasetPutRequest, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct RackInitializationRequest { + pub services: Vec, + pub datasets: Vec, +} + /// Message used to notify Nexus that this oximeter instance is up and running. 
#[derive(Debug, Clone, Copy, JsonSchema, Serialize, Deserialize)] pub struct OximeterInfo { diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 743b4107589..103e0481ca9 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -249,11 +249,7 @@ "content": { "application/json": { "schema": { - "title": "Array_of_ServicePutRequest", - "type": "array", - "items": { - "$ref": "#/components/schemas/ServicePutRequest" - } + "$ref": "#/components/schemas/RackInitializationRequest" } } }, @@ -668,6 +664,27 @@ "value" ] }, + "DatasetCreateRequest": { + "type": "object", + "properties": { + "dataset_id": { + "type": "string", + "format": "uuid" + }, + "request": { + "$ref": "#/components/schemas/DatasetPutRequest" + }, + "zpool_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "dataset_id", + "request", + "zpool_id" + ] + }, "DatasetKind": { "description": "Describes the purpose of the dataset.", "type": "string", @@ -1705,6 +1722,27 @@ } ] }, + "RackInitializationRequest": { + "type": "object", + "properties": { + "datasets": { + "type": "array", + "items": { + "$ref": "#/components/schemas/DatasetCreateRequest" + } + }, + "services": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ServicePutRequest" + } + } + }, + "required": [ + "datasets", + "services" + ] + }, "Sample": { "description": "A concrete type representing a single, timestamped measurement from a timeseries.", "type": "object", diff --git a/sled-agent/src/bin/sled-agent.rs b/sled-agent/src/bin/sled-agent.rs index fba2d0d5de0..ea09da733da 100644 --- a/sled-agent/src/bin/sled-agent.rs +++ b/sled-agent/src/bin/sled-agent.rs @@ -15,6 +15,7 @@ use omicron_sled_agent::{config::Config as SledConfig, server as sled_server}; use sp_sim::config::GimletConfig; use std::path::PathBuf; use structopt::StructOpt; +use uuid::Uuid; #[derive(Debug, StructOpt)] #[structopt( @@ -99,7 +100,9 @@ async fn do_run() -> Result<(), CmdError> { // Configure and run the Bootstrap server. let bootstrap_config = BootstrapConfig { - id: config.id, + // NOTE: The UUID of this bootstrap server is not stable across + // reboots. + id: Uuid::new_v4(), bind_address: bootstrap_address, log: config.log.clone(), rss_config, diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index bc0f84c2bad..e47e7300240 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -138,7 +138,6 @@ impl Agent { ) -> Result { let ba_log = log.new(o!( "component" => "BootstrapAgent", - "server" => sled_config.id.to_string(), )); // We expect this directory to exist - ensure that it does, before any @@ -246,7 +245,14 @@ impl Agent { // Server already exists, return it. 
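            // Before treating this request as a no-op, check that the running
            // server matches it: a mismatched UUID or IP address is reported
            // back as a SledError rather than silently ignored.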
info!(&self.log, "Sled Agent already loaded"); - if &server.address().ip() != sled_address.ip() { + if server.id() != request.id { + let err_str = format!( + "Sled Agent already running with UUID {}, but {} was requested", + server.id(), + request.id, + ); + return Err(BootstrapError::SledError(err_str)); + } else if &server.address().ip() != sled_address.ip() { let err_str = format!( "Sled Agent already running on address {}, but {} was requested", server.address().ip(), @@ -261,6 +267,7 @@ impl Agent { let server = SledServer::start( &self.sled_config, self.parent_log.clone(), + request.id, sled_address, ) .await @@ -289,7 +296,7 @@ impl Agent { err, })?; - Ok(SledAgentResponse { id: self.sled_config.id }) + Ok(SledAgentResponse { id: request.id }) } /// Communicates with peers, sharing secrets, until the rack has been diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index 334376f28d3..5a01f4c6e0b 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -9,6 +9,7 @@ use std::borrow::Cow; use omicron_common::address::{Ipv6Subnet, SLED_PREFIX}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use uuid::Uuid; /// Identity signed by local RoT and Oxide certificate chain. #[derive(Serialize, Deserialize, JsonSchema)] @@ -20,6 +21,9 @@ pub struct ShareRequest { /// Configuration information for launching a Sled Agent. #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] pub struct SledAgentRequest { + /// Uuid of the Sled Agent to be created. + pub id: Uuid, + /// Portion of the IP space to be managed by the Sled Agent. pub subnet: Ipv6Subnet, } diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index c5e663b7eab..629adc0ed14 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -53,7 +53,7 @@ impl Server { } info!(log, "detecting (real or simulated) SP"); - let sp = SpHandle::detect(&config.sp_config, &sled_config, &log) + let sp = SpHandle::detect(&config.sp_config, &log) .await .map_err(|err| format!("Failed to detect local SP: {err}"))?; diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index d67dd088e8a..a7b0d9fb1ab 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -11,13 +11,10 @@ use dropshot::ConfigLogging; use serde::Deserialize; use std::net::SocketAddr; use std::path::{Path, PathBuf}; -use uuid::Uuid; /// Configuration for a sled agent #[derive(Clone, Debug, Deserialize)] pub struct Config { - /// Unique id for the sled - pub id: Uuid, /// Address of Nexus instance pub nexus_address: SocketAddr, /// Configuration for the sled agent debug log diff --git a/sled-agent/src/rack_setup/mod.rs b/sled-agent/src/rack_setup/mod.rs index e947ff99ef0..f052b6c3120 100644 --- a/sled-agent/src/rack_setup/mod.rs +++ b/sled-agent/src/rack_setup/mod.rs @@ -6,3 +6,4 @@ pub mod config; pub mod service; +mod plan; diff --git a/sled-agent/src/rack_setup/plan/mod.rs b/sled-agent/src/rack_setup/plan/mod.rs new file mode 100644 index 00000000000..2343a3be2e6 --- /dev/null +++ b/sled-agent/src/rack_setup/plan/mod.rs @@ -0,0 +1,8 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Rack Setup Service plan generation + +pub mod service; +pub mod sled; diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs new file mode 100644 index 00000000000..c5ceb3c1ef0 --- /dev/null +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -0,0 +1,319 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Plan generation for "where should services be initialized". + +use crate::params::{DatasetEnsureBody, ServiceRequest, ServiceType}; +use crate::rack_setup::config::SetupServiceConfig as Config; +use omicron_common::address::{ + get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, + NEXUS_EXTERNAL_PORT, NEXUS_INTERNAL_PORT, RSS_RESERVED_ADDRESSES, +}; +use omicron_common::backoff::{ + internal_service_policy, retry_notify, BackoffError, +}; +use serde::{Deserialize, Serialize}; +use sled_agent_client::{ + Client as SledAgentClient, + Error as SledAgentError, + types as SledAgentTypes, +}; +use slog::Logger; +use std::collections::HashMap; +use std::net::{Ipv6Addr, SocketAddrV6}; +use std::path::{Path, PathBuf}; +use thiserror::Error; +use uuid::Uuid; + +// The number of Nexus instances to create from RSS. +const NEXUS_COUNT: usize = 1; + +// The number of CRDB instances to create from RSS. +const CRDB_COUNT: usize = 1; + +fn rss_service_plan_path() -> PathBuf { + Path::new(omicron_common::OMICRON_CONFIG_PATH) + .join("rss-service-plan.toml") +} + +/// Describes errors which may occur while generating a plan for services. +#[derive(Error, Debug)] +pub enum PlanError { + #[error("I/O error while {message}: {err}")] + Io { + message: String, + #[source] + err: std::io::Error, + }, + + #[error("Cannot deserialize TOML file at {path}: {err}")] + Toml { path: PathBuf, err: toml::de::Error }, + + #[error("Error making HTTP request to Sled Agent: {0}")] + SledApi(#[from] SledAgentError), + + #[error("Error initializing sled via sled-agent: {0}")] + SledInitialization(String), + + #[error("Failed to construct an HTTP client: {0}")] + HttpClient(reqwest::Error), +} + +#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] +pub struct SledRequest { + /// Datasets to be created. + #[serde(default, rename = "dataset")] + pub datasets: Vec, + + /// Services to be instantiated. + #[serde(default, rename = "service")] + pub services: Vec, + + /// DNS Services to be instantiated. + #[serde(default, rename = "dns_service")] + pub dns_services: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct Plan { + pub services: HashMap, +} + +impl Plan { + pub async fn load( + log: &Logger, + ) -> Result, PlanError> + { + // If we already created a plan for this RSS to allocate + // services to sleds, re-use that existing plan. + let rss_service_plan_path = rss_service_plan_path(); + if rss_service_plan_path.exists() { + info!(log, "RSS plan already created, loading from file"); + + let plan: Self = + toml::from_str( + &tokio::fs::read_to_string(&rss_service_plan_path).await.map_err( + |err| PlanError::Io { + message: format!( + "Loading RSS plan {rss_service_plan_path:?}" + ), + err, + }, + )?, + ) + .map_err(|err| PlanError::Toml { + path: rss_service_plan_path, + err, + })?; + Ok(Some(plan)) + } else { + Ok(None) + } + } + + // Gets a zpool UUID from the sled. 
+ async fn get_a_zpool_from_sled( + log: &Logger, + address: SocketAddrV6, + ) -> Result { + let dur = std::time::Duration::from_secs(60); + let client = reqwest::ClientBuilder::new() + .connect_timeout(dur) + .timeout(dur) + .build() + .map_err(PlanError::HttpClient)?; + let client = SledAgentClient::new_with_client( + &format!("http://{}", address), + client, + log.new(o!("SledAgentClient" => address.to_string())), + ); + + let get_zpools = || async { + let zpools: Vec = client + .zpools_get() + .await + .map(|response| { + response.into_inner() + .into_iter() + .map(|zpool| zpool.id) + .collect() + }) + .map_err(|err| { + BackoffError::transient( + PlanError::SledApi(err) + ) + })?; + + if zpools.is_empty() { + return Err(BackoffError::transient( + PlanError::SledInitialization("Awaiting zpools".to_string()) + )); + } + + Ok(zpools) + }; + let log_failure = |error, _| { + warn!(log, "failed to get zpools"; "error" => ?error); + }; + let zpools = retry_notify( + internal_service_policy(), + get_zpools, + log_failure, + ) + .await?; + + Ok(zpools[0]) + } + + pub async fn create( + log: &Logger, + config: &Config, + sled_addrs: &Vec, + ) -> Result { + let reserved_rack_subnet = ReservedRackSubnet::new(config.az_subnet()); + let dns_subnets = reserved_rack_subnet.get_dns_subnets(); + + let mut allocations = vec![]; + + for idx in 0..sled_addrs.len() { + let sled_address = sled_addrs[idx]; + let sled_subnet_index = + u8::try_from(idx + 1).expect("Too many peers!"); + let subnet = config.sled_subnet(sled_subnet_index); + let mut addr_alloc = + AddressBumpAllocator::new(*get_sled_address(subnet).ip()); + + let mut request = SledRequest::default(); + + // The first enumerated sleds get assigned the responsibility + // of hosting Nexus. + if idx < NEXUS_COUNT { + let address = addr_alloc.next().expect("Not enough addrs"); + request.services.push(ServiceRequest { + id: Uuid::new_v4(), + name: "nexus".to_string(), + addresses: vec![address], + gz_addresses: vec![], + service_type: ServiceType::Nexus { + internal_address: SocketAddrV6::new( + address, + NEXUS_INTERNAL_PORT, + 0, + 0, + ), + external_address: SocketAddrV6::new( + address, + NEXUS_EXTERNAL_PORT, + 0, + 0, + ), + }, + }) + } + + // The first enumerated sleds host the CRDB datasets, using + // zpools described from the underlying config file. + if idx < CRDB_COUNT { + let zpool_id = Self::get_a_zpool_from_sled(log, sled_address).await?; + + let address = SocketAddrV6::new( + addr_alloc.next().expect("Not enough addrs"), + omicron_common::address::COCKROACH_PORT, + 0, + 0, + ); + request.datasets.push(DatasetEnsureBody { + id: Uuid::new_v4(), + zpool_id, + dataset_kind: + crate::params::DatasetKind::CockroachDb { + all_addresses: vec![address], + }, + address, + }); + } + + // The first enumerated sleds get assigned the additional + // responsibility of being internal DNS servers. 
+ if idx < dns_subnets.len() { + let dns_subnet = &dns_subnets[idx]; + let dns_addr = dns_subnet.dns_address().ip(); + request.dns_services.push(ServiceRequest { + id: Uuid::new_v4(), + name: "internal-dns".to_string(), + addresses: vec![dns_addr], + gz_addresses: vec![dns_subnet.gz_address().ip()], + service_type: ServiceType::InternalDns { + server_address: SocketAddrV6::new( + dns_addr, + DNS_SERVER_PORT, + 0, + 0, + ), + dns_address: SocketAddrV6::new( + dns_addr, DNS_PORT, 0, 0, + ), + }, + }); + } + + allocations.push(( + sled_address, + request + )); + } + + let mut services = std::collections::HashMap::new(); + for (addr, allocation) in allocations { + services.insert(addr, allocation); + } + + let plan = Self { + services + }; + + // Once we've constructed a plan, write it down to durable storage. + let serialized_plan = + toml::Value::try_from(&plan).unwrap_or_else(|e| { + panic!("Cannot serialize configuration: {:#?}: {}", plan, e) + }); + let plan_str = toml::to_string(&serialized_plan) + .expect("Cannot turn config to string"); + + info!(log, "Plan serialized as: {}", plan_str); + let path = rss_service_plan_path(); + tokio::fs::write(&path, plan_str).await.map_err(|err| { + PlanError::Io { + message: format!("Storing RSS service plan to {path:?}"), + err, + } + })?; + info!(log, "Service plan written to storage"); + + Ok(plan) + } +} + +struct AddressBumpAllocator { + last_addr: Ipv6Addr, +} + +// TODO: Testable? +// TODO: Could exist in another file? +impl AddressBumpAllocator { + fn new(sled_addr: Ipv6Addr) -> Self { + Self { last_addr: sled_addr } + } + + fn next(&mut self) -> Option { + let mut segments: [u16; 8] = self.last_addr.segments(); + segments[7] = segments[7].checked_add(1)?; + if segments[7] > RSS_RESERVED_ADDRESSES { + return None; + } + self.last_addr = Ipv6Addr::from(segments); + Some(self.last_addr) + } +} + diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs new file mode 100644 index 00000000000..7433a31dfd5 --- /dev/null +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -0,0 +1,140 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Plan generation for "how should sleds be initialized". + +use crate::bootstrap::{ + config::BOOTSTRAP_AGENT_PORT, params::SledAgentRequest, +}; +use crate::rack_setup::config::SetupServiceConfig as Config; +use serde::{Deserialize, Serialize}; +use slog::Logger; +use std::collections::HashMap; +use std::net::{Ipv6Addr, SocketAddrV6}; +use std::path::{Path, PathBuf}; +use thiserror::Error; +use uuid::Uuid; + +fn rss_sled_plan_path() -> PathBuf { + Path::new(omicron_common::OMICRON_CONFIG_PATH) + .join("rss-sled-plan.toml") +} + +/// Describes errors which may occur while generating a plan for sleds. +#[derive(Error, Debug)] +pub enum PlanError { + #[error("I/O error while {message}: {err}")] + Io { + message: String, + #[source] + err: std::io::Error, + }, + + #[error("Cannot deserialize TOML file at {path}: {err}")] + Toml { path: PathBuf, err: toml::de::Error }, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct Plan { + pub rack_id: Uuid, + pub sleds: HashMap, + + // TODO: Consider putting the rack subnet here? This may be operator-driven + // in the future, so it should exist in the "plan". 
+ // + // TL;DR: The more we decouple rom "rss-config.toml", the easier it'll be to + // switch to an operator-driven interface. +} + +impl Plan { + pub async fn load( + log: &Logger, + ) -> Result, PlanError> { + // If we already created a plan for this RSS to allocate + // subnets/requests to sleds, re-use that existing plan. + let rss_sled_plan_path = rss_sled_plan_path(); + if rss_sled_plan_path.exists() { + info!(log, "RSS plan already created, loading from file"); + + let plan: Self = + toml::from_str( + &tokio::fs::read_to_string(&rss_sled_plan_path).await.map_err( + |err| PlanError::Io { + message: format!( + "Loading RSS plan {rss_sled_plan_path:?}" + ), + err, + }, + )?, + ) + .map_err(|err| PlanError::Toml { + path: rss_sled_plan_path, + err, + })?; + Ok(Some(plan)) + } else { + Ok(None) + } + } + + pub async fn create( + log: &Logger, + config: &Config, + bootstrap_addrs: impl IntoIterator, + ) -> Result { + let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); + + let allocations = bootstrap_addrs.map(|(idx, bootstrap_addr)| { + info!( + log, + "Creating plan for the sled at {:?}", bootstrap_addr + ); + let bootstrap_addr = + SocketAddrV6::new(bootstrap_addr, BOOTSTRAP_AGENT_PORT, 0, 0); + let sled_subnet_index = + u8::try_from(idx + 1).expect("Too many peers!"); + let subnet = config.sled_subnet(sled_subnet_index); + + ( + bootstrap_addr, + SledAgentRequest { + id: Uuid::new_v4(), + subnet + }, + ) + }); + + info!(log, "Serializing plan"); + + let mut sleds = std::collections::HashMap::new(); + for (addr, allocation) in allocations { + sleds.insert(addr, allocation); + } + + let plan = Self { + rack_id: Uuid::new_v4(), + sleds, + }; + + // Once we've constructed a plan, write it down to durable storage. + let serialized_plan = + toml::Value::try_from(&plan).unwrap_or_else(|e| { + panic!("Cannot serialize configuration: {:#?}: {}", plan, e) + }); + let plan_str = toml::to_string(&serialized_plan) + .expect("Cannot turn config to string"); + + info!(log, "Plan serialized as: {}", plan_str); + let path = rss_sled_plan_path(); + tokio::fs::write(&path, plan_str).await.map_err(|err| { + PlanError::Io { + message: format!("Storing RSS sled plan to {path:?}"), + err, + } + })?; + info!(log, "Sled plan written to storage"); + + Ok(plan) + } +} diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 00f7230a3a0..2b25b8fa55a 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -6,15 +6,25 @@ use super::config::SetupServiceConfig as Config; use crate::bootstrap::{ - config::BOOTSTRAP_AGENT_PORT, discovery::PeerMonitorObserver, + discovery::PeerMonitorObserver, params::SledAgentRequest, rss_handle::BootstrapAgentHandle, }; use crate::params::{DatasetEnsureBody, ServiceRequest, ServiceType}; +use crate::rack_setup::plan::service::{ + PlanError as ServicePlanError, + Plan as ServicePlan, +}; +use crate::rack_setup::plan::sled::{ + PlanError as SledPlanError, + Plan as SledPlan, +}; use internal_dns_client::names::{AAAA, SRV}; -use omicron_common::address::{ - get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, - NEXUS_EXTERNAL_PORT, NEXUS_INTERNAL_PORT, RSS_RESERVED_ADDRESSES, +use nexus_client::{ + Client as NexusClient, + Error as NexusError, + types as NexusTypes, }; +use omicron_common::address::{get_sled_address, NEXUS_INTERNAL_PORT}; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, }; @@ -26,17 +36,10 @@ use sled_agent_client::{ }; use 
slog::Logger; use std::collections::{HashMap, HashSet}; -use std::net::{Ipv6Addr, SocketAddrV6}; -use std::path::PathBuf; +use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::path::{Path, PathBuf}; use thiserror::Error; use tokio::sync::{Mutex, OnceCell}; -use uuid::Uuid; - -// The number of Nexus instances to create from RSS. -const NEXUS_COUNT: usize = 1; - -// The number of CRDB instances to create from RSS. -const CRDB_COUNT: usize = 1; // The minimum number of sleds to initialize the rack. const MINIMUM_SLED_COUNT: usize = 1; @@ -51,6 +54,12 @@ pub enum SetupServiceError { err: std::io::Error, }, + #[error("Cannot create plan for sled services: {0}")] + ServicePlan(#[from] ServicePlanError), + + #[error("Cannot create plan for sled setup: {0}")] + SledPlan(#[from] SledPlanError), + #[error("Bad configuration for setting up rack: {0}")] BadConfig(String), @@ -60,8 +69,8 @@ pub enum SetupServiceError { #[error("Error making HTTP request to Sled Agent: {0}")] SledApi(#[from] SledAgentError), - #[error("Cannot deserialize TOML file at {path}: {err}")] - Toml { path: PathBuf, err: toml::de::Error }, + #[error("Error making HTTP request to Nexus: {0}")] + NexusApi(#[from] NexusError), #[error("Failed to monitor for peers: {0}")] PeerMonitor(#[from] tokio::sync::broadcast::error::RecvError), @@ -140,18 +149,8 @@ impl Service { } } -fn rss_sled_plan_path() -> std::path::PathBuf { - std::path::Path::new(omicron_common::OMICRON_CONFIG_PATH) - .join("rss-sled-plan.toml") -} - -fn rss_service_plan_path() -> std::path::PathBuf { - std::path::Path::new(omicron_common::OMICRON_CONFIG_PATH) - .join("rss-service-plan.toml") -} - -fn rss_completed_plan_path() -> std::path::PathBuf { - std::path::Path::new(omicron_common::OMICRON_CONFIG_PATH) +fn rss_completed_plan_path() -> PathBuf { + Path::new(omicron_common::OMICRON_CONFIG_PATH) .join("rss-plan-completed.marker") } @@ -173,28 +172,6 @@ enum PeerExpectation { CreateNewPlan(usize), } -struct AddressBumpAllocator { - last_addr: Ipv6Addr, -} - -// TODO: Testable? -// TODO: Could exist in another file? -impl AddressBumpAllocator { - fn new(sled_addr: Ipv6Addr) -> Self { - Self { last_addr: sled_addr } - } - - fn next(&mut self) -> Option { - let mut segments: [u16; 8] = self.last_addr.segments(); - segments[7] = segments[7].checked_add(1)?; - if segments[7] > RSS_RESERVED_ADDRESSES { - return None; - } - self.last_addr = Ipv6Addr::from(segments); - Some(self.last_addr) - } -} - /// The implementation of the Rack Setup Service. struct ServiceInner { log: Logger, @@ -330,298 +307,41 @@ impl ServiceInner { }; retry_notify(internal_service_policy(), services_put, log_failure) .await?; - Ok(()) - } - async fn load_sled_plan( - &self, - ) -> Result>, SetupServiceError> - { - // If we already created a plan for this RSS to allocate - // subnets/requests to sleds, re-use that existing plan. 
- let rss_sled_plan_path = rss_sled_plan_path(); - if rss_sled_plan_path.exists() { - info!(self.log, "RSS plan already created, loading from file"); - - let plan: std::collections::HashMap = - toml::from_str( - &tokio::fs::read_to_string(&rss_sled_plan_path).await.map_err( - |err| SetupServiceError::Io { - message: format!( - "Loading RSS plan {rss_sled_plan_path:?}" - ), - err, - }, - )?, - ) - .map_err(|err| SetupServiceError::Toml { - path: rss_sled_plan_path, - err, - })?; - Ok(Some(plan)) - } else { - Ok(None) - } - } - - async fn create_sled_plan( - &self, - config: &Config, - bootstrap_addrs: impl IntoIterator, - ) -> Result, SetupServiceError> { - let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); - - let allocations = bootstrap_addrs.map(|(idx, bootstrap_addr)| { - info!( - self.log, - "Creating plan for the sled at {:?}", bootstrap_addr - ); - let bootstrap_addr = - SocketAddrV6::new(bootstrap_addr, BOOTSTRAP_AGENT_PORT, 0, 0); - let sled_subnet_index = - u8::try_from(idx + 1).expect("Too many peers!"); - let subnet = config.sled_subnet(sled_subnet_index); - - ( - bootstrap_addr, - SledAgentRequest { subnet }, - ) - }); - info!(self.log, "Serializing plan"); + // Initialize DNS records for the Nexus service. + let services: Vec<_> = services.iter().filter(|svc| { + matches!(svc.service_type, crate::params::ServiceType::Nexus { .. }) + }).collect(); - let mut plan = std::collections::HashMap::new(); - for (addr, allocation) in allocations { - plan.insert(addr, allocation); + // Early-exit for non-Nexus case + if services.is_empty() { + return Ok(()); } - // Once we've constructed a plan, write it down to durable storage. - let serialized_plan = - toml::Value::try_from(&plan).unwrap_or_else(|e| { - panic!("Cannot serialize configuration: {:#?}: {}", plan, e) - }); - let plan_str = toml::to_string(&serialized_plan) - .expect("Cannot turn config to string"); - - info!(self.log, "Plan serialized as: {}", plan_str); - let path = rss_sled_plan_path(); - tokio::fs::write(&path, plan_str).await.map_err(|err| { - SetupServiceError::Io { - message: format!("Storing RSS sled plan to {path:?}"), - err, - } - })?; - info!(self.log, "Sled plan written to storage"); - - Ok(plan) - } - - // Gets a zpool UUID from the sled. 
- async fn get_a_zpool_from_sled( - &self, - address: SocketAddrV6, - ) -> Result { - let dur = std::time::Duration::from_secs(60); - let client = reqwest::ClientBuilder::new() - .connect_timeout(dur) - .timeout(dur) - .build() - .map_err(SetupServiceError::HttpClient)?; - let client = SledAgentClient::new_with_client( - &format!("http://{}", address), - client, - self.log.new(o!("SledAgentClient" => address.to_string())), - ); - - let get_zpools = || async { - let zpools: Vec = client - .zpools_get() - .await - .map(|response| { - response.into_inner() - .into_iter() - .map(|zpool| zpool.id) - .collect() - }) - .map_err(|err| { - BackoffError::transient( - SetupServiceError::SledApi(err) + // Otherwise, insert DNS records for Nexus + let aaaa = services + .iter() + .map(|service| { + ( + AAAA::Zone(service.id), + SocketAddrV6::new( + service.addresses[0], + NEXUS_INTERNAL_PORT, + 0, + 0, ) - })?; - - if zpools.is_empty() { - return Err(BackoffError::transient( - SetupServiceError::SledInitialization("Awaiting zpools".to_string()) - )); - } - - Ok(zpools) - }; - let log_failure = |error, _| { - warn!(self.log, "failed to get zpools"; "error" => ?error); - }; - let zpools = retry_notify( - internal_service_policy(), - get_zpools, - log_failure, - ) - .await?; - - Ok(zpools[0]) - } - - async fn load_service_plan( - &self, - ) -> Result>, SetupServiceError> - { - // If we already created a plan for this RSS to allocate - // services to sleds, re-use that existing plan. - let rss_service_plan_path = rss_service_plan_path(); - if rss_service_plan_path.exists() { - info!(self.log, "RSS plan already created, loading from file"); - - let plan: std::collections::HashMap = - toml::from_str( - &tokio::fs::read_to_string(&rss_service_plan_path).await.map_err( - |err| SetupServiceError::Io { - message: format!( - "Loading RSS plan {rss_service_plan_path:?}" - ), - err, - }, - )?, ) - .map_err(|err| SetupServiceError::Toml { - path: rss_service_plan_path, - err, - })?; - Ok(Some(plan)) - } else { - Ok(None) - } - } - async fn create_service_plan( - &self, - config: &Config, - sled_addrs: &Vec, - ) -> Result, SetupServiceError> { - let reserved_rack_subnet = ReservedRackSubnet::new(config.az_subnet()); - let dns_subnets = reserved_rack_subnet.get_dns_subnets(); - - let mut allocations = vec![]; - - for idx in 0..sled_addrs.len() { - let sled_address = sled_addrs[idx]; - let sled_subnet_index = - u8::try_from(idx + 1).expect("Too many peers!"); - let subnet = config.sled_subnet(sled_subnet_index); - let mut addr_alloc = - AddressBumpAllocator::new(*get_sled_address(subnet).ip()); - - let mut request = SledRequest::default(); - - // The first enumerated sleds get assigned the responsibility - // of hosting Nexus. - if idx < NEXUS_COUNT { - let address = addr_alloc.next().expect("Not enough addrs"); - request.services.push(ServiceRequest { - id: Uuid::new_v4(), - name: "nexus".to_string(), - addresses: vec![address], - gz_addresses: vec![], - service_type: ServiceType::Nexus { - internal_address: SocketAddrV6::new( - address, - NEXUS_INTERNAL_PORT, - 0, - 0, - ), - external_address: SocketAddrV6::new( - address, - NEXUS_EXTERNAL_PORT, - 0, - 0, - ), - }, - }) - } - - // The first enumerated sleds host the CRDB datasets, using - // zpools described from the underlying config file. 
- if idx < CRDB_COUNT { - let zpool_id = self.get_a_zpool_from_sled(sled_address).await?; - - let address = SocketAddrV6::new( - addr_alloc.next().expect("Not enough addrs"), - omicron_common::address::COCKROACH_PORT, - 0, - 0, - ); - request.datasets.push(DatasetEnsureBody { - id: Uuid::new_v4(), - zpool_id, - dataset_kind: - crate::params::DatasetKind::CockroachDb { - all_addresses: vec![address], - }, - address, - }); - } - - // The first enumerated sleds get assigned the additional - // responsibility of being internal DNS servers. - if idx < dns_subnets.len() { - let dns_subnet = &dns_subnets[idx]; - let dns_addr = dns_subnet.dns_address().ip(); - request.dns_services.push(ServiceRequest { - id: Uuid::new_v4(), - name: "internal-dns".to_string(), - addresses: vec![dns_addr], - gz_addresses: vec![dns_subnet.gz_address().ip()], - service_type: ServiceType::InternalDns { - server_address: SocketAddrV6::new( - dns_addr, - DNS_SERVER_PORT, - 0, - 0, - ), - dns_address: SocketAddrV6::new( - dns_addr, DNS_PORT, 0, 0, - ), - }, - }); - } - - allocations.push(( - sled_address, - request - )); - } - - let mut plan = std::collections::HashMap::new(); - for (addr, allocation) in allocations { - plan.insert(addr, allocation); - } - - // Once we've constructed a plan, write it down to durable storage. - let serialized_plan = - toml::Value::try_from(&plan).unwrap_or_else(|e| { - panic!("Cannot serialize configuration: {:#?}: {}", plan, e) - }); - let plan_str = toml::to_string(&serialized_plan) - .expect("Cannot turn config to string"); - - info!(self.log, "Plan serialized as: {}", plan_str); - let path = rss_service_plan_path(); - tokio::fs::write(&path, plan_str).await.map_err(|err| { - SetupServiceError::Io { - message: format!("Storing RSS service plan to {path:?}"), - err, - } - })?; - info!(self.log, "Service plan written to storage"); + }) + .collect::>(); + let srv_key = SRV::Service("nexus".into()); + self.dns_servers + .get() + .expect("DNS servers must be initialized first") + .insert_dns_records(&self.log, aaaa, srv_key) + .await?; - Ok(plan) + Ok(()) } // Waits for sufficient neighbors to exist so the initial set of requests @@ -667,6 +387,108 @@ impl ServiceInner { } } + async fn handoff_to_nexus( + &self, + config: &Config, + sled_plan: &SledPlan, + service_plan: &ServicePlan, + ) -> Result<(), SetupServiceError> { + info!(self.log, "Handing off control to Nexus"); + + let resolver = internal_dns_client::multiclient::create_resolver(config.az_subnet()) + .expect("Failed to create DNS resolver"); + let response = resolver.lookup_ip( + &SRV::Service("nexus".to_string()).to_string() + ).await.expect("Failed to lookup IP"); + + let nexus_address = response.iter() + .next() + .map(|addr| { + SocketAddr::new(addr, NEXUS_INTERNAL_PORT) + }) + .expect("no addresses returned from DNS resolver"); + info!(self.log, "Nexus address: {}", nexus_address.to_string()); + + let nexus_client = NexusClient::new( + &format!("http://{}", nexus_address), + self.log.new(o!("component" => "NexusClient")) + ); + + // Ensure we can quickly look up "Sled Agent Address" -> "UUID of sled". + // + // We need the ID when passing info to Nexus. + let mut id_map = HashMap::new(); + for (_, sled_request) in sled_plan.sleds.iter() { + id_map.insert(get_sled_address(sled_request.subnet), sled_request.id); + } + + // Convert all the information we have about services and datasets into + // a format which can be processed by Nexus. 
+ let mut services: Vec = vec![]; + let mut datasets: Vec = vec![]; + for (addr, service_request) in service_plan.services.iter() { + let sled_id = *id_map.get(addr) + .expect("Sled address in service plan, but not sled plan"); + + for svc in service_request.services.iter().chain(service_request.dns_services.iter()) { + let kind = match svc.service_type { + ServiceType::Nexus { .. } => NexusTypes::ServiceKind::Nexus, + ServiceType::InternalDns { .. } => NexusTypes::ServiceKind::InternalDNS, + ServiceType::Oximeter => NexusTypes::ServiceKind::Oximeter, + }; + + services.push( + NexusTypes::ServicePutRequest { + service_id: svc.id, + sled_id, + // TODO: Should this be a vec, or a single value? + address: svc.addresses[0], + kind, + } + ) + } + + for dataset in service_request.datasets.iter() { + datasets.push( + NexusTypes::DatasetCreateRequest { + zpool_id: dataset.zpool_id, + dataset_id: dataset.id, + request: NexusTypes::DatasetPutRequest { + address: dataset.address.to_string(), + kind: dataset.dataset_kind.clone().into() + }, + } + ) + } + } + + let request = NexusTypes::RackInitializationRequest { + services, + datasets, + }; + + let notify_nexus = || async { + nexus_client.rack_initialization_complete( + &sled_plan.rack_id, + &request, + ) + .await + .map_err(BackoffError::transient) + }; + let log_failure = |err, _| { + info!(self.log, "Failed to handoff to nexus: {err}"); + }; + + retry_notify( + internal_service_policy(), + notify_nexus, + log_failure, + ).await?; + + info!(self.log, "Handoff to Nexus is complete"); + Ok(()) + } + // In lieu of having an operator send requests to all sleds via an // initialization service, the sled-agent configuration may allow for the // automated injection of setup requests from a sled. @@ -678,8 +500,6 @@ impl ServiceInner { // time, it creates an allocation plan to provision subnets to an initial // set of sleds. // - // This plan is stored at "rss_sled_plan_path()". - // // 2. SLED ALLOCATION PLAN EXECUTION. The RSS then carries out this plan, making // requests to the sleds enumerated within the "allocation plan". // @@ -687,9 +507,6 @@ impl ServiceInner { // on their respsective subnets, they can be queried to create an // allocation plan for services. // - // This plan - for what services go where - is stored at - // "rss_service_plan_path()". - // // 4. SERVICE ALLOCATION PLAN EXECUTION. RSS requests that the services // outlined in the aforementioned step are created. // @@ -715,6 +532,17 @@ impl ServiceInner { self.log, "RSS configuration looks like it has already been applied", ); + + let sled_plan = SledPlan::load(&self.log).await? + .expect("Sled plan should exist if completed marker exists"); + let service_plan = ServicePlan::load(&self.log).await? 
+ .expect("Service plan should exist if completed marker exists"); + self.handoff_to_nexus( + &config, + &sled_plan, + &service_plan + ).await?; + return Ok(()); } else { info!(self.log, "RSS configuration has not been fully applied yet",); @@ -723,9 +551,9 @@ impl ServiceInner { // Wait for either: // - All the peers to re-load an old plan (if one exists) // - Enough peers to create a new plan (if one does not exist) - let maybe_sled_plan = self.load_sled_plan().await?; + let maybe_sled_plan = SledPlan::load(&self.log).await?; let expectation = if let Some(plan) = &maybe_sled_plan { - PeerExpectation::LoadOldPlan(plan.keys().map(|a| *a.ip()).collect()) + PeerExpectation::LoadOldPlan(plan.sleds.keys().map(|a| *a.ip()).collect()) } else { PeerExpectation::CreateNewPlan(MINIMUM_SLED_COUNT) }; @@ -743,13 +571,13 @@ impl ServiceInner { plan } else { info!(self.log, "Creating new allocation plan"); - self.create_sled_plan(config, addrs).await? + SledPlan::create(&self.log, &config, addrs).await? }; // Forward the sled initialization requests to our sled-agent. local_bootstrap_agent .initialize_sleds( - plan.iter() + plan.sleds.iter() .map(|(bootstrap_addr, initialization_request)| { ( *bootstrap_addr, @@ -761,7 +589,7 @@ impl ServiceInner { .await .map_err(SetupServiceError::SledInitialization)?; - let sled_addresses: Vec<_> = plan.iter() + let sled_addresses: Vec<_> = plan.sleds.iter() .map(|(_, initialization_request)| { get_sled_address( initialization_request.subnet, @@ -771,15 +599,15 @@ impl ServiceInner { // Now that sled agents have been initialized, we can create // a service allocation plan. - let service_plan = if let Some(plan) = self.load_service_plan().await? { + let service_plan = if let Some(plan) = ServicePlan::load(&self.log).await? { plan } else { - self.create_service_plan(&config, &sled_addresses).await? + ServicePlan::create(&self.log, &config, &sled_addresses).await? }; // Set up internal DNS services. futures::future::join_all( - service_plan.iter() + service_plan.services.iter() .filter(|(_, service_request)| { // Only send requests to sleds that are supposed to be running // DNS services. @@ -808,7 +636,7 @@ impl ServiceInner { .expect("Already set DNS servers"); // Issue the crdb initialization requests to all sleds. - futures::future::join_all(service_plan.iter().map( + futures::future::join_all(service_plan.services.iter().map( |(sled_address, services_request)| async move { self.initialize_crdb( *sled_address, @@ -829,7 +657,7 @@ impl ServiceInner { // Note that this must happen *after* the dataset initialization, // to ensure that CockroachDB has been initialized before Nexus // starts. - futures::future::join_all(service_plan.iter().map( + futures::future::join_all(service_plan.services.iter().map( |(sled_address, services_request)| async move { // With the current implementation of "initialize_services", // we must provide the set of *all* services that should be @@ -866,6 +694,15 @@ impl ServiceInner { }, )?; + // At this point, even if we reboot, we must not try to manage sleds, + // services, or DNS records. + + self.handoff_to_nexus( + &config, + &plan, + &service_plan + ).await?; + // TODO Questions to consider: // - What if a sled comes online *right after* this setup? How does // it get a /64? 
diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 3b31854628e..fc69359008f 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -37,6 +37,7 @@ impl Server { pub async fn start( config: &Config, log: Logger, + sled_id: Uuid, addr: SocketAddrV6, ) -> Result { info!(log, "setting up sled agent server"); @@ -48,7 +49,7 @@ impl Server { )); let sled_agent = - SledAgent::new(&config, log.clone(), nexus_client.clone(), addr) + SledAgent::new(&config, log.clone(), nexus_client.clone(), sled_id, addr) .await .map_err(|e| e.to_string())?; @@ -66,7 +67,6 @@ impl Server { .start(); let sled_address = http_server.local_addr(); - let sled_id = config.id; let nexus_notifier_handle = tokio::task::spawn(async move { // Notify the control plane that we're up, and continue trying this // until it succeeds. We retry with an randomized, capped exponential diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 6260191f58b..5d6481fcc34 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -112,10 +112,9 @@ impl SledAgent { config: &Config, log: Logger, nexus_client: Arc, + id: Uuid, sled_address: SocketAddrV6, ) -> Result { - let id = &config.id; - // Pass the "parent_log" to all subcomponents that want to set their own // "component" value. let parent_log = log.clone(); @@ -218,7 +217,7 @@ impl SledAgent { let storage = StorageManager::new( &parent_log, - *id, + id, nexus_client.clone(), etherstub.clone(), *sled_address.ip(), @@ -250,7 +249,7 @@ impl SledAgent { .await?; Ok(SledAgent { - id: config.id, + id, storage, instances, nexus_client, diff --git a/sled-agent/src/sp.rs b/sled-agent/src/sp.rs index f47bb4110de..d0810a4c26d 100644 --- a/sled-agent/src/sp.rs +++ b/sled-agent/src/sp.rs @@ -4,7 +4,6 @@ //! Interface to a (currently simulated) SP / RoT. -use crate::config::Config as SledConfig; use crate::illumos; use crate::illumos::dladm::CreateVnicError; use crate::illumos::dladm::Dladm; @@ -69,11 +68,10 @@ impl SpHandle { /// A return value of `Ok(None)` means no SP is available. 
pub async fn detect( sp_config: &Option, - sled_config: &SledConfig, log: &Logger, ) -> Result, SpError> { let inner = if let Some(config) = sp_config.as_ref() { - let sim_sp = start_simulated_sp(config, sled_config, log).await?; + let sim_sp = start_simulated_sp(config, log).await?; Some(Inner::SimulatedSp(sim_sp)) } else { None @@ -199,7 +197,6 @@ struct SimulatedSp { async fn start_simulated_sp( sp_config: &GimletConfig, - sled_config: &SledConfig, log: &Logger, ) -> Result { // Is our simulated SP going to bind to addresses (acting like management @@ -240,7 +237,6 @@ async fn start_simulated_sp( info!(log, "starting simulated gimlet SP"); let sp_log = log.new(o!( "component" => "sp-sim", - "server" => sled_config.id.clone().to_string(), )); let sp = Arc::new( sp_sim::Gimlet::spawn(&sp_config, sp_log) @@ -252,7 +248,6 @@ async fn start_simulated_sp( info!(log, "starting simulated gimlet RoT"); let rot_log = log.new(o!( "component" => "rot-sim", - "server" => sled_config.id.clone().to_string(), )); let transport = SimRotTransport { sp: Arc::clone(&sp), responses: VecDeque::new() }; diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index 5640bc69c81..c8652efb5ea 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -6,8 +6,7 @@ # |...............| <- This /56 is the Rack Subnet rack_subnet = "fd00:1122:3344:0100::" -[[request]] - +# [[request]] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate crucible datasets. # [[request.dataset]] diff --git a/smf/sled-agent/config.toml b/smf/sled-agent/config.toml index 53ec733e9ec..44a237e1930 100644 --- a/smf/sled-agent/config.toml +++ b/smf/sled-agent/config.toml @@ -1,9 +1,6 @@ # Sled Agent Configuration -id = "fb0f7546-4d46-40ca-9d56-cbb810684ca7" - # TODO: Remove this address - # Internal address of Nexus nexus_address = "[fd00:1122:3344:0101::3]:12221" From dfa614b4781c11a373a329810a5927b323377994 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 10 Jun 2022 12:57:54 -0400 Subject: [PATCH 04/35] Handoff to Nexus is hacky, but working --- common/src/backoff.rs | 9 ++- common/src/nexus_config.rs | 2 + nexus/src/app/mod.rs | 18 ++++-- nexus/src/app/rack.rs | 68 +++++++++++----------- nexus/src/app/update.rs | 16 +++-- nexus/src/config.rs | 5 ++ nexus/src/context.rs | 32 +++++++--- nexus/src/db/datastore.rs | 39 ++++++++++++- nexus/src/external_api/http_entrypoints.rs | 10 ++-- nexus/src/lib.rs | 42 ++++++++----- nexus/test-utils/src/lib.rs | 3 +- nexus/tests/config.test.toml | 1 + sled-agent/src/bootstrap/agent.rs | 1 + sled-agent/src/bootstrap/params.rs | 3 + sled-agent/src/config.rs | 3 - sled-agent/src/instance.rs | 18 +++--- sled-agent/src/instance_manager.rs | 10 ++-- sled-agent/src/nexus.rs | 56 ++++++++++++++++++ sled-agent/src/rack_setup/plan/sled.rs | 7 ++- sled-agent/src/server.rs | 25 ++++---- sled-agent/src/services.rs | 11 ++++ sled-agent/src/sled_agent.rs | 21 ++++--- sled-agent/src/storage_manager.rs | 46 +++++++-------- smf/sled-agent/config.toml | 4 -- 24 files changed, 308 insertions(+), 142 deletions(-) diff --git a/common/src/backoff.rs b/common/src/backoff.rs index 128bf932d0d..bcf726ff2a0 100644 --- a/common/src/backoff.rs +++ b/common/src/backoff.rs @@ -13,14 +13,19 @@ pub use ::backoff::{backoff::Backoff, ExponentialBackoff, Notify}; /// Return a backoff policy appropriate for retrying internal services /// indefinitely. 
pub fn internal_service_policy() -> ::backoff::ExponentialBackoff { - const INITIAL_INTERVAL: Duration = Duration::from_millis(250); const MAX_INTERVAL: Duration = Duration::from_secs(60 * 60); + internal_service_policy_with_max(MAX_INTERVAL) +} + +pub fn internal_service_policy_with_max(max_duration: Duration) -> ::backoff::ExponentialBackoff { + const INITIAL_INTERVAL: Duration = Duration::from_millis(250); ::backoff::ExponentialBackoff { current_interval: INITIAL_INTERVAL, initial_interval: INITIAL_INTERVAL, multiplier: 2.0, - max_interval: MAX_INTERVAL, + max_interval: max_duration, max_elapsed_time: None, ..backoff::ExponentialBackoff::default() } + } diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index f1325ae336d..085434ebf74 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -102,6 +102,8 @@ pub enum Database { pub struct RuntimeConfig { /// Uuid of the Nexus instance pub id: Uuid, + /// Uuid of the Rack where Nexus is executing + pub rack_id: Uuid, /// Dropshot configuration for external API server pub dropshot_external: ConfigDropshot, /// Dropshot configuration for internal API server diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 1c3620de7e7..13cf48b91ec 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -54,15 +54,12 @@ pub struct Nexus { /// uuid for this nexus instance. id: Uuid, - /// uuid for this rack (TODO should also be in persistent storage) + /// uuid for this rack rack_id: Uuid, /// general server log log: Logger, - /// cached rack identity metadata - api_rack_identity: db::model::RackIdentity, - /// persistent storage for resources in the control plane db_datastore: Arc, @@ -146,7 +143,6 @@ impl Nexus { id: config.runtime.id, rack_id, log: log.new(o!()), - api_rack_identity: db::model::RackIdentity::new(rack_id), db_datastore: Arc::clone(&db_datastore), authz: Arc::clone(&authz), sec_client: Arc::clone(&sec_client), @@ -217,6 +213,18 @@ impl Nexus { } } + /// Returns an [`OpContext`] used for background tasks. + // TODO: dap@ recommends using a different user for this, other than + // "internal_db_init". 
+ pub fn opctx_for_background(&self) -> OpContext { + OpContext::for_background( + self.log.new(o!("component" => "Background Work")), + Arc::clone(&self.authz), + authn::Context::internal_db_init(), + Arc::clone(&self.datastore()), + ) + } + /// Returns an [`OpContext`] used for authenticating external requests pub fn opctx_external_authn(&self) -> &OpContext { &self.opctx_external_authn diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 8b9728c7f77..bf1f2026e8a 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -7,59 +7,61 @@ use crate::authz; use crate::context::OpContext; use crate::db; +use crate::db::lookup::LookupPath; use crate::internal_api::params::RackInitializationRequest; -use futures::future::ready; -use futures::StreamExt; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; -use omicron_common::api::external::ListResult; +use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; -use omicron_common::api::external::LookupType; -use omicron_common::api::external::ResourceType; use uuid::Uuid; impl super::Nexus { - pub(crate) fn as_rack(&self) -> db::model::Rack { - db::model::Rack { - identity: self.api_rack_identity.clone(), - initialized: true, - tuf_base_url: None, - } - } - pub async fn racks_list( &self, opctx: &OpContext, pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResult { - opctx.authorize(authz::Action::Read, &authz::FLEET).await?; - - if let Some(marker) = pagparams.marker { - if *marker >= self.rack_id { - return Ok(futures::stream::empty().boxed()); - } - } + ) -> ListResultVec { + self.db_datastore.rack_list(&opctx, pagparams).await + } - Ok(futures::stream::once(ready(Ok(self.as_rack()))).boxed()) + // TODO: Use this, instead of the manual one. + // + // Note that this will require insertion of the rack to occur + // during the "populate" steps. + /* + pub async fn rack_lookup( + &self, + opctx: &OpContext, + rack_id: &Uuid, + ) -> LookupResult { + let (.., db_rack) = LookupPath::new(opctx, &self.db_datastore) + .rack_id(*rack_id) + .fetch() + .await?; + Ok(db_rack) } + */ pub async fn rack_lookup( &self, opctx: &OpContext, rack_id: &Uuid, ) -> LookupResult { - let authz_rack = authz::Rack::new( - authz::FLEET, - *rack_id, - LookupType::ById(*rack_id), - ); - opctx.authorize(authz::Action::Read, &authz_rack).await?; + self.db_datastore.rack_lookup_manual(opctx, *rack_id).await + } - if *rack_id == self.rack_id { - Ok(self.as_rack()) - } else { - Err(Error::not_found_by_id(ResourceType::Rack, rack_id)) - } + /// Ensures that a rack exists in the DB. + /// + /// If the rack already exists, this function is a no-op. + pub async fn rack_insert( + &self, + opctx: &OpContext, + rack_id: Uuid, + ) -> Result<(), Error> { + self.datastore() + .rack_insert(opctx, &db::model::Rack::new(rack_id)) + .await?; + Ok(()) } /// Marks the rack as initialized with a set of services. 
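The rack_insert added above is deliberately idempotent: re-inserting an already-present rack is a no-op, so callers can retry it blindly at startup until the database becomes reachable. Below is a minimal sketch of that retry loop (assuming tokio with the rt and time features), with the actual datastore call replaced by a stand-in fallible operation; it mirrors the pattern rather than the real Nexus API.

use std::time::Duration;

// Stand-in for an idempotent rack_insert that may fail transiently,
// e.g. while CockroachDB is still coming up.
async fn try_rack_insert(attempt: u32) -> Result<(), String> {
    if attempt < 3 {
        Err("database not ready".to_string())
    } else {
        Ok(())
    }
}

#[tokio::main]
async fn main() {
    // Keep retrying until the rack row exists; because the insert is a
    // no-op when the row is already there, repeating it is always safe.
    let mut attempt = 0;
    loop {
        match try_rack_insert(attempt).await {
            Ok(()) => {
                println!("rack row ensured after {attempt} retries");
                break;
            }
            Err(e) => {
                println!("failed to create initial rack: {e}; retrying");
                attempt += 1;
                tokio::time::sleep(Duration::from_millis(200)).await;
            }
        }
    }
}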
diff --git a/nexus/src/app/update.rs b/nexus/src/app/update.rs index 0d6721ec439..65ec3b6ddde 100644 --- a/nexus/src/app/update.rs +++ b/nexus/src/app/update.rs @@ -24,11 +24,15 @@ use tokio::io::AsyncWriteExt; static BASE_ARTIFACT_DIR: &str = "/var/tmp/oxide_artifacts"; impl super::Nexus { - fn tuf_base_url(&self) -> Option { - self.updates_config.as_ref().map(|c| { - let rack = self.as_rack(); + async fn tuf_base_url(&self, opctx: &OpContext) -> Result, Error> { + let rack = self.rack_lookup( + opctx, + &self.rack_id, + ).await?; + + Ok(self.updates_config.as_ref().map(|c| { rack.tuf_base_url.unwrap_or_else(|| c.default_base_url.clone()) - }) + })) } pub async fn updates_refresh_metadata( @@ -43,7 +47,7 @@ impl super::Nexus { } })?; let base_url = - self.tuf_base_url().ok_or_else(|| Error::InvalidRequest { + self.tuf_base_url(opctx).await?.ok_or_else(|| Error::InvalidRequest { message: "updates system not configured".into(), })?; let trusted_root = tokio::fs::read(&updates_config.trusted_root) @@ -129,7 +133,7 @@ impl super::Nexus { artifact: UpdateArtifact, ) -> Result, Error> { let mut base_url = - self.tuf_base_url().ok_or_else(|| Error::InvalidRequest { + self.tuf_base_url(opctx).await?.ok_or_else(|| Error::InvalidRequest { message: "updates system not configured".into(), })?; if !base_url.ends_with('/') { diff --git a/nexus/src/config.rs b/nexus/src/config.rs index a6034a7eea3..a157c4bdbeb 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -327,6 +327,7 @@ mod test { max_vpc_ipv4_subnet_prefix = 27 [runtime] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [runtime.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 @@ -346,6 +347,7 @@ mod test { Config { runtime: RuntimeConfig { id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), + rack_id: "38b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), dropshot_external: ConfigDropshot { bind_address: "10.1.2.3:4567" .parse::() @@ -405,6 +407,7 @@ mod test { address = "[::1]:8123" [runtime] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [runtime.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 @@ -446,6 +449,7 @@ mod test { address = "[::1]:8123" [runtime] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [runtime.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 @@ -501,6 +505,7 @@ mod test { max_vpc_ipv4_subnet_prefix = 100 [runtime] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [runtime.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 diff --git a/nexus/src/context.rs b/nexus/src/context.rs index e0ed637aef3..4cd92a05f91 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -160,7 +160,7 @@ impl ServerContext { let address = response.iter().next().ok_or_else(|| { "no addresses returned from DNS resolver".to_string() })?; - info!(log, "DB addreess: {}", address); + info!(log, "DB address: {}", address); PostgresConfigWithUrl::from_str(&format!( "postgresql://root@[{}]:{}/omicron?sslmode=disable", address, COCKROACH_PORT @@ -169,15 +169,31 @@ impl ServerContext { } }; let pool = db::Pool::new(&db::Config { url }); + let nexus = Nexus::new_with_id( + rack_id, + log.new(o!("component" => "nexus")), + pool, + config, + Arc::clone(&authz), + ); + + // Do not return until a rack exists in the DB with the 
provided UUID. + let populate_ctx = nexus.opctx_for_background(); + loop { + let result = nexus.rack_insert(&populate_ctx, rack_id) + .await; + if let Err(e) = result { + info!(log, "Failed to create initial rack: {}", e); + tokio::time::sleep(std::time::Duration::from_millis(200)).await; + } else { + info!(log, "Rack with UUID {} exists in the database", rack_id); + nexus.rack_lookup(&populate_ctx, &rack_id).await.unwrap(); + break; + } + } Ok(Arc::new(ServerContext { - nexus: Nexus::new_with_id( - rack_id, - log.new(o!("component" => "nexus")), - pool, - config, - Arc::clone(&authz), - ), + nexus, log, external_authn, internal_authn, diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index a386c098eb6..c8297e36aa7 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -175,6 +175,21 @@ impl DataStore { }) } + pub async fn rack_lookup_manual( + &self, + _opctx: &OpContext, + rack_id: Uuid, + ) -> LookupResult { + use db::schema::rack::dsl; + + dsl::rack + .filter(dsl::id.eq(rack_id)) + .select(Rack::as_select()) + .get_result_async(self.pool()) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + /// Update a rack to mark that it has been initialized pub async fn rack_set_initialized( &self, @@ -195,6 +210,7 @@ impl DataStore { // NOTE: This operation could likely be optimized with a CTE, but given // the low-frequency of calls, this optimization has been deferred. + let log = opctx.log.clone(); self.pool_authorized(opctx) .await? .transaction(move |conn| { @@ -207,6 +223,7 @@ impl DataStore { TxnError::CustomError(RackInitError::RackUpdate(e)) })?; if rack.initialized { + info!(log, "Early exit: Rack already initialized"); return Ok(rack); } @@ -236,6 +253,7 @@ impl DataStore { }) })?; } + info!(log, "Inserted services"); for dataset in datasets { use db::schema::dataset::dsl; let zpool_id = dataset.pool_id; @@ -262,9 +280,10 @@ impl DataStore { }) })?; } + info!(log, "Inserted datasets"); // Set the rack to "initialized" once the handoff is complete - diesel::update(rack_dsl::rack) + let rack = diesel::update(rack_dsl::rack) .filter(rack_dsl::id.eq(rack_id)) .set(( rack_dsl::initialized.eq(true), @@ -274,7 +293,9 @@ impl DataStore { .get_result::(conn) .map_err(|e| { TxnError::CustomError(RackInitError::RackUpdate(e)) - }) + })?; + info!(log, "Updated rack (set initialized to true)"); + Ok(rack) }) .await .map_err(|e| match e { @@ -329,6 +350,20 @@ impl DataStore { }) } + pub async fn rack_list( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::rack::dsl; + paginated(dsl::rack, dsl::id, pagparams) + .select(Rack::as_select()) + .load_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + /// Stores a new sled in the database. pub async fn sled_upsert(&self, sled: Sled) -> CreateResult { use db::schema::sled::dsl; diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index bbc1f9a517f..982d4c2397b 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -2871,11 +2871,13 @@ async fn hardware_racks_get( let query = query_params.into_inner(); let handler = async { let opctx = OpContext::for_external_api(&rqctx).await?; - let rack_stream = nexus + let racks = nexus .racks_list(&opctx, &data_page_params_for(&rqctx, &query)?) 
- .await?; - let view_list = to_list::(rack_stream).await; - Ok(HttpResponseOk(ScanById::results_page(&query, view_list)?)) + .await? + .into_iter() + .map(|r| r.into()) + .collect(); + Ok(HttpResponseOk(ScanById::results_page(&query, racks)?)) }; apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 61abe04b1ba..627a48dd681 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -36,7 +36,6 @@ use external_api::http_entrypoints::external_api; use internal_api::http_entrypoints::internal_api; use slog::Logger; use std::sync::Arc; -use uuid::Uuid; #[macro_use] extern crate slog; @@ -82,7 +81,6 @@ impl Server { /// Start a nexus server. pub async fn start( config: &Config, - rack_id: Uuid, log: &Logger, ) -> Result { let log = log.new(o!("name" => config.runtime.id.to_string())); @@ -90,15 +88,7 @@ impl Server { let ctxlog = log.new(o!("component" => "ServerContext")); - let apictx = ServerContext::new(rack_id, ctxlog, &config).await?; - - let http_server_starter_external = dropshot::HttpServerStarter::new( - &config.runtime.dropshot_external, - external_api(), - Arc::clone(&apictx), - &log.new(o!("component" => "dropshot_external")), - ) - .map_err(|error| format!("initializing external server: {}", error))?; + let apictx = ServerContext::new(config.runtime.rack_id, ctxlog, &config).await?; let http_server_starter_internal = dropshot::HttpServerStarter::new( &config.runtime.dropshot_internal, @@ -107,9 +97,34 @@ impl Server { &log.new(o!("component" => "dropshot_internal")), ) .map_err(|error| format!("initializing internal server: {}", error))?; + let http_server_internal = http_server_starter_internal.start(); + // Wait until RSS handoff completes. + let opctx = apictx.nexus.opctx_for_background(); + loop { + let result = apictx.nexus.rack_lookup(&opctx, &config.runtime.rack_id).await; + match result { + Ok(rack) => { + if rack.initialized { + break; + } + info!(log, "Still waiting for rack initialization: {:?}", rack); + }, + Err(e) => { + warn!(log, "Cannot look up rack: {}", e); + }, + } + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + } + + let http_server_starter_external = dropshot::HttpServerStarter::new( + &config.runtime.dropshot_external, + external_api(), + Arc::clone(&apictx), + &log.new(o!("component" => "dropshot_external")), + ) + .map_err(|error| format!("initializing external server: {}", error))?; let http_server_external = http_server_starter_external.start(); - let http_server_internal = http_server_starter_internal.start(); Ok(Server { apictx, http_server_external, http_server_internal }) } @@ -167,8 +182,7 @@ pub async fn run_server(config: &Config) -> Result<(), String> { } else { debug!(log, "registered DTrace probes"); } - let rack_id = Uuid::new_v4(); - let server = Server::start(config, rack_id, &log).await?; + let server = Server::start(config, &log).await?; server.register_as_producer().await; server.wait_for_finish().await } diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index e4eb744e2fa..74c8a7f2d21 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -90,7 +90,6 @@ pub async fn test_setup_with_config( config: &mut omicron_nexus::Config, ) -> ControlPlaneTestContext { let logctx = LogContext::new(test_name, &config.pkg.log); - let rack_id = Uuid::parse_str(RACK_UUID).unwrap(); let log = &logctx.log; // Start up CockroachDB. 
@@ -104,7 +103,7 @@ pub async fn test_setup_with_config( nexus_config::Database::FromUrl { url: database.pg_config().clone() }; config.pkg.timeseries_db.address.set_port(clickhouse.port()); - let server = omicron_nexus::Server::start(&config, rack_id, &logctx.log) + let server = omicron_nexus::Server::start(&config, &logctx.log) .await .unwrap(); server diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 2fc4ddba192..a1b47d7f178 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -39,6 +39,7 @@ max_vpc_ipv4_subnet_prefix = 29 # Identifier for this instance of Nexus. # NOTE: The test suite always overrides this. id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" +rack_id = "f6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" # # NOTE: for the test suite, the port MUST be 0 (in order to bind to any diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index e47e7300240..fc4260837ca 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -269,6 +269,7 @@ impl Agent { self.parent_log.clone(), request.id, sled_address, + request.rack_id, ) .await .map_err(|e| { diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index 5a01f4c6e0b..0cb7cd25246 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -26,6 +26,9 @@ pub struct SledAgentRequest { /// Portion of the IP space to be managed by the Sled Agent. pub subnet: Ipv6Subnet, + + /// Uuid of the rack to which this sled agent belongs. + pub rack_id: Uuid, } #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index a7b0d9fb1ab..fe1b5c57764 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -9,14 +9,11 @@ use crate::illumos::dladm::{self, Dladm, PhysicalLink}; use crate::illumos::zpool::ZpoolName; use dropshot::ConfigLogging; use serde::Deserialize; -use std::net::SocketAddr; use std::path::{Path, PathBuf}; /// Configuration for a sled agent #[derive(Clone, Debug, Deserialize)] pub struct Config { - /// Address of Nexus instance - pub nexus_address: SocketAddr, /// Configuration for the sled agent debug log pub log: ConfigLogging, /// Optional VLAN ID to be used for tagging guest VNICs. diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 03248017869..8ffdc5c5519 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -12,7 +12,7 @@ use crate::illumos::svc::wait_for_service; use crate::illumos::vnic::VnicAllocator; use crate::illumos::zone::{AddressRequest, PROPOLIS_ZONE_PREFIX}; use crate::instance_manager::InstanceTicket; -use crate::nexus::NexusClient; +use crate::nexus::LazyNexusClient; use crate::opte::OptePort; use crate::opte::OptePortAllocator; use crate::params::NetworkInterface; @@ -214,7 +214,7 @@ struct InstanceInner { running_state: Option, // Connection to Nexus - nexus_client: Arc, + lazy_nexus_client: LazyNexusClient, } impl InstanceInner { @@ -243,7 +243,11 @@ impl InstanceInner { ); // Notify Nexus of the state change. - self.nexus_client + self.lazy_nexus_client + .get() + .await + // TODO: Handle me + .unwrap() .cpapi_instances_put( self.id(), &nexus_client::types::InstanceRuntimeState::from( @@ -388,7 +392,7 @@ mockall::mock! 
{ underlay_addr: Ipv6Addr, port_allocator: OptePortAllocator, initial: InstanceHardware, - nexus_client: Arc, + lazy_nexus_client: LazyNexusClient, ) -> Result; pub async fn start( &self, @@ -420,7 +424,7 @@ impl Instance { /// * `port_allocator`: A unique (to the sled) ID generator to /// refer to an OPTE port for the guest network interfaces. /// * `initial`: State of the instance at initialization time. - /// * `nexus_client`: Connection to Nexus, used for sending notifications. + /// * `lazy_nexus_client`: Connection to Nexus, used for sending notifications. // TODO: This arg list is getting a little long; can we clean this up? pub fn new( log: Logger, @@ -429,7 +433,7 @@ impl Instance { underlay_addr: Ipv6Addr, port_allocator: OptePortAllocator, initial: InstanceHardware, - nexus_client: Arc, + lazy_nexus_client: LazyNexusClient, ) -> Result { info!(log, "Instance::new w/initial HW: {:?}", initial); let instance = InstanceInner { @@ -457,7 +461,7 @@ impl Instance { cloud_init_bytes: initial.cloud_init_bytes, state: InstanceStates::new(initial.runtime), running_state: None, - nexus_client, + lazy_nexus_client, }; let inner = Arc::new(Mutex::new(instance)); diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index c3a941ad8a1..a87db943195 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -6,7 +6,7 @@ use crate::illumos::dladm::Etherstub; use crate::illumos::vnic::VnicAllocator; -use crate::nexus::NexusClient; +use crate::nexus::LazyNexusClient; use crate::opte::OptePortAllocator; use crate::params::{ InstanceHardware, InstanceMigrateParams, InstanceRuntimeStateRequested, @@ -31,7 +31,7 @@ pub enum Error { struct InstanceManagerInternal { log: Logger, - nexus_client: Arc, + lazy_nexus_client: LazyNexusClient, // TODO: If we held an object representing an enum of "Created OR Running" // instance, we could avoid the methods within "instance.rs" that panic @@ -53,14 +53,14 @@ impl InstanceManager { /// Initializes a new [`InstanceManager`] object. pub fn new( log: Logger, - nexus_client: Arc, + lazy_nexus_client: LazyNexusClient, etherstub: Etherstub, underlay_addr: Ipv6Addr, ) -> InstanceManager { InstanceManager { inner: Arc::new(InstanceManagerInternal { log: log.new(o!("component" => "InstanceManager")), - nexus_client, + lazy_nexus_client, instances: Mutex::new(BTreeMap::new()), vnic_allocator: VnicAllocator::new("Instance", etherstub), underlay_addr, @@ -119,7 +119,7 @@ impl InstanceManager { self.inner.underlay_addr, self.inner.port_allocator.clone(), initial_hardware, - self.inner.nexus_client.clone(), + self.inner.lazy_nexus_client.clone(), )?; let instance_clone = instance.clone(); let old_instance = instances diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index d0c6da4ba38..8a2be0e0e54 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -6,3 +6,59 @@ pub use crate::mocks::MockNexusClient as NexusClient; #[cfg(not(test))] pub use nexus_client::Client as NexusClient; + +use internal_dns_client::names::SRV; +use omicron_common::address::{ + AZ_PREFIX, NEXUS_INTERNAL_PORT, Ipv6Subnet, +}; +use slog::Logger; +use std::net::Ipv6Addr; +use std::sync::Arc; + +struct Inner { + log: Logger, + addr: Ipv6Addr, + + // TODO: We could also totally cache the resolver / observed IP here? 
+} + +#[derive(Clone)] +pub struct LazyNexusClient { + inner: Arc, +} + +impl LazyNexusClient { + pub fn new(log: Logger, addr: Ipv6Addr) -> Self { + Self { + inner: Arc::new( + Inner { + log, + addr, + } + ) + } + } + + pub async fn get(&self) -> Result { + // TODO: Consider refactoring this: + // - Address as input + // - Lookup "nexus" DNS record + // - Result
as output + let az_subnet = Ipv6Subnet::::new(self.inner.addr); + let resolver = + internal_dns_client::multiclient::create_resolver(az_subnet) + .map_err(|e| format!("Failed to create DNS resolver: {}", e))?; + let response = resolver + .lookup_ip(&SRV::Service("nexus".to_string()).to_string()) + .await + .map_err(|e| format!("Failed to lookup Nexus IP: {}", e))?; + let address = response.iter().next().ok_or_else(|| { + "no addresses returned from DNS resolver".to_string() + })?; + + Ok(NexusClient::new( + &format!("http://[{}]:{}", address, NEXUS_INTERNAL_PORT), + self.inner.log.clone(), + )) + } +} diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index 7433a31dfd5..9154ab6698a 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -85,6 +85,8 @@ impl Plan { ) -> Result { let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); + let rack_id = Uuid::new_v4(); + let allocations = bootstrap_addrs.map(|(idx, bootstrap_addr)| { info!( log, @@ -100,7 +102,8 @@ impl Plan { bootstrap_addr, SledAgentRequest { id: Uuid::new_v4(), - subnet + subnet, + rack_id, }, ) }); @@ -113,7 +116,7 @@ impl Plan { } let plan = Self { - rack_id: Uuid::new_v4(), + rack_id, sleds, }; diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index fc69359008f..2e05648ffc3 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -7,13 +7,12 @@ use super::config::Config; use super::http_entrypoints::api as http_api; use super::sled_agent::SledAgent; -use crate::nexus::NexusClient; +use crate::nexus::LazyNexusClient; use omicron_common::backoff::{ - internal_service_policy, retry_notify, BackoffError, + internal_service_policy_with_max, retry_notify, BackoffError, }; use slog::Logger; use std::net::{SocketAddr, SocketAddrV6}; -use std::sync::Arc; use uuid::Uuid; /// Packages up a [`SledAgent`], running the sled agent API under a Dropshot @@ -39,17 +38,16 @@ impl Server { log: Logger, sled_id: Uuid, addr: SocketAddrV6, + rack_id: Uuid, ) -> Result { info!(log, "setting up sled agent server"); let client_log = log.new(o!("component" => "NexusClient")); - let nexus_client = Arc::new(NexusClient::new( - &format!("http://{}", config.nexus_address), - client_log, - )); + + let lazy_nexus_client = LazyNexusClient::new(client_log, *addr.ip()); let sled_agent = - SledAgent::new(&config, log.clone(), nexus_client.clone(), sled_id, addr) + SledAgent::new(&config, log.clone(), lazy_nexus_client.clone(), sled_id, addr, rack_id) .await .map_err(|e| e.to_string())?; @@ -79,6 +77,9 @@ impl Server { log, "contacting server nexus, registering sled: {}", sled_id ); + let nexus_client = lazy_nexus_client.get() + .await + .map_err(|err| BackoffError::transient(err.to_string()))?; nexus_client .cpapi_sled_agents_post( &sled_id, @@ -87,16 +88,16 @@ impl Server { }, ) .await - .map_err(BackoffError::transient) + .map_err(|err| BackoffError::transient(err.to_string())) }; - let log_notification_failure = |_, delay| { + let log_notification_failure = |err, delay| { warn!( log, - "failed to contact nexus, will retry in {:?}", delay; + "failed to contact nexus: {}, will retry in {:?}", err, delay; ); }; retry_notify( - internal_service_policy(), + internal_service_policy_with_max(std::time::Duration::from_secs(5)), notify_nexus, log_notification_failure, ) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 946a6a8bc88..e7f71810fdf 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ 
-21,6 +21,7 @@ use std::net::{IpAddr, Ipv6Addr, SocketAddr}; use std::path::{Path, PathBuf}; use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; +use uuid::Uuid; // The filename of ServiceManager's internal storage. const SERVICE_CONFIG_FILENAME: &str = "service.toml"; @@ -120,6 +121,7 @@ pub struct ServiceManager { vnic_allocator: VnicAllocator, underlay_vnic: EtherstubVnic, underlay_address: Ipv6Addr, + rack_id: Uuid, } impl ServiceManager { @@ -139,6 +141,7 @@ impl ServiceManager { underlay_vnic: EtherstubVnic, underlay_address: Ipv6Addr, config: Config, + rack_id: Uuid, ) -> Result { debug!(log, "Creating new ServiceManager"); let mgr = Self { @@ -148,6 +151,7 @@ impl ServiceManager { vnic_allocator: VnicAllocator::new("Service", etherstub), underlay_vnic, underlay_address, + rack_id, }; let config_path = mgr.services_config_path(); @@ -312,6 +316,7 @@ impl ServiceManager { // cannot be known at packaging time. let runtime_config = NexusRuntimeConfig { id: service.id, + rack_id: self.rack_id, dropshot_external: ConfigDropshot { bind_address: SocketAddr::V6(external_address), request_body_max_bytes: 1048576, @@ -694,6 +699,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -720,6 +726,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -748,6 +755,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -765,6 +773,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -789,6 +798,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -808,6 +818,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, config, + Uuid::new_v4(), ) .await .unwrap(); diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 5d6481fcc34..b45c56b374c 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -11,7 +11,7 @@ use crate::illumos::zfs::{ }; use crate::illumos::{execute, PFEXEC}; use crate::instance_manager::InstanceManager; -use crate::nexus::NexusClient; +use crate::nexus::LazyNexusClient; use crate::params::{ DatasetKind, DiskStateRequested, InstanceHardware, InstanceMigrateParams, InstanceRuntimeStateRequested, ServiceEnsureBody, Zpool @@ -24,7 +24,6 @@ use omicron_common::api::{ }; use slog::Logger; use std::net::SocketAddrV6; -use std::sync::Arc; use uuid::Uuid; #[cfg(not(test))] @@ -100,7 +99,7 @@ pub struct SledAgent { // Component of Sled Agent responsible for managing Propolis instances. instances: InstanceManager, - nexus_client: Arc, + lazy_nexus_client: LazyNexusClient, // Other Oxide-controlled services running on this Sled. services: ServiceManager, @@ -111,9 +110,10 @@ impl SledAgent { pub async fn new( config: &Config, log: Logger, - nexus_client: Arc, + lazy_nexus_client: LazyNexusClient, id: Uuid, sled_address: SocketAddrV6, + rack_id: Uuid, ) -> Result { // Pass the "parent_log" to all subcomponents that want to set their own // "component" value. 
@@ -218,7 +218,7 @@ impl SledAgent { let storage = StorageManager::new( &parent_log, id, - nexus_client.clone(), + lazy_nexus_client.clone(), etherstub.clone(), *sled_address.ip(), ) @@ -235,7 +235,7 @@ impl SledAgent { } let instances = InstanceManager::new( parent_log.clone(), - nexus_client.clone(), + lazy_nexus_client.clone(), etherstub.clone(), *sled_address.ip(), ); @@ -245,6 +245,7 @@ impl SledAgent { etherstub_vnic.clone(), *sled_address.ip(), services::Config::default(), + rack_id, ) .await?; @@ -252,7 +253,7 @@ impl SledAgent { id, storage, instances, - nexus_client, + lazy_nexus_client, services, }) } @@ -327,7 +328,11 @@ impl SledAgent { &self, artifact: UpdateArtifact, ) -> Result<(), Error> { - crate::updates::download_artifact(artifact, self.nexus_client.as_ref()) + let nexus_client = self.lazy_nexus_client.get() + .await + // TODO: Handle error + .unwrap(); + crate::updates::download_artifact(artifact, &nexus_client) .await?; Ok(()) } diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 467bb70b3d8..6305c41f949 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -10,7 +10,7 @@ use crate::illumos::vnic::VnicAllocator; use crate::illumos::zone::AddressRequest; use crate::illumos::zpool::ZpoolName; use crate::illumos::{zfs::Mountpoint, zone::ZONE_PREFIX, zpool::ZpoolInfo}; -use crate::nexus::NexusClient; +use crate::nexus::LazyNexusClient; use crate::params::DatasetKind; use futures::stream::FuturesOrdered; use futures::FutureExt; @@ -523,7 +523,7 @@ async fn ensure_running_zone( } type NotifyFut = dyn futures::Future< - Output = Result<(), nexus_client::Error>, + Output = Result<(), String> > + Send; #[derive(Debug)] @@ -538,7 +538,7 @@ struct NewFilesystemRequest { struct StorageWorker { log: Logger, sled_id: Uuid, - nexus_client: Arc, + lazy_nexus_client: LazyNexusClient, pools: Arc>>, new_pools_rx: mpsc::Receiver, new_filesystems_rx: mpsc::Receiver, @@ -631,21 +631,19 @@ impl StorageWorker { size: ByteCount, ) { let sled_id = self.sled_id; - let nexus = self.nexus_client.clone(); + let lazy_nexus_client = self.lazy_nexus_client.clone(); let notify_nexus = move || { let zpool_request = ZpoolPutRequest { size: size.into() }; - let nexus = nexus.clone(); + let lazy_nexus_client = lazy_nexus_client.clone(); async move { - nexus + lazy_nexus_client + .get() + .await + .map_err(|e| backoff::BackoffError::transient(e.to_string()))? .zpool_put(&sled_id, &pool_id, &zpool_request) .await - .map_err(backoff::BackoffError::transient)?; - Ok::< - (), - backoff::BackoffError< - nexus_client::Error, - >, - >(()) + .map_err(|e| backoff::BackoffError::transient(e.to_string()))?; + Ok(()) } }; let log = self.log.clone(); @@ -673,9 +671,9 @@ impl StorageWorker { datasets: Vec<(Uuid, SocketAddrV6, DatasetKind)>, pool_id: Uuid, ) { - let nexus = self.nexus_client.clone(); + let lazy_nexus_client = self.lazy_nexus_client.clone(); let notify_nexus = move || { - let nexus = nexus.clone(); + let lazy_nexus_client = lazy_nexus_client.clone(); let datasets = datasets.clone(); async move { for (id, address, kind) in datasets { @@ -683,18 +681,16 @@ impl StorageWorker { address: address.to_string(), kind: kind.into(), }; - nexus + lazy_nexus_client + .get() + .await + .map_err(|e| backoff::BackoffError::transient(e.to_string()))? 
.dataset_put(&pool_id, &id, &request) .await - .map_err(backoff::BackoffError::transient)?; + .map_err(|e| backoff::BackoffError::transient(e.to_string()))?; } - Ok::< - (), - backoff::BackoffError< - nexus_client::Error, - >, - >(()) + Ok(()) } }; let log = self.log.clone(); @@ -904,7 +900,7 @@ impl StorageManager { pub async fn new( log: &Logger, sled_id: Uuid, - nexus_client: Arc, + lazy_nexus_client: LazyNexusClient, etherstub: Etherstub, underlay_address: Ipv6Addr, ) -> Self { @@ -915,7 +911,7 @@ impl StorageManager { let mut worker = StorageWorker { log, sled_id, - nexus_client, + lazy_nexus_client, pools: pools.clone(), new_pools_rx, new_filesystems_rx, diff --git a/smf/sled-agent/config.toml b/smf/sled-agent/config.toml index 44a237e1930..170350afacd 100644 --- a/smf/sled-agent/config.toml +++ b/smf/sled-agent/config.toml @@ -1,9 +1,5 @@ # Sled Agent Configuration -# TODO: Remove this address -# Internal address of Nexus -nexus_address = "[fd00:1122:3344:0101::3]:12221" - # A file-backed zpool can be manually created with the following: # # truncate -s 10GB testpool.vdev # # zpool create oxp_d462a7f7-b628-40fe-80ff-4e4189e2d62b "$PWD/testpool.vdev" From dc3b84b2b15881e0862e78012cf835633de85090 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 12 Jun 2022 15:39:53 -0400 Subject: [PATCH 05/35] Add bg work user, rack insert populate, patch tests --- common/src/backoff.rs | 5 +- nexus/examples/config.toml | 1 + nexus/src/app/mod.rs | 36 +-- nexus/src/app/rack.rs | 20 +- nexus/src/app/update.rs | 23 +- nexus/src/authn/mod.rs | 11 + nexus/src/config.rs | 4 +- nexus/src/context.rs | 25 +- nexus/src/db/datastore.rs | 28 +-- nexus/src/db/fixed_data/role_assignment.rs | 7 + nexus/src/db/fixed_data/user_builtin.rs | 11 + nexus/src/lib.rs | 13 +- nexus/src/populate.rs | 71 +++++- nexus/test-utils/src/lib.rs | 5 +- nexus/tests/config.test.toml | 2 +- .../tests/integration_tests/users_builtin.rs | 2 + sled-agent/src/http_entrypoints.rs | 6 +- sled-agent/src/instance.rs | 7 +- sled-agent/src/instance_manager.rs | 12 +- sled-agent/src/nexus.rs | 39 +++- sled-agent/src/rack_setup/mod.rs | 2 +- sled-agent/src/rack_setup/plan/service.rs | 81 +++---- sled-agent/src/rack_setup/plan/sled.rs | 50 ++-- sled-agent/src/rack_setup/service.rs | 219 ++++++++---------- sled-agent/src/server.rs | 21 +- sled-agent/src/sled_agent.rs | 25 +- sled-agent/src/storage_manager.rs | 29 ++- smf/sled-agent/config-rss.toml | 56 ----- 28 files changed, 394 insertions(+), 417 deletions(-) diff --git a/common/src/backoff.rs b/common/src/backoff.rs index bcf726ff2a0..46f05d899a6 100644 --- a/common/src/backoff.rs +++ b/common/src/backoff.rs @@ -17,7 +17,9 @@ pub fn internal_service_policy() -> ::backoff::ExponentialBackoff { internal_service_policy_with_max(MAX_INTERVAL) } -pub fn internal_service_policy_with_max(max_duration: Duration) -> ::backoff::ExponentialBackoff { +pub fn internal_service_policy_with_max( + max_duration: Duration, +) -> ::backoff::ExponentialBackoff { const INITIAL_INTERVAL: Duration = Duration::from_millis(250); ::backoff::ExponentialBackoff { current_interval: INITIAL_INTERVAL, @@ -27,5 +29,4 @@ pub fn internal_service_policy_with_max(max_duration: Duration) -> ::backoff::Ex max_elapsed_time: None, ..backoff::ExponentialBackoff::default() } - } diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 22889ab1be9..8c22e661820 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -36,6 +36,7 @@ address = "[::1]:8123" [runtime] # Identifier for this instance of 
Nexus id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" +rack_id = "c19a698f-c6f9-4a17-ae30-20d711b8f7dc" [runtime.dropshot_external] # IP address and TCP port on which to listen for the external API diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 13cf48b91ec..27f8fd650fc 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -10,6 +10,7 @@ use crate::config; use crate::context::OpContext; use crate::db; use crate::populate::populate_start; +use crate::populate::PopulateArgs; use crate::populate::PopulateStatus; use crate::saga_interface::SagaContext; use anyhow::anyhow; @@ -89,6 +90,9 @@ pub struct Nexus { /// Operational context used for external request authentication opctx_external_authn: OpContext, + + /// Operational context used for Nexus-driven background tasks + opctx_background_work: OpContext, } // TODO Is it possible to make some of these operations more generic? A @@ -136,8 +140,13 @@ impl Nexus { authn::Context::internal_db_init(), Arc::clone(&db_datastore), ); - let populate_status = - populate_start(populate_ctx, Arc::clone(&db_datastore)); + + let populate_args = PopulateArgs::new(rack_id); + let populate_status = populate_start( + populate_ctx, + Arc::clone(&db_datastore), + populate_args, + ); let nexus = Nexus { id: config.runtime.id, @@ -163,6 +172,12 @@ impl Nexus { authn::Context::external_authn(), Arc::clone(&db_datastore), ), + opctx_background_work: OpContext::for_background( + log.new(o!("component" => "Background Work")), + Arc::clone(&authz), + authn::Context::internal_db_background(), + Arc::clone(&db_datastore), + ), }; // TODO-cleanup all the extra Arcs here seems wrong @@ -213,23 +228,16 @@ impl Nexus { } } - /// Returns an [`OpContext`] used for background tasks. - // TODO: dap@ recommends using a different user for this, other than - // "internal_db_init". - pub fn opctx_for_background(&self) -> OpContext { - OpContext::for_background( - self.log.new(o!("component" => "Background Work")), - Arc::clone(&self.authz), - authn::Context::internal_db_init(), - Arc::clone(&self.datastore()), - ) - } - /// Returns an [`OpContext`] used for authenticating external requests pub fn opctx_external_authn(&self) -> &OpContext { &self.opctx_external_authn } + /// Returns an [`OpContext`] used for background tasks. + pub fn opctx_for_background(&self) -> &OpContext { + &self.opctx_background_work + } + /// Used as the body of a "stub" endpoint -- one that's currently /// unimplemented but that we eventually intend to implement /// diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index bf1f2026e8a..dde3df7449c 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -24,11 +24,6 @@ impl super::Nexus { self.db_datastore.rack_list(&opctx, pagparams).await } - // TODO: Use this, instead of the manual one. - // - // Note that this will require insertion of the rack to occur - // during the "populate" steps. - /* pub async fn rack_lookup( &self, opctx: &OpContext, @@ -40,15 +35,6 @@ impl super::Nexus { .await?; Ok(db_rack) } - */ - - pub async fn rack_lookup( - &self, - opctx: &OpContext, - rack_id: &Uuid, - ) -> LookupResult { - self.db_datastore.rack_lookup_manual(opctx, *rack_id).await - } /// Ensures that a rack exists in the DB. /// @@ -76,7 +62,8 @@ impl super::Nexus { opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; // Convert from parameter -> DB type. 
- let services: Vec<_> = request.services + let services: Vec<_> = request + .services .into_iter() .map(|svc| { db::model::Service::new( @@ -88,7 +75,8 @@ impl super::Nexus { }) .collect(); - let datasets: Vec<_> = request.datasets + let datasets: Vec<_> = request + .datasets .into_iter() .map(|dataset| { db::model::Dataset::new( diff --git a/nexus/src/app/update.rs b/nexus/src/app/update.rs index 65ec3b6ddde..2d87a44a84f 100644 --- a/nexus/src/app/update.rs +++ b/nexus/src/app/update.rs @@ -24,11 +24,11 @@ use tokio::io::AsyncWriteExt; static BASE_ARTIFACT_DIR: &str = "/var/tmp/oxide_artifacts"; impl super::Nexus { - async fn tuf_base_url(&self, opctx: &OpContext) -> Result, Error> { - let rack = self.rack_lookup( - opctx, - &self.rack_id, - ).await?; + async fn tuf_base_url( + &self, + opctx: &OpContext, + ) -> Result, Error> { + let rack = self.rack_lookup(opctx, &self.rack_id).await?; Ok(self.updates_config.as_ref().map(|c| { rack.tuf_base_url.unwrap_or_else(|| c.default_base_url.clone()) @@ -46,10 +46,11 @@ impl super::Nexus { message: "updates system not configured".into(), } })?; - let base_url = - self.tuf_base_url(opctx).await?.ok_or_else(|| Error::InvalidRequest { + let base_url = self.tuf_base_url(opctx).await?.ok_or_else(|| { + Error::InvalidRequest { message: "updates system not configured".into(), - })?; + } + })?; let trusted_root = tokio::fs::read(&updates_config.trusted_root) .await .map_err(|e| Error::InternalError { @@ -133,8 +134,10 @@ impl super::Nexus { artifact: UpdateArtifact, ) -> Result, Error> { let mut base_url = - self.tuf_base_url(opctx).await?.ok_or_else(|| Error::InvalidRequest { - message: "updates system not configured".into(), + self.tuf_base_url(opctx).await?.ok_or_else(|| { + Error::InvalidRequest { + message: "updates system not configured".into(), + } })?; if !base_url.ends_with('/') { base_url.push('/'); diff --git a/nexus/src/authn/mod.rs b/nexus/src/authn/mod.rs index 59e5bc7a889..c9399bdb131 100644 --- a/nexus/src/authn/mod.rs +++ b/nexus/src/authn/mod.rs @@ -30,6 +30,7 @@ pub mod silos; pub use crate::db::fixed_data::silo_user::USER_TEST_PRIVILEGED; pub use crate::db::fixed_data::silo_user::USER_TEST_UNPRIVILEGED; +pub use crate::db::fixed_data::user_builtin::USER_BACKGROUND_WORK; pub use crate::db::fixed_data::user_builtin::USER_DB_INIT; pub use crate::db::fixed_data::user_builtin::USER_EXTERNAL_AUTHN; pub use crate::db::fixed_data::user_builtin::USER_INTERNAL_API; @@ -170,6 +171,11 @@ impl Context { Context::context_for_builtin_user(USER_DB_INIT.id) } + /// Returns an authenticated context for Nexus-driven db work. 
+ pub fn internal_db_background() -> Context { + Context::context_for_builtin_user(USER_BACKGROUND_WORK.id) + } + fn context_for_builtin_user(user_builtin_id: Uuid) -> Context { Context { kind: Kind::Authenticated(Details { @@ -213,6 +219,7 @@ impl Context { #[cfg(test)] mod test { use super::Context; + use super::USER_BACKGROUND_WORK; use super::USER_DB_INIT; use super::USER_INTERNAL_API; use super::USER_INTERNAL_READ; @@ -251,6 +258,10 @@ mod test { let actor = authn.actor().unwrap(); assert_eq!(actor.actor_id(), USER_DB_INIT.id); + let authn = Context::internal_db_background(); + let actor = authn.actor().unwrap(); + assert_eq!(actor.actor_id(), USER_BACKGROUND_WORK.id); + let authn = Context::internal_saga_recovery(); let actor = authn.actor().unwrap(); assert_eq!(actor.actor_id(), USER_SAGA_RECOVERY.id); diff --git a/nexus/src/config.rs b/nexus/src/config.rs index a157c4bdbeb..f8f52fcf50d 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -347,7 +347,9 @@ mod test { Config { runtime: RuntimeConfig { id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), - rack_id: "38b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), + rack_id: "38b90dc4-c22a-65ba-f49a-f051fe01208f" + .parse() + .unwrap(), dropshot_external: ConfigDropshot { bind_address: "10.1.2.3:4567" .parse::() diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 4cd92a05f91..90e57669cd9 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -19,9 +19,7 @@ use authn::external::spoof::HttpAuthnSpoof; use authn::external::HttpAuthnScheme; use chrono::{DateTime, Duration, Utc}; use internal_dns_client::names::SRV; -use omicron_common::address::{ - Ipv6Subnet, AZ_PREFIX, COCKROACH_PORT, -}; +use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, COCKROACH_PORT}; use omicron_common::api::external::Error; use omicron_common::nexus_config; use omicron_common::postgres_config::PostgresConfigWithUrl; @@ -154,7 +152,9 @@ impl ServerContext { nexus_config::Database::FromDns => { info!(log, "Accessing DB url from DNS"); let response = resolver - .lookup_ip(&SRV::Service("cockroachdb".to_string()).to_string()) + .lookup_ip( + &SRV::Service("cockroachdb".to_string()).to_string(), + ) .await .map_err(|e| format!("Failed to lookup IP: {}", e))?; let address = response.iter().next().ok_or_else(|| { @@ -169,7 +169,7 @@ impl ServerContext { } }; let pool = db::Pool::new(&db::Config { url }); - let nexus = Nexus::new_with_id( + let nexus = Nexus::new_with_id( rack_id, log.new(o!("component" => "nexus")), pool, @@ -177,21 +177,6 @@ impl ServerContext { Arc::clone(&authz), ); - // Do not return until a rack exists in the DB with the provided UUID. 
- let populate_ctx = nexus.opctx_for_background(); - loop { - let result = nexus.rack_insert(&populate_ctx, rack_id) - .await; - if let Err(e) = result { - info!(log, "Failed to create initial rack: {}", e); - tokio::time::sleep(std::time::Duration::from_millis(200)).await; - } else { - info!(log, "Rack with UUID {} exists in the database", rack_id); - nexus.rack_lookup(&populate_ctx, &rack_id).await.unwrap(); - break; - } - } - Ok(Arc::new(ServerContext { nexus, log, diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index c8297e36aa7..f1cba756c85 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -175,21 +175,6 @@ impl DataStore { }) } - pub async fn rack_lookup_manual( - &self, - _opctx: &OpContext, - rack_id: Uuid, - ) -> LookupResult { - use db::schema::rack::dsl; - - dsl::rack - .filter(dsl::id.eq(rack_id)) - .select(Rack::as_select()) - .get_result_async(self.pool()) - .await - .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) - } - /// Update a rack to mark that it has been initialized pub async fn rack_set_initialized( &self, @@ -202,8 +187,16 @@ impl DataStore { #[derive(Debug)] enum RackInitError { - ServiceInsert { err: SyncInsertError, sled_id: Uuid, svc_id: Uuid }, - DatasetInsert { err: SyncInsertError, zpool_id: Uuid, dataset_id: Uuid }, + ServiceInsert { + err: SyncInsertError, + sled_id: Uuid, + svc_id: Uuid, + }, + DatasetInsert { + err: SyncInsertError, + zpool_id: Uuid, + dataset_id: Uuid, + }, RackUpdate(diesel::result::Error), } type TxnError = TransactionError; @@ -2948,6 +2941,7 @@ impl DataStore { let builtin_users = [ // Note: "db_init" is also a builtin user, but that one by necessity // is created with the database. + &*authn::USER_BACKGROUND_WORK, &*authn::USER_INTERNAL_API, &*authn::USER_INTERNAL_READ, &*authn::USER_EXTERNAL_AUTHN, diff --git a/nexus/src/db/fixed_data/role_assignment.rs b/nexus/src/db/fixed_data/role_assignment.rs index 94caf552a13..540b57abe50 100644 --- a/nexus/src/db/fixed_data/role_assignment.rs +++ b/nexus/src/db/fixed_data/role_assignment.rs @@ -24,6 +24,13 @@ lazy_static! { *FLEET_ID, role_builtin::FLEET_ADMIN.role_name, ), + RoleAssignment::new( + IdentityType::UserBuiltin, + user_builtin::USER_BACKGROUND_WORK.id, + role_builtin::FLEET_ADMIN.resource_type, + *FLEET_ID, + role_builtin::FLEET_ADMIN.role_name, + ), // The "internal-read" user gets the "viewer" role on the sole // Fleet. This will grant them the ability to read various control diff --git a/nexus/src/db/fixed_data/user_builtin.rs b/nexus/src/db/fixed_data/user_builtin.rs index 1e9dee1b7bf..238a8f5405a 100644 --- a/nexus/src/db/fixed_data/user_builtin.rs +++ b/nexus/src/db/fixed_data/user_builtin.rs @@ -39,6 +39,15 @@ lazy_static! { "used for seeding initial database data", ); + /// Internal user for performing operations driven by Nexus, rather + /// than any API request. + pub static ref USER_BACKGROUND_WORK: UserBuiltinConfig = + UserBuiltinConfig::new_static( + "001de000-05e4-4000-8000-00000000bac3", + "background-work", + "used for Nexus-driven database operations", + ); + /// Internal user used by Nexus when handling internal API requests pub static ref USER_INTERNAL_API: UserBuiltinConfig = UserBuiltinConfig::new_static( @@ -77,6 +86,7 @@ lazy_static! 
{ #[cfg(test)] mod test { use super::super::assert_valid_uuid; + use super::USER_BACKGROUND_WORK; use super::USER_DB_INIT; use super::USER_EXTERNAL_AUTHN; use super::USER_INTERNAL_API; @@ -85,6 +95,7 @@ mod test { #[test] fn test_builtin_user_ids_are_valid() { + assert_valid_uuid(&USER_BACKGROUND_WORK.id); assert_valid_uuid(&USER_DB_INIT.id); assert_valid_uuid(&USER_INTERNAL_API.id); assert_valid_uuid(&USER_EXTERNAL_AUTHN.id); diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 627a48dd681..079a7a26f54 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -88,7 +88,8 @@ impl Server { let ctxlog = log.new(o!("component" => "ServerContext")); - let apictx = ServerContext::new(config.runtime.rack_id, ctxlog, &config).await?; + let apictx = + ServerContext::new(config.runtime.rack_id, ctxlog, &config).await?; let http_server_starter_internal = dropshot::HttpServerStarter::new( &config.runtime.dropshot_internal, @@ -100,6 +101,15 @@ impl Server { let http_server_internal = http_server_starter_internal.start(); // Wait until RSS handoff completes. + // TODO: This messes up the tests. Should we make this a config option? + // + // TODO: This actually raises a question; what triggers background tasks + // to execute? + // + // - Perhaps the API is exposed to tests? + // - Perhaps the invocation of that API is controlled by config + // options? + /* let opctx = apictx.nexus.opctx_for_background(); loop { let result = apictx.nexus.rack_lookup(&opctx, &config.runtime.rack_id).await; @@ -116,6 +126,7 @@ impl Server { } tokio::time::sleep(std::time::Duration::from_secs(2)).await; } + */ let http_server_starter_external = dropshot::HttpServerStarter::new( &config.runtime.dropshot_external, diff --git a/nexus/src/populate.rs b/nexus/src/populate.rs index 9f6bcdcad20..85223aef2b1 100644 --- a/nexus/src/populate.rs +++ b/nexus/src/populate.rs @@ -43,13 +43,14 @@ //! each populator behaves as expected in the above ways. use crate::context::OpContext; -use crate::db::DataStore; +use crate::db::{self, DataStore}; use futures::future::BoxFuture; use futures::FutureExt; use lazy_static::lazy_static; use omicron_common::api::external::Error; use omicron_common::backoff; use std::sync::Arc; +use uuid::Uuid; #[derive(Clone, Debug)] pub enum PopulateStatus { @@ -58,14 +59,26 @@ pub enum PopulateStatus { Failed(String), } +/// Auxiliary data necessary to populate the database. +pub struct PopulateArgs { + rack_id: Uuid, +} + +impl PopulateArgs { + pub fn new(rack_id: Uuid) -> Self { + Self { rack_id } + } +} + pub fn populate_start( opctx: OpContext, datastore: Arc, + args: PopulateArgs, ) -> tokio::sync::watch::Receiver { let (tx, rx) = tokio::sync::watch::channel(PopulateStatus::NotDone); tokio::spawn(async move { - let result = populate(&opctx, &datastore).await; + let result = populate(&opctx, &datastore, &args).await; if let Err(error) = tx.send(match result { Ok(()) => PopulateStatus::Done, Err(message) => PopulateStatus::Failed(message), @@ -80,17 +93,19 @@ pub fn populate_start( async fn populate( opctx: &OpContext, datastore: &DataStore, + args: &PopulateArgs, ) -> Result<(), String> { for p in *ALL_POPULATORS { let db_result = backoff::retry_notify( backoff::internal_service_policy(), || async { - p.populate(opctx, datastore).await.map_err(|error| match &error - { - Error::ServiceUnavailable { .. } => { - backoff::BackoffError::transient(error) + p.populate(opctx, datastore, args).await.map_err(|error| { + match &error { + Error::ServiceUnavailable { .. 
} => { + backoff::BackoffError::transient(error) + } + _ => backoff::BackoffError::Permanent(error), } - _ => backoff::BackoffError::Permanent(error), }) }, |error, delay| { @@ -130,6 +145,7 @@ trait Populator: std::fmt::Debug + Send + Sync { &self, opctx: &'a OpContext, datastore: &'a DataStore, + args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b; @@ -143,6 +159,7 @@ impl Populator for PopulateBuiltinUsers { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -159,6 +176,7 @@ impl Populator for PopulateBuiltinRoles { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -175,6 +193,7 @@ impl Populator for PopulateBuiltinRoleAssignments { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -192,6 +211,7 @@ impl Populator for PopulateBuiltinSilos { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -214,6 +234,7 @@ impl Populator for PopulateSiloUsers { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -230,6 +251,7 @@ impl Populator for PopulateSiloUserRoleAssignments { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -241,19 +263,43 @@ impl Populator for PopulateSiloUserRoleAssignments { } } +#[derive(Debug)] +struct PopulateRack; +impl Populator for PopulateRack { + fn populate<'a, 'b>( + &self, + opctx: &'a OpContext, + datastore: &'a DataStore, + args: &'a PopulateArgs, + ) -> BoxFuture<'b, Result<(), Error>> + where + 'a: 'b, + { + async { + datastore + .rack_insert(opctx, &db::model::Rack::new(args.rack_id)) + .await?; + Ok(()) + } + .boxed() + } +} + lazy_static! { - static ref ALL_POPULATORS: [&'static dyn Populator; 6] = [ + static ref ALL_POPULATORS: [&'static dyn Populator; 7] = [ &PopulateBuiltinUsers, &PopulateBuiltinRoles, &PopulateBuiltinRoleAssignments, &PopulateBuiltinSilos, &PopulateSiloUsers, &PopulateSiloUserRoleAssignments, + &PopulateRack, ]; } #[cfg(test)] mod test { + use super::PopulateArgs; use super::Populator; use super::ALL_POPULATORS; use crate::authn; @@ -265,6 +311,7 @@ mod test { use omicron_common::api::external::Error; use omicron_test_utils::dev; use std::sync::Arc; + use uuid::Uuid; #[tokio::test] async fn test_populators() { @@ -287,16 +334,18 @@ mod test { ); let log = &logctx.log; + let args = PopulateArgs::new(Uuid::new_v4()); + // Running each populator once under normal conditions should work. info!(&log, "populator {:?}, run 1", p); - p.populate(&opctx, &datastore) + p.populate(&opctx, &datastore, &args) .await .with_context(|| format!("populator {:?} (try 1)", p)) .unwrap(); // It should also work fine to run it again. info!(&log, "populator {:?}, run 2 (idempotency check)", p); - p.populate(&opctx, &datastore) + p.populate(&opctx, &datastore, &args) .await .with_context(|| { format!( @@ -331,7 +380,7 @@ mod test { ); info!(&log, "populator {:?}, with database offline", p); - match p.populate(&opctx, &datastore).await { + match p.populate(&opctx, &datastore, &args).await { Err(Error::ServiceUnavailable { .. 
}) => (), Ok(_) => panic!( "populator {:?}: unexpectedly succeeded with no database", diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 74c8a7f2d21..48fa1fec479 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -103,9 +103,8 @@ pub async fn test_setup_with_config( nexus_config::Database::FromUrl { url: database.pg_config().clone() }; config.pkg.timeseries_db.address.set_port(clickhouse.port()); - let server = omicron_nexus::Server::start(&config, &logctx.log) - .await - .unwrap(); + let server = + omicron_nexus::Server::start(&config, &logctx.log).await.unwrap(); server .apictx .nexus diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index a1b47d7f178..20e3df3330d 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -39,7 +39,7 @@ max_vpc_ipv4_subnet_prefix = 29 # Identifier for this instance of Nexus. # NOTE: The test suite always overrides this. id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" -rack_id = "f6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" +rack_id = "c19a698f-c6f9-4a17-ae30-20d711b8f7dc" # # NOTE: for the test suite, the port MUST be 0 (in order to bind to any diff --git a/nexus/tests/integration_tests/users_builtin.rs b/nexus/tests/integration_tests/users_builtin.rs index b06741a3067..c6d3615c9ef 100644 --- a/nexus/tests/integration_tests/users_builtin.rs +++ b/nexus/tests/integration_tests/users_builtin.rs @@ -27,6 +27,8 @@ async fn test_users_builtin(cptestctx: &ControlPlaneTestContext) { let u = users.remove(&authn::USER_DB_INIT.name.to_string()).unwrap(); assert_eq!(u.identity.id, authn::USER_DB_INIT.id); + let u = users.remove(&authn::USER_BACKGROUND_WORK.name.to_string()).unwrap(); + assert_eq!(u.identity.id, authn::USER_BACKGROUND_WORK.id); let u = users.remove(&authn::USER_INTERNAL_API.name.to_string()).unwrap(); assert_eq!(u.identity.id, authn::USER_INTERNAL_API.id); let u = users.remove(&authn::USER_INTERNAL_READ.name.to_string()).unwrap(); diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 72a8c3c3f74..d0dd478ea4c 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -66,11 +66,7 @@ async fn zpools_get( rqctx: Arc>, ) -> Result>, HttpError> { let sa = rqctx.context(); - Ok(HttpResponseOk( - sa.zpools_get() - .await - .map_err(|e| Error::from(e))? 
- )) + Ok(HttpResponseOk(sa.zpools_get().await.map_err(|e| Error::from(e))?)) } #[endpoint { diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 8ffdc5c5519..55358cb8dd2 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -719,7 +719,7 @@ impl Instance { mod test { use super::*; use crate::illumos::dladm::Etherstub; - use crate::mocks::MockNexusClient; + use crate::nexus::LazyNexusClient; use crate::opte::OptePortAllocator; use crate::params::InstanceStateRequested; use chrono::Utc; @@ -792,7 +792,8 @@ mod test { Etherstub("mylink".to_string()), ); let port_allocator = OptePortAllocator::new(); - let nexus_client = MockNexusClient::default(); + let lazy_nexus_client = + LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST); let inst = Instance::new( log.clone(), @@ -803,7 +804,7 @@ mod test { ), port_allocator, new_initial_instance(), - Arc::new(nexus_client), + lazy_nexus_client, ) .unwrap(); diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index a87db943195..d93b8eae04e 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -199,7 +199,7 @@ mod test { use crate::illumos::dladm::Etherstub; use crate::illumos::{dladm::MockDladm, zone::MockZones}; use crate::instance::MockInstance; - use crate::mocks::MockNexusClient; + use crate::nexus::LazyNexusClient; use crate::params::InstanceStateRequested; use chrono::Utc; use omicron_common::api::external::{ @@ -246,7 +246,8 @@ mod test { #[serial_test::serial] async fn ensure_instance() { let log = logger(); - let nexus_client = Arc::new(MockNexusClient::default()); + let lazy_nexus_client = + LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST); // Creation of the instance manager incurs some "global" system // checks: cleanup of existing zones + vnics. @@ -259,7 +260,7 @@ mod test { let im = InstanceManager::new( log, - nexus_client, + lazy_nexus_client, Etherstub("mylink".to_string()), std::net::Ipv6Addr::new( 0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, @@ -329,7 +330,8 @@ mod test { #[serial_test::serial] async fn ensure_instance_repeatedly() { let log = logger(); - let nexus_client = Arc::new(MockNexusClient::default()); + let lazy_nexus_client = + LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST); // Instance Manager creation. @@ -341,7 +343,7 @@ mod test { let im = InstanceManager::new( log, - nexus_client, + lazy_nexus_client, Etherstub("mylink".to_string()), std::net::Ipv6Addr::new( 0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 8a2be0e0e54..00e87fd6a1b 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -8,9 +8,7 @@ pub use crate::mocks::MockNexusClient as NexusClient; pub use nexus_client::Client as NexusClient; use internal_dns_client::names::SRV; -use omicron_common::address::{ - AZ_PREFIX, NEXUS_INTERNAL_PORT, Ipv6Subnet, -}; +use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, NEXUS_INTERNAL_PORT}; use slog::Logger; use std::net::Ipv6Addr; use std::sync::Arc; @@ -18,10 +16,19 @@ use std::sync::Arc; struct Inner { log: Logger, addr: Ipv6Addr, - // TODO: We could also totally cache the resolver / observed IP here? } +/// Wrapper around a [`NexusClient`] object, which allows deferring +/// the DNS lookup until accessed. +/// +/// Without the assistance of OS-level DNS lookups, the [`NexusClient`] +/// interface requires knowledge of the target service IP address. 
+/// For some services, like Nexus, this can be painful, as the IP address +/// may not have even been allocated when the Sled Agent starts. +/// +/// This structure allows clients to access the client on-demand, performing +/// the DNS lookup only once it is actually needed. #[derive(Clone)] pub struct LazyNexusClient { inner: Arc, @@ -29,14 +36,7 @@ pub struct LazyNexusClient { impl LazyNexusClient { pub fn new(log: Logger, addr: Ipv6Addr) -> Self { - Self { - inner: Arc::new( - Inner { - log, - addr, - } - ) - } + Self { inner: Arc::new(Inner { log, addr }) } } pub async fn get(&self) -> Result { @@ -62,3 +62,18 @@ impl LazyNexusClient { )) } } + +// Provides a mock implementation of the [`LazyNexusClient`]. +// +// This allows tests to use the structure without actually performing +// any DNS lookups. +#[cfg(test)] +mockall::mock! { + pub LazyNexusClient { + pub fn new(log: Logger, addr: Ipv6Addr) -> Self; + pub async fn get(&self) -> Result; + } + impl Clone for LazyNexusClient { + fn clone(&self) -> Self; + } +} diff --git a/sled-agent/src/rack_setup/mod.rs b/sled-agent/src/rack_setup/mod.rs index f052b6c3120..4df85a7727f 100644 --- a/sled-agent/src/rack_setup/mod.rs +++ b/sled-agent/src/rack_setup/mod.rs @@ -5,5 +5,5 @@ //! Rack Setup Service pub mod config; -pub mod service; mod plan; +pub mod service; diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index c5ceb3c1ef0..0bdf332d748 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -15,9 +15,7 @@ use omicron_common::backoff::{ }; use serde::{Deserialize, Serialize}; use sled_agent_client::{ - Client as SledAgentClient, - Error as SledAgentError, - types as SledAgentTypes, + types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; use slog::Logger; use std::collections::HashMap; @@ -33,8 +31,7 @@ const NEXUS_COUNT: usize = 1; const CRDB_COUNT: usize = 1; fn rss_service_plan_path() -> PathBuf { - Path::new(omicron_common::OMICRON_CONFIG_PATH) - .join("rss-service-plan.toml") + Path::new(omicron_common::OMICRON_CONFIG_PATH).join("rss-service-plan.toml") } /// Describes errors which may occur while generating a plan for services. @@ -81,31 +78,27 @@ pub struct Plan { } impl Plan { - pub async fn load( - log: &Logger, - ) -> Result, PlanError> - { + pub async fn load(log: &Logger) -> Result, PlanError> { // If we already created a plan for this RSS to allocate // services to sleds, re-use that existing plan. 
let rss_service_plan_path = rss_service_plan_path(); if rss_service_plan_path.exists() { info!(log, "RSS plan already created, loading from file"); - let plan: Self = - toml::from_str( - &tokio::fs::read_to_string(&rss_service_plan_path).await.map_err( - |err| PlanError::Io { - message: format!( - "Loading RSS plan {rss_service_plan_path:?}" - ), - err, - }, - )?, - ) - .map_err(|err| PlanError::Toml { - path: rss_service_plan_path, - err, - })?; + let plan: Self = toml::from_str( + &tokio::fs::read_to_string(&rss_service_plan_path) + .await + .map_err(|err| PlanError::Io { + message: format!( + "Loading RSS plan {rss_service_plan_path:?}" + ), + err, + })?, + ) + .map_err(|err| PlanError::Toml { + path: rss_service_plan_path, + err, + })?; Ok(Some(plan)) } else { Ok(None) @@ -134,20 +127,21 @@ impl Plan { .zpools_get() .await .map(|response| { - response.into_inner() + response + .into_inner() .into_iter() .map(|zpool| zpool.id) .collect() }) .map_err(|err| { - BackoffError::transient( - PlanError::SledApi(err) - ) + BackoffError::transient(PlanError::SledApi(err)) })?; if zpools.is_empty() { return Err(BackoffError::transient( - PlanError::SledInitialization("Awaiting zpools".to_string()) + PlanError::SledInitialization( + "Awaiting zpools".to_string(), + ), )); } @@ -156,12 +150,9 @@ impl Plan { let log_failure = |error, _| { warn!(log, "failed to get zpools"; "error" => ?error); }; - let zpools = retry_notify( - internal_service_policy(), - get_zpools, - log_failure, - ) - .await?; + let zpools = + retry_notify(internal_service_policy(), get_zpools, log_failure) + .await?; Ok(zpools[0]) } @@ -215,7 +206,8 @@ impl Plan { // The first enumerated sleds host the CRDB datasets, using // zpools described from the underlying config file. if idx < CRDB_COUNT { - let zpool_id = Self::get_a_zpool_from_sled(log, sled_address).await?; + let zpool_id = + Self::get_a_zpool_from_sled(log, sled_address).await?; let address = SocketAddrV6::new( addr_alloc.next().expect("Not enough addrs"), @@ -226,10 +218,9 @@ impl Plan { request.datasets.push(DatasetEnsureBody { id: Uuid::new_v4(), zpool_id, - dataset_kind: - crate::params::DatasetKind::CockroachDb { - all_addresses: vec![address], - }, + dataset_kind: crate::params::DatasetKind::CockroachDb { + all_addresses: vec![address], + }, address, }); } @@ -258,10 +249,7 @@ impl Plan { }); } - allocations.push(( - sled_address, - request - )); + allocations.push((sled_address, request)); } let mut services = std::collections::HashMap::new(); @@ -269,9 +257,7 @@ impl Plan { services.insert(addr, allocation); } - let plan = Self { - services - }; + let plan = Self { services }; // Once we've constructed a plan, write it down to durable storage. let serialized_plan = @@ -316,4 +302,3 @@ impl AddressBumpAllocator { Some(self.last_addr) } } - diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index 9154ab6698a..2e5559c1201 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -17,8 +17,7 @@ use thiserror::Error; use uuid::Uuid; fn rss_sled_plan_path() -> PathBuf { - Path::new(omicron_common::OMICRON_CONFIG_PATH) - .join("rss-sled-plan.toml") + Path::new(omicron_common::OMICRON_CONFIG_PATH).join("rss-sled-plan.toml") } /// Describes errors which may occur while generating a plan for sleds. @@ -39,7 +38,6 @@ pub enum PlanError { pub struct Plan { pub rack_id: Uuid, pub sleds: HashMap, - // TODO: Consider putting the rack subnet here? 
This may be operator-driven // in the future, so it should exist in the "plan". // @@ -48,30 +46,24 @@ pub struct Plan { } impl Plan { - pub async fn load( - log: &Logger, - ) -> Result, PlanError> { + pub async fn load(log: &Logger) -> Result, PlanError> { // If we already created a plan for this RSS to allocate // subnets/requests to sleds, re-use that existing plan. let rss_sled_plan_path = rss_sled_plan_path(); if rss_sled_plan_path.exists() { info!(log, "RSS plan already created, loading from file"); - let plan: Self = - toml::from_str( - &tokio::fs::read_to_string(&rss_sled_plan_path).await.map_err( - |err| PlanError::Io { - message: format!( - "Loading RSS plan {rss_sled_plan_path:?}" - ), - err, - }, - )?, - ) - .map_err(|err| PlanError::Toml { - path: rss_sled_plan_path, - err, - })?; + let plan: Self = toml::from_str( + &tokio::fs::read_to_string(&rss_sled_plan_path).await.map_err( + |err| PlanError::Io { + message: format!( + "Loading RSS plan {rss_sled_plan_path:?}" + ), + err, + }, + )?, + ) + .map_err(|err| PlanError::Toml { path: rss_sled_plan_path, err })?; Ok(Some(plan)) } else { Ok(None) @@ -88,10 +80,7 @@ impl Plan { let rack_id = Uuid::new_v4(); let allocations = bootstrap_addrs.map(|(idx, bootstrap_addr)| { - info!( - log, - "Creating plan for the sled at {:?}", bootstrap_addr - ); + info!(log, "Creating plan for the sled at {:?}", bootstrap_addr); let bootstrap_addr = SocketAddrV6::new(bootstrap_addr, BOOTSTRAP_AGENT_PORT, 0, 0); let sled_subnet_index = @@ -100,11 +89,7 @@ impl Plan { ( bootstrap_addr, - SledAgentRequest { - id: Uuid::new_v4(), - subnet, - rack_id, - }, + SledAgentRequest { id: Uuid::new_v4(), subnet, rack_id }, ) }); @@ -115,10 +100,7 @@ impl Plan { sleds.insert(addr, allocation); } - let plan = Self { - rack_id, - sleds, - }; + let plan = Self { rack_id, sleds }; // Once we've constructed a plan, write it down to durable storage. let serialized_plan = diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 2b25b8fa55a..206b2e4160b 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -6,23 +6,19 @@ use super::config::SetupServiceConfig as Config; use crate::bootstrap::{ - discovery::PeerMonitorObserver, - params::SledAgentRequest, rss_handle::BootstrapAgentHandle, + discovery::PeerMonitorObserver, params::SledAgentRequest, + rss_handle::BootstrapAgentHandle, }; use crate::params::{DatasetEnsureBody, ServiceRequest, ServiceType}; use crate::rack_setup::plan::service::{ - PlanError as ServicePlanError, - Plan as ServicePlan, + Plan as ServicePlan, PlanError as ServicePlanError, }; use crate::rack_setup::plan::sled::{ - PlanError as SledPlanError, - Plan as SledPlan, + Plan as SledPlan, PlanError as SledPlanError, }; use internal_dns_client::names::{AAAA, SRV}; use nexus_client::{ - Client as NexusClient, - Error as NexusError, - types as NexusTypes, + types as NexusTypes, Client as NexusClient, Error as NexusError, }; use omicron_common::address::{get_sled_address, NEXUS_INTERNAL_PORT}; use omicron_common::backoff::{ @@ -30,9 +26,7 @@ use omicron_common::backoff::{ }; use serde::{Deserialize, Serialize}; use sled_agent_client::{ - Client as SledAgentClient, - Error as SledAgentError, - types as SledAgentTypes, + types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; use slog::Logger; use std::collections::{HashMap, HashSet}; @@ -199,7 +193,9 @@ impl ServiceInner { crate::params::DatasetKind::CockroachDb { .. 
} ) }) { - return Err(SetupServiceError::BadConfig("RSS should only initialize CRDB services".into())); + return Err(SetupServiceError::BadConfig( + "RSS should only initialize CRDB services".into(), + )); } let dur = std::time::Duration::from_secs(60); @@ -222,14 +218,7 @@ impl ServiceInner { .filesystem_put(&dataset.clone().into()) .await .map_err(BackoffError::transient)?; - Ok::< - (), - BackoffError< - SledAgentError< - SledAgentTypes::Error, - >, - >, - >(()) + Ok::<(), BackoffError>>(()) }; let log_failure = |error, _| { warn!(self.log, "failed to create filesystem"; "error" => ?error); @@ -248,12 +237,7 @@ impl ServiceInner { let aaaa = datasets .iter() - .map(|dataset| { - ( - AAAA::Zone(dataset.id), - dataset.address, - ) - }) + .map(|dataset| (AAAA::Zone(dataset.id), dataset.address)) .collect::>(); let srv_key = SRV::Service("cockroachdb".into()); @@ -295,12 +279,7 @@ impl ServiceInner { }) .await .map_err(BackoffError::transient)?; - Ok::< - (), - BackoffError< - SledAgentError, - >, - >(()) + Ok::<(), BackoffError>>(()) }; let log_failure = |error, _| { warn!(self.log, "failed to initialize services"; "error" => ?error); @@ -308,11 +287,16 @@ impl ServiceInner { retry_notify(internal_service_policy(), services_put, log_failure) .await?; - // Initialize DNS records for the Nexus service. - let services: Vec<_> = services.iter().filter(|svc| { - matches!(svc.service_type, crate::params::ServiceType::Nexus { .. }) - }).collect(); + let services: Vec<_> = services + .iter() + .filter(|svc| { + matches!( + svc.service_type, + crate::params::ServiceType::Nexus { .. } + ) + }) + .collect(); // Early-exit for non-Nexus case if services.is_empty() { @@ -330,7 +314,7 @@ impl ServiceInner { NEXUS_INTERNAL_PORT, 0, 0, - ) + ), ) }) .collect::>(); @@ -395,23 +379,25 @@ impl ServiceInner { ) -> Result<(), SetupServiceError> { info!(self.log, "Handing off control to Nexus"); - let resolver = internal_dns_client::multiclient::create_resolver(config.az_subnet()) - .expect("Failed to create DNS resolver"); - let response = resolver.lookup_ip( - &SRV::Service("nexus".to_string()).to_string() - ).await.expect("Failed to lookup IP"); + let resolver = internal_dns_client::multiclient::create_resolver( + config.az_subnet(), + ) + .expect("Failed to create DNS resolver"); + let response = resolver + .lookup_ip(&SRV::Service("nexus".to_string()).to_string()) + .await + .expect("Failed to lookup IP"); - let nexus_address = response.iter() + let nexus_address = response + .iter() .next() - .map(|addr| { - SocketAddr::new(addr, NEXUS_INTERNAL_PORT) - }) + .map(|addr| SocketAddr::new(addr, NEXUS_INTERNAL_PORT)) .expect("no addresses returned from DNS resolver"); info!(self.log, "Nexus address: {}", nexus_address.to_string()); let nexus_client = NexusClient::new( &format!("http://{}", nexus_address), - self.log.new(o!("component" => "NexusClient")) + self.log.new(o!("component" => "NexusClient")), ); // Ensure we can quickly look up "Sled Agent Address" -> "UUID of sled". @@ -419,7 +405,8 @@ impl ServiceInner { // We need the ID when passing info to Nexus. 
let mut id_map = HashMap::new(); for (_, sled_request) in sled_plan.sleds.iter() { - id_map.insert(get_sled_address(sled_request.subnet), sled_request.id); + id_map + .insert(get_sled_address(sled_request.subnet), sled_request.id); } // Convert all the information we have about services and datasets into @@ -427,63 +414,59 @@ impl ServiceInner { let mut services: Vec = vec![]; let mut datasets: Vec = vec![]; for (addr, service_request) in service_plan.services.iter() { - let sled_id = *id_map.get(addr) + let sled_id = *id_map + .get(addr) .expect("Sled address in service plan, but not sled plan"); - for svc in service_request.services.iter().chain(service_request.dns_services.iter()) { + for svc in service_request + .services + .iter() + .chain(service_request.dns_services.iter()) + { let kind = match svc.service_type { ServiceType::Nexus { .. } => NexusTypes::ServiceKind::Nexus, - ServiceType::InternalDns { .. } => NexusTypes::ServiceKind::InternalDNS, + ServiceType::InternalDns { .. } => { + NexusTypes::ServiceKind::InternalDNS + } ServiceType::Oximeter => NexusTypes::ServiceKind::Oximeter, }; - services.push( - NexusTypes::ServicePutRequest { - service_id: svc.id, - sled_id, - // TODO: Should this be a vec, or a single value? - address: svc.addresses[0], - kind, - } - ) + services.push(NexusTypes::ServicePutRequest { + service_id: svc.id, + sled_id, + // TODO: Should this be a vec, or a single value? + address: svc.addresses[0], + kind, + }) } for dataset in service_request.datasets.iter() { - datasets.push( - NexusTypes::DatasetCreateRequest { - zpool_id: dataset.zpool_id, - dataset_id: dataset.id, - request: NexusTypes::DatasetPutRequest { - address: dataset.address.to_string(), - kind: dataset.dataset_kind.clone().into() - }, - } - ) + datasets.push(NexusTypes::DatasetCreateRequest { + zpool_id: dataset.zpool_id, + dataset_id: dataset.id, + request: NexusTypes::DatasetPutRequest { + address: dataset.address.to_string(), + kind: dataset.dataset_kind.clone().into(), + }, + }) } } - let request = NexusTypes::RackInitializationRequest { - services, - datasets, - }; + let request = + NexusTypes::RackInitializationRequest { services, datasets }; let notify_nexus = || async { - nexus_client.rack_initialization_complete( - &sled_plan.rack_id, - &request, - ) - .await - .map_err(BackoffError::transient) + nexus_client + .rack_initialization_complete(&sled_plan.rack_id, &request) + .await + .map_err(BackoffError::transient) }; let log_failure = |err, _| { info!(self.log, "Failed to handoff to nexus: {err}"); }; - retry_notify( - internal_service_policy(), - notify_nexus, - log_failure, - ).await?; + retry_notify(internal_service_policy(), notify_nexus, log_failure) + .await?; info!(self.log, "Handoff to Nexus is complete"); Ok(()) @@ -533,15 +516,13 @@ impl ServiceInner { "RSS configuration looks like it has already been applied", ); - let sled_plan = SledPlan::load(&self.log).await? + let sled_plan = SledPlan::load(&self.log) + .await? .expect("Sled plan should exist if completed marker exists"); - let service_plan = ServicePlan::load(&self.log).await? + let service_plan = ServicePlan::load(&self.log) + .await? 
.expect("Service plan should exist if completed marker exists"); - self.handoff_to_nexus( - &config, - &sled_plan, - &service_plan - ).await?; + self.handoff_to_nexus(&config, &sled_plan, &service_plan).await?; return Ok(()); } else { @@ -553,7 +534,9 @@ impl ServiceInner { // - Enough peers to create a new plan (if one does not exist) let maybe_sled_plan = SledPlan::load(&self.log).await?; let expectation = if let Some(plan) = &maybe_sled_plan { - PeerExpectation::LoadOldPlan(plan.sleds.keys().map(|a| *a.ip()).collect()) + PeerExpectation::LoadOldPlan( + plan.sleds.keys().map(|a| *a.ip()).collect(), + ) } else { PeerExpectation::CreateNewPlan(MINIMUM_SLED_COUNT) }; @@ -577,37 +560,38 @@ impl ServiceInner { // Forward the sled initialization requests to our sled-agent. local_bootstrap_agent .initialize_sleds( - plan.sleds.iter() + plan.sleds + .iter() .map(|(bootstrap_addr, initialization_request)| { - ( - *bootstrap_addr, - initialization_request.clone(), - ) + (*bootstrap_addr, initialization_request.clone()) }) .collect(), ) .await .map_err(SetupServiceError::SledInitialization)?; - let sled_addresses: Vec<_> = plan.sleds.iter() + let sled_addresses: Vec<_> = plan + .sleds + .iter() .map(|(_, initialization_request)| { - get_sled_address( - initialization_request.subnet, - ) + get_sled_address(initialization_request.subnet) }) .collect(); // Now that sled agents have been initialized, we can create // a service allocation plan. - let service_plan = if let Some(plan) = ServicePlan::load(&self.log).await? { - plan - } else { - ServicePlan::create(&self.log, &config, &sled_addresses).await? - }; + let service_plan = + if let Some(plan) = ServicePlan::load(&self.log).await? { + plan + } else { + ServicePlan::create(&self.log, &config, &sled_addresses).await? + }; // Set up internal DNS services. futures::future::join_all( - service_plan.services.iter() + service_plan + .services + .iter() .filter(|(_, service_request)| { // Only send requests to sleds that are supposed to be running // DNS services. @@ -638,11 +622,8 @@ impl ServiceInner { // Issue the crdb initialization requests to all sleds. futures::future::join_all(service_plan.services.iter().map( |(sled_address, services_request)| async move { - self.initialize_crdb( - *sled_address, - &services_request.datasets, - ) - .await?; + self.initialize_crdb(*sled_address, &services_request.datasets) + .await?; Ok(()) }, )) @@ -687,9 +668,7 @@ impl ServiceInner { // the requests on the next iteration. tokio::fs::File::create(&rss_completed_plan_path).await.map_err( |err| SetupServiceError::Io { - message: format!( - "creating {rss_completed_plan_path:?}" - ), + message: format!("creating {rss_completed_plan_path:?}"), err, }, )?; @@ -697,11 +676,7 @@ impl ServiceInner { // At this point, even if we reboot, we must not try to manage sleds, // services, or DNS records. - self.handoff_to_nexus( - &config, - &plan, - &service_plan - ).await?; + self.handoff_to_nexus(&config, &plan, &service_plan).await?; // TODO Questions to consider: // - What if a sled comes online *right after* this setup? 
How does diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 2e05648ffc3..6273e1f2a2f 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -46,10 +46,16 @@ impl Server { let lazy_nexus_client = LazyNexusClient::new(client_log, *addr.ip()); - let sled_agent = - SledAgent::new(&config, log.clone(), lazy_nexus_client.clone(), sled_id, addr, rack_id) - .await - .map_err(|e| e.to_string())?; + let sled_agent = SledAgent::new( + &config, + log.clone(), + lazy_nexus_client.clone(), + sled_id, + addr, + rack_id, + ) + .await + .map_err(|e| e.to_string())?; let mut dropshot_config = dropshot::ConfigDropshot::default(); dropshot_config.request_body_max_bytes = 1024 * 1024; @@ -77,7 +83,8 @@ impl Server { log, "contacting server nexus, registering sled: {}", sled_id ); - let nexus_client = lazy_nexus_client.get() + let nexus_client = lazy_nexus_client + .get() .await .map_err(|err| BackoffError::transient(err.to_string()))?; nexus_client @@ -97,7 +104,9 @@ impl Server { ); }; retry_notify( - internal_service_policy_with_max(std::time::Duration::from_secs(5)), + internal_service_policy_with_max( + std::time::Duration::from_secs(5), + ), notify_nexus, log_notification_failure, ) diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index b45c56b374c..9fbada9571e 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -14,7 +14,7 @@ use crate::instance_manager::InstanceManager; use crate::nexus::LazyNexusClient; use crate::params::{ DatasetKind, DiskStateRequested, InstanceHardware, InstanceMigrateParams, - InstanceRuntimeStateRequested, ServiceEnsureBody, Zpool + InstanceRuntimeStateRequested, ServiceEnsureBody, Zpool, }; use crate::services::{self, ServiceManager}; use crate::storage_manager::StorageManager; @@ -249,13 +249,7 @@ impl SledAgent { ) .await?; - Ok(SledAgent { - id, - storage, - instances, - lazy_nexus_client, - services, - }) + Ok(SledAgent { id, storage, instances, lazy_nexus_client, services }) } pub fn id(&self) -> Uuid { @@ -274,12 +268,8 @@ impl SledAgent { Ok(()) } - pub async fn zpools_get( - &self - ) -> Result, Error> { - let zpools = self.storage - .get_zpools() - .await?; + pub async fn zpools_get(&self) -> Result, Error> { + let zpools = self.storage.get_zpools().await?; Ok(zpools) } @@ -328,12 +318,13 @@ impl SledAgent { &self, artifact: UpdateArtifact, ) -> Result<(), Error> { - let nexus_client = self.lazy_nexus_client.get() + let nexus_client = self + .lazy_nexus_client + .get() .await // TODO: Handle error .unwrap(); - crate::updates::download_artifact(artifact, &nexus_client) - .await?; + crate::updates::download_artifact(artifact, &nexus_client).await?; Ok(()) } } diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 6305c41f949..5b82396b6fd 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -522,9 +522,7 @@ async fn ensure_running_zone( } } -type NotifyFut = dyn futures::Future< - Output = Result<(), String> - > + Send; +type NotifyFut = dyn futures::Future> + Send; #[derive(Debug)] struct NewFilesystemRequest { @@ -639,10 +637,14 @@ impl StorageWorker { lazy_nexus_client .get() .await - .map_err(|e| backoff::BackoffError::transient(e.to_string()))? + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })? 
.zpool_put(&sled_id, &pool_id, &zpool_request) .await - .map_err(|e| backoff::BackoffError::transient(e.to_string()))?; + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?; Ok(()) } }; @@ -684,10 +686,14 @@ impl StorageWorker { lazy_nexus_client .get() .await - .map_err(|e| backoff::BackoffError::transient(e.to_string()))? + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })? .dataset_put(&pool_id, &id, &request) .await - .map_err(|e| backoff::BackoffError::transient(e.to_string()))?; + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?; } Ok(()) @@ -955,11 +961,10 @@ impl StorageManager { pub async fn get_zpools(&self) -> Result, Error> { let pools = self.pools.lock().await; - Ok(pools.keys().map(|zpool| { - crate::params::Zpool { - id: zpool.id() - } - }).collect()) + Ok(pools + .keys() + .map(|zpool| crate::params::Zpool { id: zpool.id() }) + .collect()) } pub async fn upsert_filesystem( diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index c8652efb5ea..9fb540b075f 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -5,59 +5,3 @@ # |............| <- This /48 is the AZ Subnet # |...............| <- This /56 is the Rack Subnet rack_subnet = "fd00:1122:3344:0100::" - -# [[request]] -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate crucible datasets. -# [[request.dataset]] -# id = "09a9a25f-2602-4e2f-9630-31af9c492c3e" -# zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -# address = "[fd00:1122:3344:0101::6]:32345" -# dataset_kind.type = "crucible" -# -# [[request.dataset]] -# id = "2713b37a-3043-4ed5-aaff-f38200e45cfb" -# zpool_id = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" -# address = "[fd00:1122:3344:0101::7]:32345" -# dataset_kind.type = "crucible" -# -# [[request.dataset]] -# id = "ffd16cad-e5d5-495e-9c59-4312a3857d91" -# zpool_id = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" -# address = "[fd00:1122:3344:0101::8]:32345" -# dataset_kind.type = "crucible" - -# [[request.dataset]] -# id = "4d08fc19-3d5f-4f6b-9c48-925f8eac7255" -# zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -# address = "[fd00:1122:3344:0101::2]:32221" -# dataset_kind.type = "cockroach_db" -# dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate clickhouse datasets. -# [[request.dataset]] -# id = "a3505b41-a592-420b-84f2-3d76bf0e0a81" -# zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -# address = "[fd00:1122:3344:0101::5]:8123" -# dataset_kind.type = "clickhouse" - -# [[request.service]] -# id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" -# name = "nexus" -# addresses = [ "fd00:1122:3344:0101::3" ] -# gz_addresses = [] -# [request.service.service_type] -# type = "nexus" -# internal_address = "[fd00:1122:3344:0101::3]:12221" -# external_address = "[fd00:1122:3344:0101::3]:12220" - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate Oximeter services. 
-# [[request.service]] -# id = "1da65e5b-210c-4859-a7d7-200c1e659972" -# name = "oximeter" -# addresses = [ "fd00:1122:3344:0101::4" ] -# gz_addresses = [] -# [request.service.service_type] -# type = "oximeter" From e265f0d0b937d378eb64cd306231b24bf638992f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 12 Jun 2022 16:43:14 -0400 Subject: [PATCH 06/35] Await RSS handoff, even in tests --- nexus/src/lib.rs | 87 ++++++++++++++----- nexus/test-utils/src/lib.rs | 33 ++++++- .../tests/integration_tests/users_builtin.rs | 3 +- 3 files changed, 95 insertions(+), 28 deletions(-) diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 079a7a26f54..f743c7e19ba 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -66,23 +66,27 @@ pub fn run_openapi_internal() -> Result<(), String> { .map_err(|e| e.to_string()) } -/// Packages up a [`Nexus`], running both external and internal HTTP API servers -/// wired up to Nexus -pub struct Server { +/// A partially-initialized Nexus server, which exposes an internal interface, +/// but is not ready to receive external requests. +pub struct InternalServer<'a> { /// shared state used by API request handlers pub apictx: Arc, - /// dropshot server for external API - pub http_server_external: dropshot::HttpServer>, /// dropshot server for internal API pub http_server_internal: dropshot::HttpServer>, + + config: &'a Config, + log: Logger, } -impl Server { - /// Start a nexus server. +impl<'a> InternalServer<'a> { + /// Creates a Nexus instance with only the internal API exposed. + /// + /// This is often used as an argument when creating a [`Server`], + /// which also exposes the external API. pub async fn start( - config: &Config, + config: &'a Config, log: &Logger, - ) -> Result { + ) -> Result, String> { let log = log.new(o!("name" => config.runtime.id.to_string())); info!(log, "setting up nexus server"); @@ -100,33 +104,67 @@ impl Server { .map_err(|error| format!("initializing internal server: {}", error))?; let http_server_internal = http_server_starter_internal.start(); + Ok(Self { apictx, http_server_internal, config, log }) + } +} + +/// Packages up a [`Nexus`], running both external and internal HTTP API servers +/// wired up to Nexus +pub struct Server { + /// shared state used by API request handlers + pub apictx: Arc, + /// dropshot server for external API + pub http_server_external: dropshot::HttpServer>, + /// dropshot server for internal API + pub http_server_internal: dropshot::HttpServer>, +} + +impl Server { + pub async fn start<'a>( + internal: InternalServer<'a>, + ) -> Result { + let apictx = internal.apictx; + let http_server_internal = internal.http_server_internal; + let log = internal.log; + let config = internal.config; + // Wait until RSS handoff completes. - // TODO: This messes up the tests. Should we make this a config option? - // - // TODO: This actually raises a question; what triggers background tasks - // to execute? - // - // - Perhaps the API is exposed to tests? - // - Perhaps the invocation of that API is controlled by config - // options? 
- /* let opctx = apictx.nexus.opctx_for_background(); loop { - let result = apictx.nexus.rack_lookup(&opctx, &config.runtime.rack_id).await; + let result = + apictx.nexus.rack_lookup(&opctx, &config.runtime.rack_id).await; match result { Ok(rack) => { if rack.initialized { break; } - info!(log, "Still waiting for rack initialization: {:?}", rack); - }, + info!( + log, + "Still waiting for rack initialization: {:?}", rack + ); + } Err(e) => { warn!(log, "Cannot look up rack: {}", e); - }, + } } tokio::time::sleep(std::time::Duration::from_secs(2)).await; } - */ + + // TODO: What triggers background tasks to execute? + // + // - Perhaps the API is exposed to tests? + // - Perhaps the invocation of that API is controlled by config + // options? + // + // TODO: services we need to start: + // + // Datasets: + // - Crucible (as a dataset on each unique zpool) + // - Clickhouse (as a dataset on a zpool) + // - CRDB (prolly just check it exists, period) + // + // - Oximeter (as a service) + // - Nexus (again, maybe just check it exists at all) let http_server_starter_external = dropshot::HttpServerStarter::new( &config.runtime.dropshot_external, @@ -193,7 +231,8 @@ pub async fn run_server(config: &Config) -> Result<(), String> { } else { debug!(log, "registered DTrace probes"); } - let server = Server::start(config, &log).await?; + let internal_server = InternalServer::start(config, &log).await?; + let server = Server::start(internal_server).await?; server.register_as_producer().await; server.wait_for_finish().await } diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 48fa1fec479..d3a22be62e5 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -103,15 +103,42 @@ pub async fn test_setup_with_config( nexus_config::Database::FromUrl { url: database.pg_config().clone() }; config.pkg.timeseries_db.address.set_port(clickhouse.port()); - let server = - omicron_nexus::Server::start(&config, &logctx.log).await.unwrap(); - server + // Start the Nexus internal API. + let internal_server = + omicron_nexus::InternalServer::start(&config, &logctx.log) + .await + .unwrap(); + internal_server .apictx .nexus .wait_for_populate() .await .expect("Nexus never loaded users"); + // Perform the "handoff from RSS". + // + // However, RSS isn't running, so we'll do the handoff ourselves. + let opctx = internal_server.apictx.nexus.opctx_for_background(); + internal_server + .apictx + .nexus + .rack_initialize( + &opctx, + config.runtime.rack_id, + // NOTE: In the context of this test utility, we arguably do have an + // instance of CRDB and Nexus running. However, as this info isn't + // necessary for most tests, we pass no information here. + omicron_nexus::internal_api::params::RackInitializationRequest { + services: vec![], + datasets: vec![], + }, + ) + .await + .expect("Could not initialize rack"); + + // Start the Nexus external API. 
+ let server = omicron_nexus::Server::start(internal_server).await.unwrap(); + let testctx_external = ClientTestContext::new( server.http_server_external.local_addr(), logctx.log.new(o!("component" => "external client test context")), diff --git a/nexus/tests/integration_tests/users_builtin.rs b/nexus/tests/integration_tests/users_builtin.rs index c6d3615c9ef..0df3fbaf04b 100644 --- a/nexus/tests/integration_tests/users_builtin.rs +++ b/nexus/tests/integration_tests/users_builtin.rs @@ -27,7 +27,8 @@ async fn test_users_builtin(cptestctx: &ControlPlaneTestContext) { let u = users.remove(&authn::USER_DB_INIT.name.to_string()).unwrap(); assert_eq!(u.identity.id, authn::USER_DB_INIT.id); - let u = users.remove(&authn::USER_BACKGROUND_WORK.name.to_string()).unwrap(); + let u = + users.remove(&authn::USER_BACKGROUND_WORK.name.to_string()).unwrap(); assert_eq!(u.identity.id, authn::USER_BACKGROUND_WORK.id); let u = users.remove(&authn::USER_INTERNAL_API.name.to_string()).unwrap(); assert_eq!(u.identity.id, authn::USER_INTERNAL_API.id); From b5ca139e19cc34a385a011ad4f954eb4081c84a7 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 13 Jun 2022 01:13:42 -0400 Subject: [PATCH 07/35] Partway through service allocation - still very WIP --- common/src/address.rs | 4 +- nexus/src/app/background/mod.rs | 7 + nexus/src/app/background/services.rs | 285 +++++++++++++++++++++++++++ nexus/src/app/mod.rs | 11 ++ nexus/src/app/rack.rs | 33 ++++ nexus/src/db/datastore.rs | 59 +++++- nexus/src/lib.rs | 20 +- 7 files changed, 397 insertions(+), 22 deletions(-) create mode 100644 nexus/src/app/background/mod.rs create mode 100644 nexus/src/app/background/services.rs diff --git a/common/src/address.rs b/common/src/address.rs index b105588b587..7a3c488a82d 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -20,12 +20,12 @@ pub const SLED_PREFIX: u8 = 64; /// The amount of redundancy for DNS servers. /// /// Must be less than MAX_DNS_REDUNDANCY. -pub const DNS_REDUNDANCY: usize = 1; +pub const DNS_REDUNDANCY: u32 = 1; /// The maximum amount of redundancy for DNS servers. /// /// This determines the number of addresses which are /// reserved for DNS servers. -pub const MAX_DNS_REDUNDANCY: usize = 5; +pub const MAX_DNS_REDUNDANCY: u32 = 5; pub const DNS_PORT: u16 = 53; pub const DNS_SERVER_PORT: u16 = 5353; diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs new file mode 100644 index 00000000000..bd25adc89e9 --- /dev/null +++ b/nexus/src/app/background/mod.rs @@ -0,0 +1,7 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background tasks managed by Nexus. + +mod services; diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs new file mode 100644 index 00000000000..cbb1e8c81ba --- /dev/null +++ b/nexus/src/app/background/services.rs @@ -0,0 +1,285 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Task which ensures that expected Nexus services exist. 
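Both redundancy policies introduced below reduce to "top the count up to N"; a minimal sketch of that comparison, with an illustrative helper name that is not part of the patch:

    // Illustrative only: how many more instances a desired redundancy
    // implies, given how many are already provisioned. The task below walks
    // sleds (or reserved DNS subnets) and records each addition through the
    // datastore before asking sled agents to run it.
    fn additional_needed(desired: u32, existing: u32) -> u32 {
        desired.saturating_sub(existing)
    }
    // e.g. additional_needed(1, 0) == 1 for PerRack(1) on a fresh rack,
    // and additional_needed(1, 1) == 0 once the rack is already satisfied.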
+ +use crate::Nexus; +use crate::context::OpContext; +use crate::db::identity::Asset; +use crate::db::model::DatasetKind; +use crate::db::model::ServiceKind; +use omicron_common::api::external::Error; +use omicron_common::address::{DNS_REDUNDANCY, ReservedRackSubnet}; +use slog::Logger; +use std::sync::Arc; +use std::net::Ipv6Addr; +use uuid::Uuid; + +// Policy for the number of services to be provisioned. +#[derive(Debug)] +enum ServiceRedundancy { + // This service must exist on at least this many sleds + // within the racki. + PerRack(u32), + + // This service must exist on at least this many sleds + // within the availability zone. + DnsPerAz(u32), +} + +#[derive(Debug)] +struct ExpectedService { + kind: ServiceKind, + redundancy: ServiceRedundancy, +} + +const EXPECTED_SERVICES: [ExpectedService; 3] = [ + ExpectedService { + kind: ServiceKind::InternalDNS, + redundancy: ServiceRedundancy::DnsPerAz(DNS_REDUNDANCY), + }, + ExpectedService { + kind: ServiceKind::Nexus, + redundancy: ServiceRedundancy::PerRack(1), + }, + ExpectedService { + kind: ServiceKind::Oximeter, + redundancy: ServiceRedundancy::PerRack(1), + }, +]; + +pub struct ServiceWorker { + log: Logger, + nexus: Arc, +} + +impl ServiceWorker { + async fn ensure_rack_svc( + &self, + opctx: &OpContext, + expected_svc: &ExpectedService, + desired_count: u32, + ) -> Result<(), Error> { + // Look up all the sleds, both with and without the service. + let sleds_and_maybe_svcs = self.nexus + .datastore() + .sled_and_service_list( + opctx, + expected_svc.kind.clone(), + self.nexus.rack_id, + ) + .await?; + let (sleds_with_svc, sleds_without_svc): (Vec<_>, Vec<_>) = + sleds_and_maybe_svcs + .iter() + .partition(|(_, maybe_svc)| { + maybe_svc.is_some() + }); + let mut sleds_without_svc = sleds_without_svc.into_iter() + .map(|(sled, _)| sled); + let mut actual_count = sleds_with_svc.len() as u32; + + // Add services to sleds, in-order, until we've met a + // number sufficient for our redundancy. + while desired_count < actual_count { + let sled = sleds_without_svc.next().ok_or_else(|| { + Error::internal_error("Not enough sleds to deploy service") + })?; + let svc_id = Uuid::new_v4(); + let address = self.nexus.datastore() + .next_ipv6_address(&opctx, sled.id()) + .await?; + + self.nexus.upsert_service( + &opctx, + svc_id, + sled.id(), + address, + expected_svc.kind.clone() + ) + .await?; + + actual_count += 1; + } + + // TODO: Actually deploy service + + Ok(()) + } + + async fn ensure_dns_svc( + &self, + opctx: &OpContext, + expected_svc: &ExpectedService, + desired_count: u32, + ) -> Result<(), Error> { + if !matches!(expected_svc.kind, ServiceKind::InternalDNS) { + // NOTE: This is a constraint on how we allocate IP addresses + // within the AZ - however, as DNS is the only existing + // AZ-wide service, support for this has been punted. + return Err(Error::internal_error( + &format!("DNS is the only suppoted svc ({:?} is not supported)", expected_svc), + )); + } + + // Look up all existing DNS services. + // + // Note that we should not look up "all services" - as internal DNS servers + // are rack-wide, this would be too expensive of an operation. + let existing_services = self.nexus + .datastore() + .dns_service_list(opctx) + .await?; + + let mut actual_count = existing_services.len() as u32; + + // Get all subnets not allocated to existing services. 
+ let mut usable_dns_subnets = ReservedRackSubnet(self.nexus.rack_subnet) + .get_dns_subnets() + .into_iter() + .filter(|subnet| { + // This address is only usable if none of the existing + // DNS services are using it. + existing_services.iter() + .all(|svc| Ipv6Addr::from(svc.ip) != subnet.dns_address().ip()) + }); + + // Get all sleds which aren't already running DNS services. + let mut target_sleds = self.nexus + .datastore() + .sled_list_with_limit(opctx, desired_count) + .await? + .into_iter() + .filter(|sled| { + // The target sleds are only considered if they aren't already + // running a DNS service. + existing_services.iter() + .all(|svc| svc.sled_id != sled.id()) + }); + + while desired_count < actual_count { + let sled = target_sleds.next().ok_or_else(|| { + Error::internal_error("Not enough sleds to deploy service") + })?; + let svc_id = Uuid::new_v4(); + let dns_subnet = usable_dns_subnets.next().ok_or_else(|| { + Error::internal_error("Not enough IPs to deploy service") + })?; + let address = dns_subnet + .dns_address() + .ip(); + + self.nexus.upsert_service( + &opctx, + svc_id, + sled.id(), + address, + expected_svc.kind.clone() + ) + .await?; + + actual_count += 1; + } + + // TODO: actually deploy service + + Ok(()) + } + + // Provides a single point-in-time evaluation and adjustment of + // the services provisioned within the rack. + // + // May adjust the provisioned services to meet the redundancy of the + // rack, if necessary. + // + // TODO: Can we: + // - [ ] Put these steps in a saga, to ensure they happen + // - [ ] Use a state variable on the rack to ensure mutual exclusion + // of service re-balancing. It's an involved operation; it would + // be nice to not be conflicting with anyone else while operating - + // and also helps us avoid using transactions. + pub async fn ensure_services_provisioned( + &self, + opctx: &OpContext, + ) -> Result<(), Error> { + for expected_svc in &EXPECTED_SERVICES { + info!( + self.log, + "Ensuring service {:?} exists according to redundancy {:?}", + expected_svc.kind, + expected_svc.redundancy, + ); + match expected_svc.redundancy { + ServiceRedundancy::PerRack(desired_count) => { + self.ensure_rack_svc(opctx, expected_svc, desired_count).await?; + }, + ServiceRedundancy::DnsPerAz(desired_count) => { + self.ensure_dns_svc(opctx, expected_svc, desired_count).await?; + } + } + } + + // Strategy: + // + // TODO Step 1. In a transaction: + // - Look up all sleds within the Rack + // - Look up all the services of a particular kind (e.g., Oximeter) + // - IF enough exist, exit early. + // - ELSE assign services to sleds. Write to Db. + // + // Step 2. As follow-up: request those svcs execute on sleds. + + Ok(()) + + } +} + +// Redundancy for the number of datasets to be provisioned. +enum DatasetRedundancy { + // The dataset should exist on all zpools. + OnAll, + // The dataset should exist on at least this many zpools. 
+ PerRack(u32), +} + +struct ExpectedDataset { + kind: DatasetKind, + redundancy: DatasetRedundancy, +} + +const EXPECTED_DATASERT: [ExpectedDataset; 3] = [ + ExpectedDataset { + kind: DatasetKind::Crucible, + redundancy: DatasetRedundancy::OnAll, + }, + ExpectedDataset { + kind: DatasetKind::Cockroach, + redundancy: DatasetRedundancy::PerRack(1), + }, + ExpectedDataset { + kind: DatasetKind::Clickhouse, + redundancy: DatasetRedundancy::PerRack(1), + }, +]; + +fn ensure_datasets_provisioned() { + // TODO: + // - [ ] Each zpool has Crucible + // - [ ] Clickhouse exists on N zpools + // - [ ] CRDB exists on N zpools + + // Strategy: + // + // Step 1. In a transaction: + // - Look up all sleds within the Rack + // - Look up all zpools within those sleds + // + // - Look up all the services of a particular kind (e.g., Oximeter) + // - IF enough exist, exit early. + // - ELSE assign services to sleds. Write to Db. + // + // Step 2. As follow-up: request those datasets exist on sleds. + + +} diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 27f8fd650fc..51c326390dd 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -14,6 +14,7 @@ use crate::populate::PopulateArgs; use crate::populate::PopulateStatus; use crate::saga_interface::SagaContext; use anyhow::anyhow; +use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; use omicron_common::api::external::Error; use slog::Logger; use std::sync::Arc; @@ -39,6 +40,9 @@ mod vpc; mod vpc_router; mod vpc_subnet; +// Background tasks exist in the "background" module. +mod background; + // Sagas are not part of the "Nexus" implementation, but they are // application logic. mod sagas; @@ -58,6 +62,9 @@ pub struct Nexus { /// uuid for this rack rack_id: Uuid, + /// subnet of this rack + rack_subnet: Ipv6Subnet, + /// general server log log: Logger, @@ -151,6 +158,7 @@ impl Nexus { let nexus = Nexus { id: config.runtime.id, rack_id, + rack_subnet: config.runtime.subnet, log: log.new(o!()), db_datastore: Arc::clone(&db_datastore), authz: Arc::clone(&authz), @@ -234,6 +242,9 @@ impl Nexus { } /// Returns an [`OpContext`] used for background tasks. + // TODO: Probably should be making a *new* opctx here? + // + // I think there should be one-per-"op", to get better metrics on bg ops. pub fn opctx_for_background(&self) -> &OpContext { &self.opctx_background_work } diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index dde3df7449c..f180395d4e1 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -93,4 +93,37 @@ impl super::Nexus { Ok(()) } + + /// Awaits the initialization of the rack. + /// + /// This will occur by either: + /// 1. RSS invoking the internal API, handing off responsibility, or + /// 2. Re-reading a value from the DB, if the rack has already been + /// initialized. + /// + /// See RFD 278 for additional context. 
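The nexus/src/lib.rs hunk later in this patch is the intended caller; condensed, that call site looks roughly like this (no new API is assumed here, the two lines are lifted from that hunk):

    // The external-facing server is not brought up until this returns,
    // i.e. until RSS has called the internal handoff API or the rack
    // record already reads as initialized.
    let opctx = apictx.nexus.opctx_for_background();
    apictx.nexus.await_rack_initialization(&opctx).await;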
+ pub async fn await_rack_initialization( + &self, + opctx: &OpContext + ) { + loop { + let result = self.rack_lookup(&opctx, &self.rack_id).await; + match result { + Ok(rack) => { + if rack.initialized { + return; + } + info!( + self.log, + "Still waiting for rack initialization: {:?}", rack + ); + } + Err(e) => { + warn!(self.log, "Cannot look up rack: {}", e); + } + } + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + } + + } } diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index f1cba756c85..2b3285395e3 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -55,7 +55,7 @@ use crate::db::{ Instance, InstanceRuntimeState, Name, NetworkInterface, Organization, OrganizationUpdate, OximeterInfo, ProducerEndpoint, Project, ProjectUpdate, Rack, Region, RoleAssignment, RoleBuiltin, RouterRoute, - RouterRouteUpdate, Service, Silo, SiloUser, Sled, SshKey, + RouterRouteUpdate, Service, ServiceKind, Silo, SiloUser, Sled, SshKey, UpdateAvailableArtifact, UserBuiltin, Volume, Vpc, VpcFirewallRule, VpcRouter, VpcRouterUpdate, VpcSubnet, VpcSubnetUpdate, VpcUpdate, Zpool, @@ -397,6 +397,63 @@ impl DataStore { .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) } + // TODO: de-duplicate with sled_list? + pub async fn sled_list_with_limit( + &self, + opctx: &OpContext, + limit: u32, + ) -> ListResultVec { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::sled::dsl; + dsl::sled + .filter(dsl::time_deleted.is_null()) + .limit(limit as i64) + .select(Sled::as_select()) + .load_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + + // TODO-correctness: Filter the sleds by rack ID! + // This filtering will feasible when Sleds store a FK for + // the rack on which they're stored. + pub async fn sled_and_service_list( + &self, + opctx: &OpContext, + kind: ServiceKind, + _rack_id: Uuid, + ) -> ListResultVec<(Sled, Option)> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::service::dsl as svc_dsl; + use db::schema::sled::dsl as sled_dsl; + + db::schema::sled::table + .filter(sled_dsl::time_deleted.is_null()) + .left_outer_join(db::schema::service::table.on( + svc_dsl::id.eq(svc_dsl::sled_id) + )) + .filter(svc_dsl::kind.eq(kind)) + .select(<(Sled, Option)>::as_select()) + .get_results_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + + pub async fn dns_service_list( + &self, + opctx: &OpContext, + ) -> ListResultVec { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::service::dsl as svc; + + svc::service + .filter(svc::kind.eq(ServiceKind::InternalDNS)) + .select(Service::as_select()) + .get_results_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + /// Stores a new zpool in the database. pub async fn zpool_upsert(&self, zpool: Zpool) -> CreateResult { use db::schema::zpool::dsl; diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index f743c7e19ba..60881613181 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -130,25 +130,7 @@ impl Server { // Wait until RSS handoff completes. 
let opctx = apictx.nexus.opctx_for_background(); - loop { - let result = - apictx.nexus.rack_lookup(&opctx, &config.runtime.rack_id).await; - match result { - Ok(rack) => { - if rack.initialized { - break; - } - info!( - log, - "Still waiting for rack initialization: {:?}", rack - ); - } - Err(e) => { - warn!(log, "Cannot look up rack: {}", e); - } - } - tokio::time::sleep(std::time::Duration::from_secs(2)).await; - } + apictx.nexus.await_rack_initialization(&opctx).await; // TODO: What triggers background tasks to execute? // From 7e986b85a00bb9a35ba03c6cade5222ff440da01 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 13 Jun 2022 23:51:08 -0400 Subject: [PATCH 08/35] v1 of nexus-managed services is code complete; no tests yet --- common/src/address.rs | 1 + nexus/src/app/background/mod.rs | 33 ++ nexus/src/app/background/services.rs | 409 +++++++++++++------------ nexus/src/app/mod.rs | 30 +- nexus/src/db/datastore.rs | 432 +++++++++++++++++++++++++-- nexus/src/db/model/dataset.rs | 3 +- nexus/src/db/model/dataset_kind.rs | 11 + nexus/src/lib.rs | 3 + 8 files changed, 698 insertions(+), 224 deletions(-) diff --git a/common/src/address.rs b/common/src/address.rs index 7a3c488a82d..bd5d03da2e6 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -35,6 +35,7 @@ pub const SLED_AGENT_PORT: u16 = 12345; pub const PROPOLIS_PORT: u16 = 12400; pub const COCKROACH_PORT: u16 = 32221; pub const CRUCIBLE_PORT: u16 = 32345; +pub const CLICKHOUSE_PORT: u16 = 8123; pub const NEXUS_EXTERNAL_PORT: u16 = 12220; pub const NEXUS_INTERNAL_PORT: u16 = 12221; diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index bd25adc89e9..d091fdd7858 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -5,3 +5,36 @@ //! Background tasks managed by Nexus. mod services; + +use crate::app::Nexus; +use std::sync::Arc; +use tokio::task::{spawn, JoinHandle}; + +/// Management structure which encapsulates periodically-executing background +/// tasks. +pub struct TaskRunner { + _handle: JoinHandle<()>, +} + +impl TaskRunner { + pub fn new(nexus: Arc) -> Self { + let handle = spawn(async move { + let log = nexus.log.new(o!("component" => "BackgroundTaskRunner")); + let service_balancer = services::ServiceBalancer::new(log.clone(), nexus.clone()); + + loop { + // TODO: We may want triggers to exist here, to invoke this task + // more frequently (e.g., on Sled failure). 
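One possible shape for such a trigger, sketched only: it assumes a shared Arc<tokio::sync::Notify> (called rebalance_needed here, a hypothetical handle owned by whatever detects a sled failure) alongside the periodic timer; none of this is part of the patch itself:

    // Sketch: wake the loop on either a timer tick or an explicit nudge,
    // rather than a bare sleep.
    let mut ticker =
        tokio::time::interval(std::time::Duration::from_secs(30));
    loop {
        tokio::select! {
            _ = ticker.tick() => {}
            _ = rebalance_needed.notified() => {}
        }
        // ...then run `service_balancer.balance_services(&opctx)` as below.
    }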
+ let opctx = nexus.opctx_for_background(); + if let Err(e) = service_balancer.balance_services(&opctx).await { + warn!(log, "Failed to balance services: {:?}", e); + } + + tokio::time::sleep(std::time::Duration::from_secs(30)).await; + } + }); + Self { + _handle: handle, + } + } +} diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index cbb1e8c81ba..6f60dc09904 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -6,15 +6,23 @@ use crate::Nexus; use crate::context::OpContext; +use crate::db::datastore::DatasetRedundancy; use crate::db::identity::Asset; +use crate::db::model::Dataset; use crate::db::model::DatasetKind; +use crate::db::model::Service; use crate::db::model::ServiceKind; +use crate::db::model::Sled; +use crate::db::model::Zpool; use omicron_common::api::external::Error; -use omicron_common::address::{DNS_REDUNDANCY, ReservedRackSubnet}; +use omicron_common::address::{ + DNS_REDUNDANCY, NEXUS_INTERNAL_PORT, NEXUS_EXTERNAL_PORT, DNS_SERVER_PORT, DNS_PORT +}; +use sled_agent_client::types as SledAgentTypes; use slog::Logger; +use std::collections::{HashMap, HashSet}; use std::sync::Arc; -use std::net::Ipv6Addr; -use uuid::Uuid; +use std::net::{Ipv6Addr, SocketAddrV6}; // Policy for the number of services to be provisioned. #[derive(Debug)] @@ -49,237 +57,254 @@ const EXPECTED_SERVICES: [ExpectedService; 3] = [ }, ]; -pub struct ServiceWorker { +#[derive(Debug)] +struct ExpectedDataset { + kind: DatasetKind, + redundancy: DatasetRedundancy, +} + +const EXPECTED_DATASETS: [ExpectedDataset; 3] = [ + ExpectedDataset { + kind: DatasetKind::Crucible, + redundancy: DatasetRedundancy::OnAll, + }, + ExpectedDataset { + kind: DatasetKind::Cockroach, + redundancy: DatasetRedundancy::PerRack(1), + }, + ExpectedDataset { + kind: DatasetKind::Clickhouse, + redundancy: DatasetRedundancy::PerRack(1), + }, +]; + +pub struct ServiceBalancer { log: Logger, nexus: Arc, } -impl ServiceWorker { - async fn ensure_rack_svc( +impl ServiceBalancer { + pub fn new(log: Logger, nexus: Arc) -> Self { + Self { + log, + nexus, + } + } + + // Reaches out to all sled agents implied in "services", and + // requests that the desired services are executing. + async fn instantiate_services( &self, opctx: &OpContext, - expected_svc: &ExpectedService, - desired_count: u32, + services: Vec ) -> Result<(), Error> { - // Look up all the sleds, both with and without the service. - let sleds_and_maybe_svcs = self.nexus - .datastore() - .sled_and_service_list( - opctx, - expected_svc.kind.clone(), - self.nexus.rack_id, - ) - .await?; - let (sleds_with_svc, sleds_without_svc): (Vec<_>, Vec<_>) = - sleds_and_maybe_svcs - .iter() - .partition(|(_, maybe_svc)| { - maybe_svc.is_some() - }); - let mut sleds_without_svc = sleds_without_svc.into_iter() - .map(|(sled, _)| sled); - let mut actual_count = sleds_with_svc.len() as u32; + let mut sled_ids = HashSet::new(); + for svc in &services { + sled_ids.insert(svc.sled_id); + } - // Add services to sleds, in-order, until we've met a - // number sufficient for our redundancy. - while desired_count < actual_count { - let sled = sleds_without_svc.next().ok_or_else(|| { - Error::internal_error("Not enough sleds to deploy service") - })?; - let svc_id = Uuid::new_v4(); - let address = self.nexus.datastore() - .next_ipv6_address(&opctx, sled.id()) - .await?; + // For all sleds requiring an update, request all services be + // instantiated. 
+ for sled_id in &sled_ids { + // TODO: This interface kinda sucks; ideally we would + // only insert the *new* services. + // + // Inserting the old ones too is costing us an extra query. + let services = self.nexus.datastore().service_list(opctx, *sled_id).await?; + let sled_client = self.nexus.sled_client(sled_id).await?; + + sled_client.services_put(&SledAgentTypes::ServiceEnsureBody { + services: services.iter().map(|s| { + let address = Ipv6Addr::from(s.ip); + let (name, service_type) = Self::get_service_name_and_type(address, s.kind.clone()); + + SledAgentTypes::ServiceRequest { + id: s.id(), + name: name.to_string(), + addresses: vec![address], + gz_addresses: vec![], + service_type, + } + }).collect() + }).await?; + } + Ok(()) + } - self.nexus.upsert_service( - &opctx, - svc_id, - sled.id(), - address, - expected_svc.kind.clone() + // Translates (address, db kind) to Sled Agent client types. + fn get_service_name_and_type( + address: Ipv6Addr, + kind: ServiceKind + ) -> (String, SledAgentTypes::ServiceType) { + match kind { + ServiceKind::Nexus => { + ( + "nexus".to_string(), + SledAgentTypes::ServiceType::Nexus { + internal_address: SocketAddrV6::new(address, NEXUS_INTERNAL_PORT, 0, 0).to_string(), + external_address: SocketAddrV6::new(address, NEXUS_EXTERNAL_PORT, 0, 0).to_string(), + } ) - .await?; - - actual_count += 1; + }, + ServiceKind::InternalDNS => { + ( + "internal-dns".to_string(), + SledAgentTypes::ServiceType::InternalDns { + server_address: SocketAddrV6::new(address, DNS_SERVER_PORT, 0, 0).to_string(), + dns_address: SocketAddrV6::new(address, DNS_PORT, 0, 0).to_string(), + }, + ) + }, + ServiceKind::Oximeter => { + ( + "oximeter".to_string(), + SledAgentTypes::ServiceType::Oximeter, + ) + }, } - - // TODO: Actually deploy service - - Ok(()) } - async fn ensure_dns_svc( + async fn ensure_rack_service( &self, opctx: &OpContext, - expected_svc: &ExpectedService, + kind: ServiceKind, desired_count: u32, ) -> Result<(), Error> { - if !matches!(expected_svc.kind, ServiceKind::InternalDNS) { - // NOTE: This is a constraint on how we allocate IP addresses - // within the AZ - however, as DNS is the only existing - // AZ-wide service, support for this has been punted. - return Err(Error::internal_error( - &format!("DNS is the only suppoted svc ({:?} is not supported)", expected_svc), - )); - } - - // Look up all existing DNS services. - // - // Note that we should not look up "all services" - as internal DNS servers - // are rack-wide, this would be too expensive of an operation. - let existing_services = self.nexus + // Provision the services within the database. + let new_services = self.nexus .datastore() - .dns_service_list(opctx) + .ensure_rack_service( + opctx, + self.nexus.rack_id, + kind, + desired_count, + ) .await?; - let mut actual_count = existing_services.len() as u32; - - // Get all subnets not allocated to existing services. - let mut usable_dns_subnets = ReservedRackSubnet(self.nexus.rack_subnet) - .get_dns_subnets() - .into_iter() - .filter(|subnet| { - // This address is only usable if none of the existing - // DNS services are using it. - existing_services.iter() - .all(|svc| Ipv6Addr::from(svc.ip) != subnet.dns_address().ip()) - }); + // Actually instantiate those services. + self.instantiate_services(opctx, new_services).await + } - // Get all sleds which aren't already running DNS services. 
- let mut target_sleds = self.nexus + async fn ensure_dns_service( + &self, + opctx: &OpContext, + desired_count: u32, + ) -> Result<(), Error> { + // Provision the services within the database. + let new_services = self.nexus .datastore() - .sled_list_with_limit(opctx, desired_count) - .await? - .into_iter() - .filter(|sled| { - // The target sleds are only considered if they aren't already - // running a DNS service. - existing_services.iter() - .all(|svc| svc.sled_id != sled.id()) - }); - - while desired_count < actual_count { - let sled = target_sleds.next().ok_or_else(|| { - Error::internal_error("Not enough sleds to deploy service") - })?; - let svc_id = Uuid::new_v4(); - let dns_subnet = usable_dns_subnets.next().ok_or_else(|| { - Error::internal_error("Not enough IPs to deploy service") - })?; - let address = dns_subnet - .dns_address() - .ip(); - - self.nexus.upsert_service( - &opctx, - svc_id, - sled.id(), - address, - expected_svc.kind.clone() - ) - .await?; - - actual_count += 1; - } - - // TODO: actually deploy service + .ensure_dns_service(opctx, self.nexus.rack_subnet, desired_count) + .await?; - Ok(()) + // Actually instantiate those services. + self.instantiate_services(opctx, new_services).await } - // Provides a single point-in-time evaluation and adjustment of - // the services provisioned within the rack. - // - // May adjust the provisioned services to meet the redundancy of the - // rack, if necessary. - // - // TODO: Can we: - // - [ ] Put these steps in a saga, to ensure they happen - // - [ ] Use a state variable on the rack to ensure mutual exclusion - // of service re-balancing. It's an involved operation; it would - // be nice to not be conflicting with anyone else while operating - - // and also helps us avoid using transactions. - pub async fn ensure_services_provisioned( + // TODO: Consider using sagas to ensure the rollout of services happens. + // Not using sagas *happens* to be fine because these operations are + // re-tried periodically, but that's kind forcing a dependency on the + // caller. + async fn ensure_services_provisioned( &self, opctx: &OpContext, ) -> Result<(), Error> { for expected_svc in &EXPECTED_SERVICES { - info!( - self.log, - "Ensuring service {:?} exists according to redundancy {:?}", - expected_svc.kind, - expected_svc.redundancy, - ); + info!(self.log, "Ensuring service {:?} exists", expected_svc); match expected_svc.redundancy { ServiceRedundancy::PerRack(desired_count) => { - self.ensure_rack_svc(opctx, expected_svc, desired_count).await?; + self.ensure_rack_service(opctx, expected_svc.kind.clone(), desired_count).await?; }, ServiceRedundancy::DnsPerAz(desired_count) => { - self.ensure_dns_svc(opctx, expected_svc, desired_count).await?; + self.ensure_dns_service(opctx, desired_count).await?; } } } - - // Strategy: - // - // TODO Step 1. In a transaction: - // - Look up all sleds within the Rack - // - Look up all the services of a particular kind (e.g., Oximeter) - // - IF enough exist, exit early. - // - ELSE assign services to sleds. Write to Db. - // - // Step 2. As follow-up: request those svcs execute on sleds. - Ok(()) - } -} - -// Redundancy for the number of datasets to be provisioned. -enum DatasetRedundancy { - // The dataset should exist on all zpools. - OnAll, - // The dataset should exist on at least this many zpools. 
- PerRack(u32), -} -struct ExpectedDataset { - kind: DatasetKind, - redundancy: DatasetRedundancy, -} + async fn ensure_rack_dataset( + &self, + opctx: &OpContext, + kind: DatasetKind, + redundancy: DatasetRedundancy, + ) -> Result<(), Error> { + // Provision the datasets within the database. + let new_datasets = self.nexus + .datastore() + .ensure_rack_dataset( + opctx, + self.nexus.rack_id, + kind, + redundancy, + ) + .await?; -const EXPECTED_DATASERT: [ExpectedDataset; 3] = [ - ExpectedDataset { - kind: DatasetKind::Crucible, - redundancy: DatasetRedundancy::OnAll, - }, - ExpectedDataset { - kind: DatasetKind::Cockroach, - redundancy: DatasetRedundancy::PerRack(1), - }, - ExpectedDataset { - kind: DatasetKind::Clickhouse, - redundancy: DatasetRedundancy::PerRack(1), - }, -]; + // Actually instantiate those datasets. + self.instantiate_datasets(new_datasets).await + } -fn ensure_datasets_provisioned() { - // TODO: - // - [ ] Each zpool has Crucible - // - [ ] Clickhouse exists on N zpools - // - [ ] CRDB exists on N zpools + // Reaches out to all sled agents implied in "services", and + // requests that the desired services are executing. + async fn instantiate_datasets( + &self, + datasets: Vec<(Sled, Zpool, Dataset)> + ) -> Result<(), Error> { + let mut sled_clients = HashMap::new(); + + for (sled, zpool, dataset) in &datasets { + let sled_client = { + match sled_clients.get(&sled.id()) { + Some(client) => client, + None => { + let sled_client = self.nexus.sled_client(&sled.id()).await?; + sled_clients.insert(sled.id(), sled_client); + sled_clients.get(&sled.id()).unwrap() + } + } + }; + + let dataset_kind = match dataset.kind { + // TODO: This set of "all addresses" isn't right. + // TODO: ... should we even be using "all addresses" to contact CRDB? + DatasetKind::Cockroach => SledAgentTypes::DatasetKind::CockroachDb(vec![]), + DatasetKind::Crucible => SledAgentTypes::DatasetKind::Crucible, + DatasetKind::Clickhouse => SledAgentTypes::DatasetKind::Clickhouse, + }; + + // Instantiate each dataset. + sled_client.filesystem_put(&SledAgentTypes::DatasetEnsureBody { + id: dataset.id(), + zpool_id: zpool.id(), + dataset_kind, + address: dataset.address().to_string(), + }).await?; + } - // Strategy: - // - // Step 1. In a transaction: - // - Look up all sleds within the Rack - // - Look up all zpools within those sleds - // - // - Look up all the services of a particular kind (e.g., Oximeter) - // - IF enough exist, exit early. - // - ELSE assign services to sleds. Write to Db. - // - // Step 2. As follow-up: request those datasets exist on sleds. + Ok(()) + } + async fn ensure_datasets_provisioned( + &self, + opctx: &OpContext, + ) -> Result<(), Error> { + for expected_dataset in &EXPECTED_DATASETS { + info!(self.log, "Ensuring dataset {:?} exists", expected_dataset); + self.ensure_rack_dataset(opctx, expected_dataset.kind.clone(), expected_dataset.redundancy).await? + } + Ok(()) + } + // Provides a single point-in-time evaluation and adjustment of + // the services provisioned within the rack. + // + // May adjust the provisioned services to meet the redundancy of the + // rack, if necessary. 
+ pub async fn balance_services( + &self, + opctx: &OpContext, + ) -> Result<(), Error> { + self.ensure_datasets_provisioned(opctx).await?; + self.ensure_services_provisioned(opctx).await?; + Ok(()) + } } diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 51c326390dd..61a4fbf7d22 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -18,6 +18,7 @@ use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; use omicron_common::api::external::Error; use slog::Logger; use std::sync::Arc; +use tokio::sync::OnceCell; use uuid::Uuid; // The implementation of Nexus is large, and split into a number of submodules @@ -83,6 +84,9 @@ pub struct Nexus { /// Status of background task to populate database populate_status: tokio::sync::watch::Receiver, + /// Background task for Nexus. + background_task_runner: OnceCell, + /// Client to the timeseries database. timeseries_client: oximeter_db::Client, @@ -97,9 +101,6 @@ pub struct Nexus { /// Operational context used for external request authentication opctx_external_authn: OpContext, - - /// Operational context used for Nexus-driven background tasks - opctx_background_work: OpContext, } // TODO Is it possible to make some of these operations more generic? A @@ -165,6 +166,7 @@ impl Nexus { sec_client: Arc::clone(&sec_client), recovery_task: std::sync::Mutex::new(None), populate_status, + background_task_runner: OnceCell::new(), timeseries_client, updates_config: config.pkg.updates.clone(), tunables: config.pkg.tunables.clone(), @@ -180,12 +182,6 @@ impl Nexus { authn::Context::external_authn(), Arc::clone(&db_datastore), ), - opctx_background_work: OpContext::for_background( - log.new(o!("component" => "Background Work")), - Arc::clone(&authz), - authn::Context::internal_db_background(), - Arc::clone(&db_datastore), - ), }; // TODO-cleanup all the extra Arcs here seems wrong @@ -236,6 +232,13 @@ impl Nexus { } } + pub fn start_background_tasks(self: &Arc) -> Result<(), anyhow::Error> { + let nexus = self.clone(); + self.background_task_runner.set( + background::TaskRunner::new(nexus) + ).map_err(|error| anyhow!(error.to_string())) + } + /// Returns an [`OpContext`] used for authenticating external requests pub fn opctx_external_authn(&self) -> &OpContext { &self.opctx_external_authn @@ -245,8 +248,13 @@ impl Nexus { // TODO: Probably should be making a *new* opctx here? // // I think there should be one-per-"op", to get better metrics on bg ops. 
- pub fn opctx_for_background(&self) -> &OpContext { - &self.opctx_background_work + pub fn opctx_for_background(&self) -> OpContext { + OpContext::for_background( + self.log.new(o!("component" => "BackgroundWork")), + Arc::clone(&self.authz), + authn::Context::internal_db_background(), + Arc::clone(&self.db_datastore), + ) } /// Used as the body of a "stub" endpoint -- one that's currently diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 2b3285395e3..23b66b0bb4f 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -74,6 +74,9 @@ use diesel::query_builder::{QueryFragment, QueryId}; use diesel::query_dsl::methods::LoadQuery; use diesel::upsert::excluded; use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; +use omicron_common::address::{ + RACK_PREFIX, Ipv6Subnet, ReservedRackSubnet, +}; use omicron_common::api; use omicron_common::api::external; use omicron_common::api::external::DataPageParams; @@ -90,7 +93,7 @@ use omicron_common::api::external::{ use omicron_common::bail_unless; use sled_agent_client::types as sled_client_types; use std::convert::{TryFrom, TryInto}; -use std::net::Ipv6Addr; +use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::sync::Arc; use uuid::Uuid; @@ -119,6 +122,15 @@ impl RunnableQuery for T where { } +// Redundancy for the number of datasets to be provisioned. +#[derive(Clone, Copy, Debug)] +pub enum DatasetRedundancy { + // The dataset should exist on all zpools. + OnAll, + // The dataset should exist on at least this many zpools. + PerRack(u32), +} + pub struct DataStore { pool: Arc, } @@ -397,18 +409,28 @@ impl DataStore { .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) } - // TODO: de-duplicate with sled_list? - pub async fn sled_list_with_limit( - &self, - opctx: &OpContext, + pub fn sled_list_with_limit_sync( + conn: &mut DbConnection, limit: u32, - ) -> ListResultVec { - opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + ) -> Result, diesel::result::Error> { use db::schema::sled::dsl; dsl::sled .filter(dsl::time_deleted.is_null()) .limit(limit as i64) .select(Sled::as_select()) + .load(conn) + } + + pub async fn service_list( + &self, + opctx: &OpContext, + sled_id: Uuid, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::service::dsl; + dsl::service + .filter(dsl::sled_id.eq(sled_id)) + .select(Service::as_select()) .load_async(self.pool_authorized(opctx).await?) .await .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) @@ -417,41 +439,307 @@ impl DataStore { // TODO-correctness: Filter the sleds by rack ID! // This filtering will feasible when Sleds store a FK for // the rack on which they're stored. - pub async fn sled_and_service_list( - &self, - opctx: &OpContext, - kind: ServiceKind, + pub fn sled_and_service_list_sync( + conn: &mut DbConnection, _rack_id: Uuid, - ) -> ListResultVec<(Sled, Option)> { - opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + kind: ServiceKind, + ) -> Result)>, diesel::result::Error> { use db::schema::service::dsl as svc_dsl; use db::schema::sled::dsl as sled_dsl; db::schema::sled::table .filter(sled_dsl::time_deleted.is_null()) .left_outer_join(db::schema::service::table.on( - svc_dsl::id.eq(svc_dsl::sled_id) + svc_dsl::sled_id.eq(sled_dsl::id) )) .filter(svc_dsl::kind.eq(kind)) .select(<(Sled, Option)>::as_select()) - .get_results_async(self.pool_authorized(opctx).await?) 
+ .get_results(conn) + } + + pub async fn ensure_rack_service( + &self, + opctx: &OpContext, + rack_id: Uuid, + kind: ServiceKind, + redundancy: u32, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + + #[derive(Debug)] + enum ServiceError { + NotEnoughSleds, + Other(Error), + } + type TxnError = TransactionError; + + self.pool() + .transaction(move |conn| { + let sleds_and_maybe_svcs = Self::sled_and_service_list_sync( + conn, + rack_id, + kind.clone(), + )?; + + // Split the set of returned sleds into "those with" and "those + // without" the requested service. + let (sleds_with_svc, sleds_without_svc): (Vec<_>, Vec<_>) = + sleds_and_maybe_svcs + .iter() + .partition(|(_, maybe_svc)| { + maybe_svc.is_some() + }); + let mut sleds_without_svc = sleds_without_svc.into_iter() + .map(|(sled, _)| sled); + let existing_count = sleds_with_svc.len(); + + // Add services to sleds, in-order, until we've met a + // number sufficient for our redundancy. + // + // The selection of "which sleds run this service" is completely + // arbitrary. + let mut new_svcs = vec![]; + while (redundancy as usize) < existing_count + new_svcs.len() { + let sled = sleds_without_svc.next().ok_or_else(|| { + TxnError::CustomError(ServiceError::NotEnoughSleds) + })?; + let svc_id = Uuid::new_v4(); + let address = Self::next_ipv6_address_sync(conn, sled.id()) + .map_err(|e| TxnError::CustomError(ServiceError::Other(e)))?; + + let service = db::model::Service::new( + svc_id, + sled.id(), + address, + kind.clone() + ); + + // TODO: Can we insert all the services at the same time? + let svc = Self::service_upsert_sync(conn, service) + .map_err(|e| TxnError::CustomError(ServiceError::Other(e)))?; + new_svcs.push(svc); + } + + return Ok(new_svcs); + }) .await - .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + .map_err(|e| match e { + TxnError::CustomError(ServiceError::NotEnoughSleds) => { + Error::unavail("Not enough sleds for service allocation") + }, + TxnError::CustomError(ServiceError::Other(e)) => e, + TxnError::Pool(e) => public_error_from_diesel_pool(e, ErrorHandler::Server) + }) } - pub async fn dns_service_list( + pub async fn ensure_dns_service( &self, opctx: &OpContext, - ) -> ListResultVec { + rack_subnet: Ipv6Subnet, + redundancy: u32, + ) -> Result, Error> { opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + + #[derive(Debug)] + enum ServiceError { + NotEnoughSleds, + NotEnoughIps, + Other(Error), + } + type TxnError = TransactionError; + + self.pool() + .transaction(move |conn| { + let existing_services = Self::dns_service_list_sync(conn)?; + let existing_count = existing_services.len(); + + // Get all subnets not allocated to existing services. + let mut usable_dns_subnets = ReservedRackSubnet(rack_subnet) + .get_dns_subnets() + .into_iter() + .filter(|subnet| { + // This address is only usable if none of the existing + // DNS services are using it. + existing_services.iter() + .all(|svc| Ipv6Addr::from(svc.ip) != subnet.dns_address().ip()) + }); + + + // Get all sleds which aren't already running DNS services. + let mut target_sleds = Self::sled_list_with_limit_sync(conn, redundancy)? + .into_iter() + .filter(|sled| { + // The target sleds are only considered if they aren't already + // running a DNS service. 
+ existing_services.iter() + .all(|svc| svc.sled_id != sled.id()) + }); + + let mut new_svcs = vec![]; + while (redundancy as usize) < existing_count + new_svcs.len() { + let sled = target_sleds.next().ok_or_else(|| { + TxnError::CustomError(ServiceError::NotEnoughSleds) + })?; + let svc_id = Uuid::new_v4(); + let dns_subnet = usable_dns_subnets.next().ok_or_else(|| { + TxnError::CustomError(ServiceError::NotEnoughIps) + })?; + let address = dns_subnet + .dns_address() + .ip(); + + let service = db::model::Service::new( + svc_id, + sled.id(), + address, + ServiceKind::InternalDNS, + ); + + // TODO: Can we insert all the services at the same time? + let svc = Self::service_upsert_sync(conn, service) + .map_err(|e| TxnError::CustomError(ServiceError::Other(e)))?; + + new_svcs.push(svc); + } + return Ok(new_svcs); + }) + .await + .map_err(|e| match e { + TxnError::CustomError(ServiceError::NotEnoughSleds) => { + Error::unavail("Not enough sleds for service allocation") + }, + TxnError::CustomError(ServiceError::NotEnoughIps) => { + Error::unavail("Not enough IP addresses for service allocation") + }, + TxnError::CustomError(ServiceError::Other(e)) => e, + TxnError::Pool(e) => public_error_from_diesel_pool(e, ErrorHandler::Server) + }) + } + + fn dns_service_list_sync( + conn: &mut DbConnection, + ) -> Result, diesel::result::Error> { use db::schema::service::dsl as svc; svc::service .filter(svc::kind.eq(ServiceKind::InternalDNS)) .select(Service::as_select()) - .get_results_async(self.pool_authorized(opctx).await?) + .get_results(conn) + } + + // TODO: Filter by rack ID + pub fn sled_zpool_and_dataset_list_sync( + conn: &mut DbConnection, + _rack_id: Uuid, + kind: DatasetKind, + ) -> Result)>, diesel::result::Error> { + use db::schema::sled::dsl as sled_dsl; + use db::schema::zpool::dsl as zpool_dsl; + use db::schema::dataset::dsl as dataset_dsl; + + db::schema::sled::table + .filter(sled_dsl::time_deleted.is_null()) + .inner_join(db::schema::zpool::table.on( + zpool_dsl::sled_id.eq(sled_dsl::id) + )) + .filter(zpool_dsl::time_deleted.is_null()) + .left_outer_join(db::schema::dataset::table.on( + dataset_dsl::pool_id.eq(zpool_dsl::id) + )) + .filter(dataset_dsl::kind.eq(kind)) + .select(<(Sled, Zpool, Option)>::as_select()) + .get_results(conn) + } + + pub async fn ensure_rack_dataset( + &self, + opctx: &OpContext, + rack_id: Uuid, + kind: DatasetKind, + redundancy: DatasetRedundancy, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + + #[derive(Debug)] + enum DatasetError { + NotEnoughZpools, + Other(Error), + } + type TxnError = TransactionError; + + self.pool() + .transaction(move |conn| { + let sleds_zpools_and_maybe_datasets = Self::sled_zpool_and_dataset_list_sync( + conn, + rack_id, + kind.clone(), + )?; + + // Split the set of returned zpools into "those with" and "those + // without" the requested dataset. + let (zpools_with_dataset, zpools_without_dataset): (Vec<_>, Vec<_>) = + sleds_zpools_and_maybe_datasets + .into_iter() + .partition(|(_, _, maybe_dataset)| { + maybe_dataset.is_some() + }); + let mut zpools_without_dataset = zpools_without_dataset.into_iter() + .map(|(sled, zpool, _)| (sled, zpool)) + .peekable(); + let existing_count = zpools_with_dataset.len(); + + // Add services to zpools, in-order, until we've met a + // number sufficient for our redundancy. + // + // The selection of "which zpools run this service" is completely + // arbitrary. 
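Read as a predicate, the allocation loop below keeps going until the redundancy policy is met; a sketch of that termination check, assuming the intent is to stop once enough datasets exist (the helper is illustrative, not part of the patch):

    // Illustrative only: provisioning is complete once every zpool is
    // covered (OnAll) or the requested per-rack count has been reached.
    fn provisioning_done(
        redundancy: DatasetRedundancy,
        remaining_zpools: usize,
        provisioned: usize,
    ) -> bool {
        match redundancy {
            DatasetRedundancy::OnAll => remaining_zpools == 0,
            DatasetRedundancy::PerRack(count) => {
                provisioned >= count as usize
            }
        }
    }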
+ let mut new_datasets = vec![]; + + loop { + match redundancy { + DatasetRedundancy::OnAll => { + if zpools_without_dataset.peek().is_none() { + break; + } + }, + DatasetRedundancy::PerRack(count) => { + if (count as usize) >= existing_count + new_datasets.len() { + break; + } + }, + }; + + let (sled, zpool) = zpools_without_dataset.next().ok_or_else(|| { + TxnError::CustomError(DatasetError::NotEnoughZpools) + })?; + let dataset_id = Uuid::new_v4(); + let address = Self::next_ipv6_address_sync(conn, sled.id()) + .map_err(|e| TxnError::CustomError(DatasetError::Other(e))) + .map(|ip| SocketAddr::V6(SocketAddrV6::new(ip, kind.port(), 0, 0)))?; + + let dataset = db::model::Dataset::new( + dataset_id, + zpool.id(), + address, + kind.clone() + ); + + // TODO: Can we insert all the datasets at the same time? + let dataset = Self::dataset_upsert_sync(conn, dataset) + .map_err(|e| TxnError::CustomError(DatasetError::Other(e)))?; + new_datasets.push((sled, zpool, dataset)); + } + + return Ok(new_datasets); + }) .await - .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + .map_err(|e| match e { + TxnError::CustomError(DatasetError::NotEnoughZpools) => { + Error::unavail("Not enough sleds for dataset allocation") + }, + TxnError::CustomError(DatasetError::Other(e)) => e, + TxnError::Pool(e) => public_error_from_diesel_pool(e, ErrorHandler::Server) + }) } /// Stores a new zpool in the database. @@ -531,6 +819,44 @@ impl DataStore { }) } + /// Stores a new dataset in the database. + pub fn dataset_upsert_sync( + conn: &mut DbConnection, + dataset: Dataset, + ) -> CreateResult { + use db::schema::dataset::dsl; + + let zpool_id = dataset.pool_id; + Zpool::insert_resource( + zpool_id, + diesel::insert_into(dsl::dataset) + .values(dataset.clone()) + .on_conflict(dsl::id) + .do_update() + .set(( + dsl::time_modified.eq(Utc::now()), + dsl::pool_id.eq(excluded(dsl::pool_id)), + dsl::ip.eq(excluded(dsl::ip)), + dsl::port.eq(excluded(dsl::port)), + dsl::kind.eq(excluded(dsl::kind)), + )), + ) + .insert_and_get_result(conn) + .map_err(|e| match e { + SyncInsertError::CollectionNotFound => Error::ObjectNotFound { + type_name: ResourceType::Zpool, + lookup_type: LookupType::ById(zpool_id), + }, + SyncInsertError::DatabaseError(e) => { + public_error_from_diesel_create( + e, + ResourceType::Dataset, + &dataset.id().to_string(), + ) + } + }) + } + /// Stores a new service in the database. 
pub async fn service_upsert( &self, @@ -572,6 +898,42 @@ impl DataStore { }) } + pub fn service_upsert_sync( + conn: &mut DbConnection, + service: Service, + ) -> CreateResult { + use db::schema::service::dsl; + + let sled_id = service.sled_id; + Sled::insert_resource( + sled_id, + diesel::insert_into(dsl::service) + .values(service.clone()) + .on_conflict(dsl::id) + .do_update() + .set(( + dsl::time_modified.eq(Utc::now()), + dsl::sled_id.eq(excluded(dsl::sled_id)), + dsl::ip.eq(excluded(dsl::ip)), + dsl::kind.eq(excluded(dsl::kind)), + )), + ) + .insert_and_get_result(conn) + .map_err(|e| match e { + SyncInsertError::CollectionNotFound => Error::ObjectNotFound { + type_name: ResourceType::Sled, + lookup_type: LookupType::ById(sled_id), + }, + SyncInsertError::DatabaseError(e) => { + public_error_from_diesel_create( + e, + ResourceType::Service, + &service.id().to_string(), + ) + } + }) + } + fn get_allocated_regions_query( volume_id: Uuid, ) -> impl RunnableQuery<(Dataset, Region)> { @@ -3578,6 +3940,36 @@ impl DataStore { } } + /// Return the next available IPv6 address for an Oxide service running on + /// the provided sled. + pub fn next_ipv6_address_sync( + conn: &mut DbConnection, + sled_id: Uuid, + ) -> Result { + use db::schema::sled::dsl; + let net = diesel::update( + dsl::sled.find(sled_id).filter(dsl::time_deleted.is_null()), + ) + .set(dsl::last_used_address.eq(dsl::last_used_address + 1)) + .returning(dsl::last_used_address) + .get_result(conn) + .map_err(|e| { + public_error_from_diesel_lookup( + e, + ResourceType::Sled, + &LookupType::ById(sled_id), + ) + })?; + + // TODO-correctness: We could ensure that this address is actually + // within the sled's underlay prefix, once that's included in the + // database record. + match net { + ipnetwork::IpNetwork::V6(net) => Ok(net.ip()), + _ => panic!("Sled IP must be IPv6"), + } + } + pub async fn global_image_list_images( &self, opctx: &OpContext, diff --git a/nexus/src/db/model/dataset.rs b/nexus/src/db/model/dataset.rs index fd4d24eee40..fa4e238fb47 100644 --- a/nexus/src/db/model/dataset.rs +++ b/nexus/src/db/model/dataset.rs @@ -38,11 +38,12 @@ pub struct Dataset { ip: ipnetwork::IpNetwork, port: SqlU16, - kind: DatasetKind, + pub kind: DatasetKind, pub size_used: Option, } impl Dataset { + // TODO: Only operate on SocketAddrV6 pub fn new( id: Uuid, pool_id: Uuid, diff --git a/nexus/src/db/model/dataset_kind.rs b/nexus/src/db/model/dataset_kind.rs index c760a12f53c..effced69522 100644 --- a/nexus/src/db/model/dataset_kind.rs +++ b/nexus/src/db/model/dataset_kind.rs @@ -6,6 +6,7 @@ use super::impl_enum_type; use crate::internal_api; use serde::{Deserialize, Serialize}; use std::io::Write; +use omicron_common::address::{COCKROACH_PORT, CRUCIBLE_PORT, CLICKHOUSE_PORT}; impl_enum_type!( #[derive(SqlType, Debug, QueryId)] @@ -22,6 +23,16 @@ impl_enum_type!( Clickhouse => b"clickhouse" ); +impl DatasetKind { + pub fn port(&self) -> u16 { + match self { + DatasetKind::Crucible => CRUCIBLE_PORT, + DatasetKind::Cockroach => COCKROACH_PORT, + DatasetKind::Clickhouse => CLICKHOUSE_PORT, + } + } +} + impl From for DatasetKind { fn from(k: internal_api::params::DatasetKind) -> Self { match k { diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 60881613181..b5d3ccc5085 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -131,6 +131,9 @@ impl Server { // Wait until RSS handoff completes. 
let opctx = apictx.nexus.opctx_for_background(); apictx.nexus.await_rack_initialization(&opctx).await; + apictx.nexus + .start_background_tasks() + .map_err(|e| e.to_string())?; // TODO: What triggers background tasks to execute? // From 95a5873b1e7bbc919a7be7258d68fe60548cf30f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 14:27:52 -0400 Subject: [PATCH 09/35] Add indices, add tests, fix bugs --- common/src/address.rs | 2 +- common/src/sql/dbinit.sql | 27 +- nexus/src/app/background/mod.rs | 10 +- nexus/src/app/background/services.rs | 183 +++--- nexus/src/app/mod.rs | 10 +- nexus/src/app/rack.rs | 6 +- nexus/src/app/sled.rs | 2 +- nexus/src/db/datastore.rs | 803 +++++++++++++++++++++++---- nexus/src/db/model/dataset_kind.rs | 4 +- nexus/src/db/model/service.rs | 2 +- nexus/src/db/model/service_kind.rs | 2 +- nexus/src/db/model/sled.rs | 7 +- nexus/src/db/model/zpool.rs | 2 +- nexus/src/db/schema.rs | 1 + nexus/src/lib.rs | 20 +- sled-agent/src/storage_manager.rs | 20 +- 16 files changed, 858 insertions(+), 243 deletions(-) diff --git a/common/src/address.rs b/common/src/address.rs index bd5d03da2e6..7284ba4cc64 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -178,7 +178,7 @@ mod test { // Observe the first DNS subnet within this reserved rack subnet. let dns_subnets = rack_subnet.get_dns_subnets(); - assert_eq!(DNS_REDUNDANCY, dns_subnets.len()); + assert_eq!(DNS_REDUNDANCY, dns_subnets.len() as u32); // The DNS address and GZ address should be only differing by one. assert_eq!( diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index d13d3ba2b04..d568cc833b8 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -75,6 +75,9 @@ CREATE TABLE omicron.public.sled ( time_deleted TIMESTAMPTZ, rcgen INT NOT NULL, + /* FK into the Rack table */ + rack_id UUID NOT NULL, + /* The IP address and bound port of the sled agent server. 
*/ ip INET NOT NULL, port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, @@ -83,6 +86,12 @@ CREATE TABLE omicron.public.sled ( last_used_address INET NOT NULL ); +/* Add an index which lets us look up the sleds on a rack */ +CREATE INDEX ON omicron.public.sled ( + rack_id +) WHERE + time_deleted IS NULL; + /* * Services */ @@ -109,7 +118,13 @@ CREATE TABLE omicron.public.service ( /* Add an index which lets us look up the services on a sled */ CREATE INDEX ON omicron.public.service ( - sled_id + sled_id, + kind +); + +/* Add an index which lets us look up services of a particular kind on a sled */ +CREATE INDEX ON omicron.public.service ( + kind ); /* @@ -132,6 +147,11 @@ CREATE TABLE omicron.public.Zpool ( total_size INT NOT NULL ); +/* Create an index which allows looking up all zpools on a sled */ +CREATE INDEX on omicron.public.Zpool ( + sled_id +) WHERE time_deleted IS NULL; + CREATE TYPE omicron.public.dataset_kind AS ENUM ( 'crucible', 'cockroach', @@ -162,6 +182,11 @@ CREATE TABLE omicron.public.Dataset ( size_used INT ); +/* Create an index which allows looking up all datasets in a pool */ +CREATE INDEX on omicron.public.Dataset ( + pool_id +) WHERE time_deleted IS NULL; + /* Create an index on the size usage for Crucible's allocation */ CREATE INDEX on omicron.public.Dataset ( size_used diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index d091fdd7858..82e08c2b680 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -20,21 +20,21 @@ impl TaskRunner { pub fn new(nexus: Arc) -> Self { let handle = spawn(async move { let log = nexus.log.new(o!("component" => "BackgroundTaskRunner")); - let service_balancer = services::ServiceBalancer::new(log.clone(), nexus.clone()); + let service_balancer = + services::ServiceBalancer::new(log.clone(), nexus.clone()); loop { // TODO: We may want triggers to exist here, to invoke this task // more frequently (e.g., on Sled failure). let opctx = nexus.opctx_for_background(); - if let Err(e) = service_balancer.balance_services(&opctx).await { + if let Err(e) = service_balancer.balance_services(&opctx).await + { warn!(log, "Failed to balance services: {:?}", e); } tokio::time::sleep(std::time::Duration::from_secs(30)).await; } }); - Self { - _handle: handle, - } + Self { _handle: handle } } } diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index 6f60dc09904..8aee6b7f3b2 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -4,7 +4,6 @@ //! Task which ensures that expected Nexus services exist. -use crate::Nexus; use crate::context::OpContext; use crate::db::datastore::DatasetRedundancy; use crate::db::identity::Asset; @@ -14,15 +13,17 @@ use crate::db::model::Service; use crate::db::model::ServiceKind; use crate::db::model::Sled; use crate::db::model::Zpool; -use omicron_common::api::external::Error; +use crate::Nexus; use omicron_common::address::{ - DNS_REDUNDANCY, NEXUS_INTERNAL_PORT, NEXUS_EXTERNAL_PORT, DNS_SERVER_PORT, DNS_PORT + DNS_PORT, DNS_REDUNDANCY, DNS_SERVER_PORT, NEXUS_EXTERNAL_PORT, + NEXUS_INTERNAL_PORT, }; +use omicron_common::api::external::Error; use sled_agent_client::types as SledAgentTypes; use slog::Logger; use std::collections::{HashMap, HashSet}; -use std::sync::Arc; use std::net::{Ipv6Addr, SocketAddrV6}; +use std::sync::Arc; // Policy for the number of services to be provisioned. 
#[derive(Debug)] @@ -85,10 +86,7 @@ pub struct ServiceBalancer { impl ServiceBalancer { pub fn new(log: Logger, nexus: Arc) -> Self { - Self { - log, - nexus, - } + Self { log, nexus } } // Reaches out to all sled agents implied in "services", and @@ -96,7 +94,7 @@ impl ServiceBalancer { async fn instantiate_services( &self, opctx: &OpContext, - services: Vec + services: Vec, ) -> Result<(), Error> { let mut sled_ids = HashSet::new(); for svc in &services { @@ -110,23 +108,33 @@ impl ServiceBalancer { // only insert the *new* services. // // Inserting the old ones too is costing us an extra query. - let services = self.nexus.datastore().service_list(opctx, *sled_id).await?; + let services = + self.nexus.datastore().service_list(opctx, *sled_id).await?; let sled_client = self.nexus.sled_client(sled_id).await?; - sled_client.services_put(&SledAgentTypes::ServiceEnsureBody { - services: services.iter().map(|s| { - let address = Ipv6Addr::from(s.ip); - let (name, service_type) = Self::get_service_name_and_type(address, s.kind.clone()); + sled_client + .services_put(&SledAgentTypes::ServiceEnsureBody { + services: services + .iter() + .map(|s| { + let address = Ipv6Addr::from(s.ip); + let (name, service_type) = + Self::get_service_name_and_type( + address, + s.kind.clone(), + ); - SledAgentTypes::ServiceRequest { - id: s.id(), - name: name.to_string(), - addresses: vec![address], - gz_addresses: vec![], - service_type, - } - }).collect() - }).await?; + SledAgentTypes::ServiceRequest { + id: s.id(), + name: name.to_string(), + addresses: vec![address], + gz_addresses: vec![], + service_type, + } + }) + .collect(), + }) + .await?; } Ok(()) } @@ -134,33 +142,45 @@ impl ServiceBalancer { // Translates (address, db kind) to Sled Agent client types. fn get_service_name_and_type( address: Ipv6Addr, - kind: ServiceKind + kind: ServiceKind, ) -> (String, SledAgentTypes::ServiceType) { match kind { - ServiceKind::Nexus => { - ( - "nexus".to_string(), - SledAgentTypes::ServiceType::Nexus { - internal_address: SocketAddrV6::new(address, NEXUS_INTERNAL_PORT, 0, 0).to_string(), - external_address: SocketAddrV6::new(address, NEXUS_EXTERNAL_PORT, 0, 0).to_string(), - } - ) - }, - ServiceKind::InternalDNS => { - ( - "internal-dns".to_string(), - SledAgentTypes::ServiceType::InternalDns { - server_address: SocketAddrV6::new(address, DNS_SERVER_PORT, 0, 0).to_string(), - dns_address: SocketAddrV6::new(address, DNS_PORT, 0, 0).to_string(), - }, - ) - }, + ServiceKind::Nexus => ( + "nexus".to_string(), + SledAgentTypes::ServiceType::Nexus { + internal_address: SocketAddrV6::new( + address, + NEXUS_INTERNAL_PORT, + 0, + 0, + ) + .to_string(), + external_address: SocketAddrV6::new( + address, + NEXUS_EXTERNAL_PORT, + 0, + 0, + ) + .to_string(), + }, + ), + ServiceKind::InternalDNS => ( + "internal-dns".to_string(), + SledAgentTypes::ServiceType::InternalDns { + server_address: SocketAddrV6::new( + address, + DNS_SERVER_PORT, + 0, + 0, + ) + .to_string(), + dns_address: SocketAddrV6::new(address, DNS_PORT, 0, 0) + .to_string(), + }, + ), ServiceKind::Oximeter => { - ( - "oximeter".to_string(), - SledAgentTypes::ServiceType::Oximeter, - ) - }, + ("oximeter".to_string(), SledAgentTypes::ServiceType::Oximeter) + } } } @@ -171,14 +191,10 @@ impl ServiceBalancer { desired_count: u32, ) -> Result<(), Error> { // Provision the services within the database. 
- let new_services = self.nexus + let new_services = self + .nexus .datastore() - .ensure_rack_service( - opctx, - self.nexus.rack_id, - kind, - desired_count, - ) + .ensure_rack_service(opctx, self.nexus.rack_id, kind, desired_count) .await?; // Actually instantiate those services. @@ -191,7 +207,8 @@ impl ServiceBalancer { desired_count: u32, ) -> Result<(), Error> { // Provision the services within the database. - let new_services = self.nexus + let new_services = self + .nexus .datastore() .ensure_dns_service(opctx, self.nexus.rack_subnet, desired_count) .await?; @@ -208,12 +225,20 @@ impl ServiceBalancer { &self, opctx: &OpContext, ) -> Result<(), Error> { + // NOTE: If any sleds host DNS + other redudant services, we send + // redundant requests. We could propagate the service list up to a + // higher level, and do instantiation after all services complete? for expected_svc in &EXPECTED_SERVICES { info!(self.log, "Ensuring service {:?} exists", expected_svc); match expected_svc.redundancy { ServiceRedundancy::PerRack(desired_count) => { - self.ensure_rack_service(opctx, expected_svc.kind.clone(), desired_count).await?; - }, + self.ensure_rack_service( + opctx, + expected_svc.kind.clone(), + desired_count, + ) + .await?; + } ServiceRedundancy::DnsPerAz(desired_count) => { self.ensure_dns_service(opctx, desired_count).await?; } @@ -229,14 +254,10 @@ impl ServiceBalancer { redundancy: DatasetRedundancy, ) -> Result<(), Error> { // Provision the datasets within the database. - let new_datasets = self.nexus + let new_datasets = self + .nexus .datastore() - .ensure_rack_dataset( - opctx, - self.nexus.rack_id, - kind, - redundancy, - ) + .ensure_rack_dataset(opctx, self.nexus.rack_id, kind, redundancy) .await?; // Actually instantiate those datasets. @@ -247,7 +268,7 @@ impl ServiceBalancer { // requests that the desired services are executing. async fn instantiate_datasets( &self, - datasets: Vec<(Sled, Zpool, Dataset)> + datasets: Vec<(Sled, Zpool, Dataset)>, ) -> Result<(), Error> { let mut sled_clients = HashMap::new(); @@ -256,7 +277,8 @@ impl ServiceBalancer { match sled_clients.get(&sled.id()) { Some(client) => client, None => { - let sled_client = self.nexus.sled_client(&sled.id()).await?; + let sled_client = + self.nexus.sled_client(&sled.id()).await?; sled_clients.insert(sled.id(), sled_client); sled_clients.get(&sled.id()).unwrap() } @@ -266,18 +288,24 @@ impl ServiceBalancer { let dataset_kind = match dataset.kind { // TODO: This set of "all addresses" isn't right. // TODO: ... should we even be using "all addresses" to contact CRDB? - DatasetKind::Cockroach => SledAgentTypes::DatasetKind::CockroachDb(vec![]), + DatasetKind::Cockroach => { + SledAgentTypes::DatasetKind::CockroachDb(vec![]) + } DatasetKind::Crucible => SledAgentTypes::DatasetKind::Crucible, - DatasetKind::Clickhouse => SledAgentTypes::DatasetKind::Clickhouse, + DatasetKind::Clickhouse => { + SledAgentTypes::DatasetKind::Clickhouse + } }; // Instantiate each dataset. 
- sled_client.filesystem_put(&SledAgentTypes::DatasetEnsureBody { - id: dataset.id(), - zpool_id: zpool.id(), - dataset_kind, - address: dataset.address().to_string(), - }).await?; + sled_client + .filesystem_put(&SledAgentTypes::DatasetEnsureBody { + id: dataset.id(), + zpool_id: zpool.id(), + dataset_kind, + address: dataset.address().to_string(), + }) + .await?; } Ok(()) @@ -289,7 +317,12 @@ impl ServiceBalancer { ) -> Result<(), Error> { for expected_dataset in &EXPECTED_DATASETS { info!(self.log, "Ensuring dataset {:?} exists", expected_dataset); - self.ensure_rack_dataset(opctx, expected_dataset.kind.clone(), expected_dataset.redundancy).await? + self.ensure_rack_dataset( + opctx, + expected_dataset.kind.clone(), + expected_dataset.redundancy, + ) + .await? } Ok(()) } diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 61a4fbf7d22..0abf1d4abfd 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -232,11 +232,13 @@ impl Nexus { } } - pub fn start_background_tasks(self: &Arc) -> Result<(), anyhow::Error> { + pub fn start_background_tasks( + self: &Arc, + ) -> Result<(), anyhow::Error> { let nexus = self.clone(); - self.background_task_runner.set( - background::TaskRunner::new(nexus) - ).map_err(|error| anyhow!(error.to_string())) + self.background_task_runner + .set(background::TaskRunner::new(nexus)) + .map_err(|error| anyhow!(error.to_string())) } /// Returns an [`OpContext`] used for authenticating external requests diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index f180395d4e1..5abf7b4ea99 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -102,10 +102,7 @@ impl super::Nexus { /// initialized. /// /// See RFD 278 for additional context. - pub async fn await_rack_initialization( - &self, - opctx: &OpContext - ) { + pub async fn await_rack_initialization(&self, opctx: &OpContext) { loop { let result = self.rack_lookup(&opctx, &self.rack_id).await; match result { @@ -124,6 +121,5 @@ impl super::Nexus { } tokio::time::sleep(std::time::Duration::from_secs(2)).await; } - } } diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 0150cbec148..e4fc616f095 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -31,7 +31,7 @@ impl super::Nexus { address: SocketAddrV6, ) -> Result<(), Error> { info!(self.log, "registered sled agent"; "sled_uuid" => id.to_string()); - let sled = db::model::Sled::new(id, address); + let sled = db::model::Sled::new(id, address, self.rack_id); self.db_datastore.sled_upsert(sled).await?; Ok(()) } diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 23b66b0bb4f..81afcc25fdc 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -75,7 +75,7 @@ use diesel::query_dsl::methods::LoadQuery; use diesel::upsert::excluded; use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; use omicron_common::address::{ - RACK_PREFIX, Ipv6Subnet, ReservedRackSubnet, + Ipv6Subnet, ReservedRackSubnet, DNS_REDUNDANCY, RACK_PREFIX, }; use omicron_common::api; use omicron_common::api::external; @@ -409,7 +409,7 @@ impl DataStore { .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) } - pub fn sled_list_with_limit_sync( + fn sled_list_with_limit_sync( conn: &mut DbConnection, limit: u32, ) -> Result, diesel::result::Error> { @@ -436,12 +436,9 @@ impl DataStore { .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) } - // TODO-correctness: Filter the sleds by rack ID! 
- // This filtering will feasible when Sleds store a FK for - // the rack on which they're stored. - pub fn sled_and_service_list_sync( + fn sled_and_service_list_sync( conn: &mut DbConnection, - _rack_id: Uuid, + rack_id: Uuid, kind: ServiceKind, ) -> Result)>, diesel::result::Error> { use db::schema::service::dsl as svc_dsl; @@ -449,10 +446,12 @@ impl DataStore { db::schema::sled::table .filter(sled_dsl::time_deleted.is_null()) - .left_outer_join(db::schema::service::table.on( - svc_dsl::sled_id.eq(sled_dsl::id) - )) - .filter(svc_dsl::kind.eq(kind)) + .filter(sled_dsl::rack_id.eq(rack_id)) + .left_outer_join( + db::schema::service::table.on(svc_dsl::sled_id + .eq(sled_dsl::id) + .and(svc_dsl::kind.eq(kind.clone()))), + ) .select(<(Sled, Option)>::as_select()) .get_results(conn) } @@ -476,59 +475,80 @@ impl DataStore { self.pool() .transaction(move |conn| { let sleds_and_maybe_svcs = Self::sled_and_service_list_sync( - conn, - rack_id, - kind.clone(), - )?; + conn, + rack_id, + kind.clone(), + )?; + + eprintln!( + "Observed sleds/services: {:?}", + sleds_and_maybe_svcs + ); // Split the set of returned sleds into "those with" and "those // without" the requested service. let (sleds_with_svc, sleds_without_svc): (Vec<_>, Vec<_>) = sleds_and_maybe_svcs - .iter() - .partition(|(_, maybe_svc)| { - maybe_svc.is_some() - }); - let mut sleds_without_svc = sleds_without_svc.into_iter() - .map(|(sled, _)| sled); - let existing_count = sleds_with_svc.len(); + .into_iter() + .partition(|(_, maybe_svc)| maybe_svc.is_some()); + // Identify sleds without services (targets for future + // allocation). + let mut sleds_without_svc = + sleds_without_svc.into_iter().map(|(sled, _)| sled); + + // Identify sleds with services (part of output). + let mut svcs: Vec<_> = sleds_with_svc + .into_iter() + .map(|(_, maybe_svc)| { + maybe_svc.expect( + "Should have filtered by sleds with the service", + ) + }) + .collect(); + + eprintln!("Observed services: {:?}", svcs); // Add services to sleds, in-order, until we've met a // number sufficient for our redundancy. // // The selection of "which sleds run this service" is completely // arbitrary. - let mut new_svcs = vec![]; - while (redundancy as usize) < existing_count + new_svcs.len() { + while svcs.len() < (redundancy as usize) { let sled = sleds_without_svc.next().ok_or_else(|| { TxnError::CustomError(ServiceError::NotEnoughSleds) })?; let svc_id = Uuid::new_v4(); let address = Self::next_ipv6_address_sync(conn, sled.id()) - .map_err(|e| TxnError::CustomError(ServiceError::Other(e)))?; + .map_err(|e| { + TxnError::CustomError(ServiceError::Other(e)) + })?; let service = db::model::Service::new( svc_id, sled.id(), address, - kind.clone() + kind.clone(), ); // TODO: Can we insert all the services at the same time? 
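// Editorial sketch, not part of the patch: one possible answer to the TODO
// above is to collect the new `Service` rows first and write them with a
// single batched statement. Note this skips the parent-`Sled` existence check
// that `Sled::insert_resource` provides in `service_upsert_sync`, so it is
// illustrative only.
fn service_batch_insert_sync(
    conn: &mut DbConnection,
    new_services: &[db::model::Service],
) -> Result<Vec<db::model::Service>, diesel::result::Error> {
    use db::schema::service::dsl;
    // Insert every row in one statement; rows whose IDs already exist are
    // left untouched and omitted from the returned set.
    diesel::insert_into(dsl::service)
        .values(new_services)
        .on_conflict(dsl::id)
        .do_nothing()
        .get_results(conn)
}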
let svc = Self::service_upsert_sync(conn, service) - .map_err(|e| TxnError::CustomError(ServiceError::Other(e)))?; - new_svcs.push(svc); + .map_err(|e| { + TxnError::CustomError(ServiceError::Other(e)) + })?; + svcs.push(svc); } - return Ok(new_svcs); + return Ok(svcs); }) .await .map_err(|e| match e { TxnError::CustomError(ServiceError::NotEnoughSleds) => { Error::unavail("Not enough sleds for service allocation") - }, + } TxnError::CustomError(ServiceError::Other(e)) => e, - TxnError::Pool(e) => public_error_from_diesel_pool(e, ErrorHandler::Server) + TxnError::Pool(e) => { + public_error_from_diesel_pool(e, ErrorHandler::Server) + } }) } @@ -550,44 +570,51 @@ impl DataStore { self.pool() .transaction(move |conn| { - let existing_services = Self::dns_service_list_sync(conn)?; - let existing_count = existing_services.len(); + let mut svcs = Self::dns_service_list_sync(conn)?; + eprintln!("Observed DNS services: {:?}", svcs); // Get all subnets not allocated to existing services. let mut usable_dns_subnets = ReservedRackSubnet(rack_subnet) .get_dns_subnets() .into_iter() .filter(|subnet| { - // This address is only usable if none of the existing - // DNS services are using it. - existing_services.iter() - .all(|svc| Ipv6Addr::from(svc.ip) != subnet.dns_address().ip()) - }); - + // If any existing services are using this address, + // skip it. + !svcs.iter().any(|svc| { + Ipv6Addr::from(svc.ip) == subnet.dns_address().ip() + }) + }) + .collect::>() + .into_iter(); + eprintln!("Usable DNS services: {:?}", usable_dns_subnets); // Get all sleds which aren't already running DNS services. - let mut target_sleds = Self::sled_list_with_limit_sync(conn, redundancy)? - .into_iter() - .filter(|sled| { - // The target sleds are only considered if they aren't already - // running a DNS service. - existing_services.iter() - .all(|svc| svc.sled_id != sled.id()) - }); + let mut target_sleds = + Self::sled_list_with_limit_sync(conn, redundancy)? + .into_iter() + .filter(|sled| { + // The target sleds are only considered if they aren't already + // running a DNS service. + svcs.iter().all(|svc| svc.sled_id != sled.id()) + }) + .collect::>() + .into_iter(); - let mut new_svcs = vec![]; - while (redundancy as usize) < existing_count + new_svcs.len() { + while svcs.len() < (redundancy as usize) { let sled = target_sleds.next().ok_or_else(|| { - TxnError::CustomError(ServiceError::NotEnoughSleds) - })?; + TxnError::CustomError(ServiceError::NotEnoughSleds) + })?; let svc_id = Uuid::new_v4(); - let dns_subnet = usable_dns_subnets.next().ok_or_else(|| { + let dns_subnet = + usable_dns_subnets.next().ok_or_else(|| { TxnError::CustomError(ServiceError::NotEnoughIps) })?; - let address = dns_subnet - .dns_address() - .ip(); + let address = dns_subnet.dns_address().ip(); + // TODO: How are we tracking the GZ address that must be + // allocated? They're tracked by the "DnsSubnet" object + // in address.rs, but I don't think they're getting + // propagated out of here. let service = db::model::Service::new( svc_id, sled.id(), @@ -597,22 +624,28 @@ impl DataStore { // TODO: Can we insert all the services at the same time? 
let svc = Self::service_upsert_sync(conn, service) - .map_err(|e| TxnError::CustomError(ServiceError::Other(e)))?; + .map_err(|e| { + TxnError::CustomError(ServiceError::Other(e)) + })?; - new_svcs.push(svc); + svcs.push(svc); } - return Ok(new_svcs); + return Ok(svcs); }) .await .map_err(|e| match e { TxnError::CustomError(ServiceError::NotEnoughSleds) => { Error::unavail("Not enough sleds for service allocation") - }, + } TxnError::CustomError(ServiceError::NotEnoughIps) => { - Error::unavail("Not enough IP addresses for service allocation") - }, + Error::unavail( + "Not enough IP addresses for service allocation", + ) + } TxnError::CustomError(ServiceError::Other(e)) => e, - TxnError::Pool(e) => public_error_from_diesel_pool(e, ErrorHandler::Server) + TxnError::Pool(e) => { + public_error_from_diesel_pool(e, ErrorHandler::Server) + } }) } @@ -623,30 +656,35 @@ impl DataStore { svc::service .filter(svc::kind.eq(ServiceKind::InternalDNS)) + .limit(DNS_REDUNDANCY.into()) .select(Service::as_select()) .get_results(conn) } - // TODO: Filter by rack ID - pub fn sled_zpool_and_dataset_list_sync( + fn sled_zpool_and_dataset_list_sync( conn: &mut DbConnection, - _rack_id: Uuid, + rack_id: Uuid, kind: DatasetKind, - ) -> Result)>, diesel::result::Error> { + ) -> Result)>, diesel::result::Error> + { + use db::schema::dataset::dsl as dataset_dsl; use db::schema::sled::dsl as sled_dsl; use db::schema::zpool::dsl as zpool_dsl; - use db::schema::dataset::dsl as dataset_dsl; db::schema::sled::table .filter(sled_dsl::time_deleted.is_null()) - .inner_join(db::schema::zpool::table.on( - zpool_dsl::sled_id.eq(sled_dsl::id) - )) - .filter(zpool_dsl::time_deleted.is_null()) - .left_outer_join(db::schema::dataset::table.on( - dataset_dsl::pool_id.eq(zpool_dsl::id) - )) - .filter(dataset_dsl::kind.eq(kind)) + .filter(sled_dsl::rack_id.eq(rack_id)) + .inner_join( + db::schema::zpool::table.on(zpool_dsl::sled_id + .eq(sled_dsl::id) + .and(zpool_dsl::time_deleted.is_null())), + ) + .left_outer_join( + db::schema::dataset::table.on(dataset_dsl::pool_id + .eq(zpool_dsl::id) + .and(dataset_dsl::kind.eq(kind.clone())) + .and(dataset_dsl::time_deleted.is_null())), + ) .select(<(Sled, Zpool, Option)>::as_select()) .get_results(conn) } @@ -669,76 +707,108 @@ impl DataStore { self.pool() .transaction(move |conn| { - let sleds_zpools_and_maybe_datasets = Self::sled_zpool_and_dataset_list_sync( + let sleds_zpools_and_maybe_datasets = + Self::sled_zpool_and_dataset_list_sync( conn, rack_id, kind.clone(), )?; + eprintln!( + "Observed datasets: {:?}", + sleds_zpools_and_maybe_datasets + ); + // Split the set of returned zpools into "those with" and "those // without" the requested dataset. 
- let (zpools_with_dataset, zpools_without_dataset): (Vec<_>, Vec<_>) = - sleds_zpools_and_maybe_datasets + let (zpools_with_dataset, zpools_without_dataset): ( + Vec<_>, + Vec<_>, + ) = sleds_zpools_and_maybe_datasets + .into_iter() + .partition(|(_, _, maybe_dataset)| maybe_dataset.is_some()); + let mut zpools_without_dataset = zpools_without_dataset .into_iter() - .partition(|(_, _, maybe_dataset)| { - maybe_dataset.is_some() - }); - let mut zpools_without_dataset = zpools_without_dataset.into_iter() .map(|(sled, zpool, _)| (sled, zpool)) .peekable(); - let existing_count = zpools_with_dataset.len(); - // Add services to zpools, in-order, until we've met a + eprintln!("Dataset targets: {:?}", zpools_without_dataset); + + let mut datasets: Vec<_> = zpools_with_dataset + .into_iter() + .map(|(sled, zpool, maybe_dataset)| { + ( + sled, + zpool, + maybe_dataset.expect("Dataset should exist"), + ) + }) + .collect(); + eprintln!("Existing datasets: {:?}", datasets); + + // Add datasets to zpools, in-order, until we've met a // number sufficient for our redundancy. // - // The selection of "which zpools run this service" is completely + // The selection of "which zpools contain this dataset" is completely // arbitrary. - let mut new_datasets = vec![]; - loop { match redundancy { DatasetRedundancy::OnAll => { if zpools_without_dataset.peek().is_none() { break; } - }, - DatasetRedundancy::PerRack(count) => { - if (count as usize) >= existing_count + new_datasets.len() { + } + DatasetRedundancy::PerRack(desired) => { + if datasets.len() >= (desired as usize) { break; } - }, + } }; - let (sled, zpool) = zpools_without_dataset.next().ok_or_else(|| { - TxnError::CustomError(DatasetError::NotEnoughZpools) - })?; + let (sled, zpool) = + zpools_without_dataset.next().ok_or_else(|| { + TxnError::CustomError(DatasetError::NotEnoughZpools) + })?; let dataset_id = Uuid::new_v4(); let address = Self::next_ipv6_address_sync(conn, sled.id()) - .map_err(|e| TxnError::CustomError(DatasetError::Other(e))) - .map(|ip| SocketAddr::V6(SocketAddrV6::new(ip, kind.port(), 0, 0)))?; + .map_err(|e| { + TxnError::CustomError(DatasetError::Other(e)) + }) + .map(|ip| { + SocketAddr::V6(SocketAddrV6::new( + ip, + kind.port(), + 0, + 0, + )) + })?; let dataset = db::model::Dataset::new( dataset_id, zpool.id(), address, - kind.clone() + kind.clone(), ); // TODO: Can we insert all the datasets at the same time? let dataset = Self::dataset_upsert_sync(conn, dataset) - .map_err(|e| TxnError::CustomError(DatasetError::Other(e)))?; - new_datasets.push((sled, zpool, dataset)); + .map_err(|e| { + TxnError::CustomError(DatasetError::Other(e)) + })?; + datasets.push((sled, zpool, dataset)); } - return Ok(new_datasets); + return Ok(datasets); }) .await .map_err(|e| match e { TxnError::CustomError(DatasetError::NotEnoughZpools) => { - Error::unavail("Not enough sleds for dataset allocation") - }, + Error::unavail("Not enough zpools for dataset allocation") + } TxnError::CustomError(DatasetError::Other(e)) => e, - TxnError::Pool(e) => public_error_from_diesel_pool(e, ErrorHandler::Server) + TxnError::Pool(e) => { + public_error_from_diesel_pool(e, ErrorHandler::Server) + } }) } @@ -778,6 +848,25 @@ impl DataStore { }) } + // NOTE: This doesn't need to be test-only, it just happens to be test-only + // to avoid unused warnings. 
+ #[cfg(test)] + async fn dataset_list( + &self, + opctx: &OpContext, + zpool_id: Uuid, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::dataset::dsl; + dsl::dataset + .filter(dsl::time_deleted.is_null()) + .filter(dsl::pool_id.eq(zpool_id)) + .select(Dataset::as_select()) + .load_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + /// Stores a new dataset in the database. pub async fn dataset_upsert( &self, @@ -820,7 +909,7 @@ impl DataStore { } /// Stores a new dataset in the database. - pub fn dataset_upsert_sync( + fn dataset_upsert_sync( conn: &mut DbConnection, dataset: Dataset, ) -> CreateResult { @@ -898,7 +987,7 @@ impl DataStore { }) } - pub fn service_upsert_sync( + fn service_upsert_sync( conn: &mut DbConnection, service: Service, ) -> CreateResult { @@ -3942,7 +4031,7 @@ impl DataStore { /// Return the next available IPv6 address for an Oxide service running on /// the provided sled. - pub fn next_ipv6_address_sync( + fn next_ipv6_address_sync( conn: &mut DbConnection, sled_id: Uuid, ) -> Result { @@ -4265,6 +4354,7 @@ mod test { use crate::authz; use crate::db::explain::ExplainableAsync; use crate::db::fixed_data::silo::SILO_ID; + use crate::db::identity::Asset; use crate::db::identity::Resource; use crate::db::lookup::LookupPath; use crate::db::model::{ConsoleSession, DatasetKind, Project, ServiceKind}; @@ -4275,7 +4365,7 @@ mod test { ByteCount, Error, IdentityMetadataCreateParams, LookupType, Name, }; use omicron_test_utils::dev; - use std::collections::HashSet; + use std::collections::{HashMap, HashSet}; use std::net::Ipv6Addr; use std::net::SocketAddrV6; use std::net::{IpAddr, Ipv4Addr, SocketAddr}; @@ -4436,7 +4526,8 @@ mod test { 0, ); let sled_id = Uuid::new_v4(); - let sled = Sled::new(sled_id, bogus_addr.clone()); + let rack_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, bogus_addr.clone(), rack_id); datastore.sled_upsert(sled).await.unwrap(); sled_id } @@ -4791,15 +4882,16 @@ mod test { let datastore = Arc::new(DataStore::new(Arc::clone(&pool))); let opctx = OpContext::for_tests(logctx.log.new(o!()), datastore.clone()); + let rack_id = Uuid::new_v4(); let addr1 = "[fd00:1de::1]:12345".parse().unwrap(); let sled1_id = "0de4b299-e0b4-46f0-d528-85de81a7095f".parse().unwrap(); - let sled1 = db::model::Sled::new(sled1_id, addr1); + let sled1 = db::model::Sled::new(sled1_id, addr1, rack_id); datastore.sled_upsert(sled1).await.unwrap(); let addr2 = "[fd00:1df::1]:12345".parse().unwrap(); let sled2_id = "66285c18-0c79-43e0-e54f-95271f271314".parse().unwrap(); - let sled2 = db::model::Sled::new(sled2_id, addr2); + let sled2 = db::model::Sled::new(sled2_id, addr2, rack_id); datastore.sled_upsert(sled2).await.unwrap(); let ip = datastore.next_ipv6_address(&opctx, sled1_id).await.unwrap(); @@ -4939,6 +5031,497 @@ mod test { logctx.cleanup_successful(); } + #[tokio::test] + async fn test_ensure_rack_service() { + let logctx = dev::test_setup_log("test_ensure_rack_service"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the service should exist. 
+ let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + + // Ensure a service exists on the rack. + let services = datastore + .ensure_rack_service(&opctx, rack_id, ServiceKind::Nexus, 1) + .await + .expect("Should have allocated service"); + + // Only a single service was allocated, with the type / address we + // expect. + assert_eq!(1, services.len()); + assert_eq!(ServiceKind::Nexus, services[0].kind); + assert_eq!(sled_id, services[0].sled_id); + + // Listing services only shows this one. + let observed_services = datastore + .service_list(&opctx, sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(1, observed_services.len()); + assert_eq!(services[0].id(), observed_services[0].id()); + + // Test that ensuring services is idempotent. + let services_again = datastore + .ensure_rack_service(&opctx, rack_id, ServiceKind::Nexus, 1) + .await + .expect("Should have allocated service"); + assert_eq!(services_again, services); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_service_multi_sled() { + let logctx = dev::test_setup_log("test_ensure_rack_service_multi_sled"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let rack_id = Uuid::new_v4(); + + // Create sleds with distinct underlay subnets. + const SLED_COUNT: usize = 3; + let mut sleds = HashMap::new(); + for i in 0..SLED_COUNT { + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, i.try_into().unwrap(), 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled.clone()) + .await + .expect("Should be able to upsert sled"); + sleds.insert(sled.id(), sled); + } + + // Ensure a service exists on the rack, with some redundancy. + const NEXUS_COUNT: u32 = 3; + let mut services = datastore + .ensure_rack_service( + &opctx, + rack_id, + ServiceKind::Nexus, + NEXUS_COUNT, + ) + .await + .expect("Should have allocated service"); + services.sort_by(|a, b| a.id().cmp(&b.id())); + + assert_eq!(NEXUS_COUNT, services.len() as u32); + for svc in &services { + assert_eq!(ServiceKind::Nexus, svc.kind); + + // Each service should have been provisioned to a distinct sled. + let observed_services = datastore + .service_list(&opctx, svc.sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(1, observed_services.len()); + assert_eq!(svc.id(), observed_services[0].id()); + } + + // Test that ensuring services is idempotent. + let mut services_again = datastore + .ensure_rack_service( + &opctx, + rack_id, + ServiceKind::Nexus, + NEXUS_COUNT, + ) + .await + .expect("Should have allocated service"); + services_again.sort_by(|a, b| a.id().cmp(&b.id())); + assert_eq!(services_again, services); + + // Ask for a different service type on the rack. 
+ let oximeter_services = datastore + .ensure_rack_service(&opctx, rack_id, ServiceKind::Oximeter, 1) + .await + .expect("Should have allocated service"); + + // This should only return a single service + assert_eq!(1, oximeter_services.len()); + + // The target sled should contain both the nexus and oximeter services + let observed_services = datastore + .service_list(&opctx, oximeter_services[0].sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(2, observed_services.len()); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_service_not_enough_sleds() { + let logctx = + dev::test_setup_log("test_ensure_rack_service_not_enough_sleds"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the service should exist. + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + + // Try to request a redundancy which is larger than the number of sleds. + let err = datastore + .ensure_rack_service(&opctx, rack_id, ServiceKind::Nexus, 2) + .await + .expect_err("Should have failed to allocate service"); + + assert!( + matches!(err, Error::ServiceUnavailable { .. }), + "Error should have been ServiceUnavailable: {:?}", + err + ); + assert!( + err.to_string().contains("Not enough sleds"), + "Error should have identified 'Not enough sleds' as the cause: {:?}", err + ); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_dns_service() { + let logctx = dev::test_setup_log("test_ensure_dns_service"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the service should exist. + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + + let rack_subnet = Ipv6Subnet::::new(*sled_addr.ip()); + + // Ensure a service exists on the rack. + let services = datastore + .ensure_dns_service(&opctx, rack_subnet, 1) + .await + .expect("Should have allocated service"); + + // Only a single service was allocated, with the type / address we + // expect. + assert_eq!(1, services.len()); + assert_eq!(ServiceKind::InternalDNS, services[0].kind); + assert_eq!(sled_id, services[0].sled_id); + + // Listing services only shows this one. + let observed_services = datastore + .service_list(&opctx, sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(1, observed_services.len()); + assert_eq!(services[0].id(), observed_services[0].id()); + + // Test that ensuring services is idempotent. 
+ let services_again = datastore + .ensure_dns_service(&opctx, rack_subnet, 1) + .await + .expect("Should have allocated service"); + assert_eq!(services_again, services); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_dns_service_multi_sled() { + let logctx = dev::test_setup_log("test_ensure_dns_service_multi_sled"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let rack_id = Uuid::new_v4(); + + // Create sleds with distinct underlay subnets. + const SLED_COUNT: u32 = DNS_REDUNDANCY; + let mut sleds = HashMap::new(); + for i in 0..SLED_COUNT { + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, i.try_into().unwrap(), 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled.clone()) + .await + .expect("Should be able to upsert sled"); + sleds.insert(sled.id(), sled); + } + let rack_subnet = Ipv6Subnet::::new(Ipv6Addr::from( + sleds.values().next().unwrap().ip, + )); + + for sled in sleds.values() { + assert_eq!( + rack_subnet, + Ipv6Subnet::::new(Ipv6Addr::from(sled.ip)), + "Test pre-condition violated: All sleds must belong to the same rack" + ); + } + + // Ensure a service exists on the rack. + const DNS_COUNT: u32 = DNS_REDUNDANCY; + let mut services = datastore + .ensure_dns_service(&opctx, rack_subnet, DNS_COUNT) + .await + .expect("Should have allocated service"); + services.sort_by(|a, b| a.id().cmp(&b.id())); + + assert_eq!(DNS_COUNT, services.len() as u32); + for svc in &services { + assert_eq!(ServiceKind::InternalDNS, svc.kind); + + // Each service should have been provisioned to a distinct sled. + let observed_services = datastore + .service_list(&opctx, svc.sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(1, observed_services.len()); + assert_eq!(svc.id(), observed_services[0].id()); + } + + // Test for idempotency + let mut services_again = datastore + .ensure_dns_service(&opctx, rack_subnet, DNS_COUNT) + .await + .expect("Should have allocated service"); + services_again.sort_by(|a, b| a.id().cmp(&b.id())); + assert_eq!(services_again, services); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_dataset() { + let logctx = dev::test_setup_log("test_ensure_rack_dataset"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the dataset should exist. + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + let zpool_id = create_test_zpool(&datastore, sled_id).await; + + // Ensure a dataset exists on the rack. + let output = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::PerRack(1), + ) + .await + .expect("Should have allocated dataset"); + + // Observe that only a single dataset was allocated + assert_eq!(1, output.len()); + let (_, _, output_dataset) = &output[0]; + assert_eq!(DatasetKind::Crucible, output_dataset.kind); + assert_eq!(zpool_id, output_dataset.pool_id); + + // Listing datasets only shows this one. 
+ let observed_datasets = datastore + .dataset_list(&opctx, zpool_id) + .await + .expect("Should be able to list datasets"); + assert_eq!(1, observed_datasets.len()); + assert_eq!(output_dataset.id(), observed_datasets[0].id()); + + // Test that ensuring datasets is idempotent. + let output_again = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::PerRack(1), + ) + .await + .expect("Should have allocated dataset"); + let (_, _, output_dataset_again) = &output_again[0]; + assert_eq!(output_dataset_again, output_dataset); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_dataset_not_enough_zpools() { + let logctx = + dev::test_setup_log("test_ensure_rack_dataset_not_enough_zpools"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the dataset should exist. + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + + // Attempt to allocate a dataset on a rack without zpools. + let err = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::PerRack(1), + ) + .await + .expect_err("Should not have allocated dataset"); + + assert!( + matches!(err, Error::ServiceUnavailable { .. }), + "Error should have been ServiceUnavailable: {:?}", + err + ); + assert!( + err.to_string().contains("Not enough zpools"), + "Error should have identified 'Not enough zpools' as the cause: {:?}", err + ); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_dataset_multi_sled() { + let logctx = dev::test_setup_log("test_ensure_rack_dataset_multi_sled"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let rack_id = Uuid::new_v4(); + + // Create sleds with distinct underlay subnets. + const SLED_COUNT: usize = 3; + let mut sleds = HashMap::new(); + for i in 0..SLED_COUNT { + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, i.try_into().unwrap(), 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled.clone()) + .await + .expect("Should be able to upsert sled"); + sleds.insert(sled.id(), sled); + create_test_zpool(&datastore, sled_id).await; + } + + // Ensure datasets exist on the rack. + let output = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::OnAll, + ) + .await + .expect("Should have allocated dataset"); + assert_eq!(SLED_COUNT, output.len()); + for (sled, zpool, dataset) in &output { + assert_eq!(DatasetKind::Crucible, dataset.kind); + assert_eq!(zpool.id(), dataset.pool_id); + assert_eq!(sled.id(), zpool.sled_id); + + let observed_datasets = datastore + .dataset_list(&opctx, zpool.id()) + .await + .expect("Should be able to list datasets"); + assert_eq!(1, observed_datasets.len()); + assert_eq!(dataset.id(), observed_datasets[0].id()) + } + + // Test that ensuring datasets is idempotent. 
+ let output_again = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::OnAll, + ) + .await + .expect("Should have allocated dataset"); + + let mut output: Vec<_> = + output.into_iter().map(|(_, _, dataset)| dataset).collect(); + output.sort_by(|a, b| a.id().cmp(&b.id())); + let mut output_again: Vec<_> = + output_again.into_iter().map(|(_, _, dataset)| dataset).collect(); + output_again.sort_by(|a, b| a.id().cmp(&b.id())); + assert_eq!(output, output_again); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + #[tokio::test] async fn test_rack_initialize_is_idempotent() { let logctx = dev::test_setup_log("test_rack_initialize_is_idempotent"); diff --git a/nexus/src/db/model/dataset_kind.rs b/nexus/src/db/model/dataset_kind.rs index effced69522..bd85972b3dc 100644 --- a/nexus/src/db/model/dataset_kind.rs +++ b/nexus/src/db/model/dataset_kind.rs @@ -4,12 +4,12 @@ use super::impl_enum_type; use crate::internal_api; +use omicron_common::address::{CLICKHOUSE_PORT, COCKROACH_PORT, CRUCIBLE_PORT}; use serde::{Deserialize, Serialize}; use std::io::Write; -use omicron_common::address::{COCKROACH_PORT, CRUCIBLE_PORT, CLICKHOUSE_PORT}; impl_enum_type!( - #[derive(SqlType, Debug, QueryId)] + #[derive(Clone, SqlType, Debug, QueryId)] #[diesel(postgres_type(name = "dataset_kind"))] pub struct DatasetKindEnum; diff --git a/nexus/src/db/model/service.rs b/nexus/src/db/model/service.rs index 3501337e42b..0762db538b1 100644 --- a/nexus/src/db/model/service.rs +++ b/nexus/src/db/model/service.rs @@ -10,7 +10,7 @@ use std::net::Ipv6Addr; use uuid::Uuid; /// Representation of services which may run on Sleds. -#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset)] +#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset, PartialEq)] #[diesel(table_name = service)] pub struct Service { #[diesel(embed)] diff --git a/nexus/src/db/model/service_kind.rs b/nexus/src/db/model/service_kind.rs index f66532e64c0..b9a3e96c6f6 100644 --- a/nexus/src/db/model/service_kind.rs +++ b/nexus/src/db/model/service_kind.rs @@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize}; use std::io::Write; impl_enum_type!( - #[derive(SqlType, Debug, QueryId)] + #[derive(Clone, SqlType, Debug, QueryId)] #[diesel(postgres_type(name = "service_kind"))] pub struct ServiceKindEnum; diff --git a/nexus/src/db/model/sled.rs b/nexus/src/db/model/sled.rs index ad756c3473f..84882679087 100644 --- a/nexus/src/db/model/sled.rs +++ b/nexus/src/db/model/sled.rs @@ -13,7 +13,7 @@ use std::net::SocketAddrV6; use uuid::Uuid; /// Database representation of a Sled. -#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset)] +#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset, PartialEq)] #[diesel(table_name = sled)] pub struct Sled { #[diesel(embed)] @@ -21,6 +21,8 @@ pub struct Sled { time_deleted: Option>, rcgen: Generation, + pub rack_id: Uuid, + // ServiceAddress (Sled Agent). 
pub ip: ipv6::Ipv6Addr, pub port: SqlU16, @@ -30,7 +32,7 @@ pub struct Sled { } impl Sled { - pub fn new(id: Uuid, addr: SocketAddrV6) -> Self { + pub fn new(id: Uuid, addr: SocketAddrV6, rack_id: Uuid) -> Self { let last_used_address = { let mut segments = addr.ip().segments(); segments[7] += omicron_common::address::RSS_RESERVED_ADDRESSES; @@ -40,6 +42,7 @@ impl Sled { identity: SledIdentity::new(id), time_deleted: None, rcgen: Generation::new(), + rack_id, ip: ipv6::Ipv6Addr::from(addr.ip()), port: addr.port().into(), last_used_address, diff --git a/nexus/src/db/model/zpool.rs b/nexus/src/db/model/zpool.rs index 511312a3382..475fc7bf0ee 100644 --- a/nexus/src/db/model/zpool.rs +++ b/nexus/src/db/model/zpool.rs @@ -14,7 +14,7 @@ use uuid::Uuid; /// /// A zpool represents a ZFS storage pool, allocated on a single /// physical sled. -#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset)] +#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset, PartialEq)] #[diesel(table_name = zpool)] pub struct Zpool { #[diesel(embed)] diff --git a/nexus/src/db/schema.rs b/nexus/src/db/schema.rs index a6d281d987e..41c8c3527b9 100644 --- a/nexus/src/db/schema.rs +++ b/nexus/src/db/schema.rs @@ -297,6 +297,7 @@ table! { time_deleted -> Nullable, rcgen -> Int8, + rack_id -> Uuid, ip -> Inet, port -> Int4, last_used_address -> Inet, diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index b5d3ccc5085..d9aa20adb48 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -131,25 +131,7 @@ impl Server { // Wait until RSS handoff completes. let opctx = apictx.nexus.opctx_for_background(); apictx.nexus.await_rack_initialization(&opctx).await; - apictx.nexus - .start_background_tasks() - .map_err(|e| e.to_string())?; - - // TODO: What triggers background tasks to execute? - // - // - Perhaps the API is exposed to tests? - // - Perhaps the invocation of that API is controlled by config - // options? - // - // TODO: services we need to start: - // - // Datasets: - // - Crucible (as a dataset on each unique zpool) - // - Clickhouse (as a dataset on a zpool) - // - CRDB (prolly just check it exists, period) - // - // - Oximeter (as a service) - // - Nexus (again, maybe just check it exists at all) + apictx.nexus.start_background_tasks().map_err(|e| e.to_string())?; let http_server_starter_external = dropshot::HttpServerStarter::new( &config.runtime.dropshot_external, diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 5b82396b6fd..24bf14ccfa1 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -15,7 +15,7 @@ use crate::params::DatasetKind; use futures::stream::FuturesOrdered; use futures::FutureExt; use futures::StreamExt; -use nexus_client::types::{DatasetPutRequest, ZpoolPutRequest}; +use nexus_client::types::ZpoolPutRequest; use omicron_common::api::external::{ByteCount, ByteCountRangeError}; use omicron_common::backoff; use schemars::JsonSchema; @@ -667,6 +667,7 @@ impl StorageWorker { // Adds a "notification to nexus" to `nexus_notifications`, // informing it about the addition of `datasets` to `pool_id`. + /* fn add_datasets_notify( &self, nexus_notifications: &mut FuturesOrdered>>, @@ -715,6 +716,7 @@ impl StorageWorker { .boxed(), ); } + */ // TODO: a lot of these functions act on the `FuturesOrdered` - should // that just be a part of the "worker" struct? @@ -722,7 +724,6 @@ impl StorageWorker { // Attempts to add a dataset within a zpool, according to `request`. 
async fn add_dataset( &self, - nexus_notifications: &mut FuturesOrdered>>, request: &NewFilesystemRequest, ) -> Result<(), Error> { info!(self.log, "add_dataset: {:?}", request); @@ -769,12 +770,6 @@ impl StorageWorker { err, })?; - self.add_datasets_notify( - nexus_notifications, - vec![(id, dataset_info.address, dataset_info.kind)], - pool.id(), - ); - Ok(()) } @@ -868,21 +863,16 @@ impl StorageWorker { } } - // Notify Nexus of the zpool and all datasets within. + // Notify Nexus of the zpool. self.add_zpool_notify( &mut nexus_notifications, pool.id(), size, ); - self.add_datasets_notify( - &mut nexus_notifications, - datasets, - pool.id(), - ); }, Some(request) = self.new_filesystems_rx.recv() => { - let result = self.add_dataset(&mut nexus_notifications, &request).await; + let result = self.add_dataset(&request).await; let _ = request.responder.send(result); } } From 2a28eb99e66bb7295a38e5f957f00238af976502 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 15:12:23 -0400 Subject: [PATCH 10/35] It's hacky, but it's working. I'm seeing services be re-balanced correctly --- nexus/src/app/background/services.rs | 26 +++++++++++++++++++++----- nexus/src/db/datastore.rs | 17 ----------------- sled-agent/src/services.rs | 2 +- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index 8aee6b7f3b2..53628f5905c 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -112,6 +112,8 @@ impl ServiceBalancer { self.nexus.datastore().service_list(opctx, *sled_id).await?; let sled_client = self.nexus.sled_client(sled_id).await?; + info!(self.log, "instantiate_services: {:?}", services); + sled_client .services_put(&SledAgentTypes::ServiceEnsureBody { services: services @@ -124,11 +126,24 @@ impl ServiceBalancer { s.kind.clone(), ); + // TODO: This is hacky, specifically to inject + // global zone addresses in the DNS service. + let gz_addresses = match &s.kind { + ServiceKind::InternalDNS => { + let mut octets = address.octets(); + octets[15] = octets[15] + 1; + vec![Ipv6Addr::from(octets)] + } + _ => vec![], + }; + + // TODO: this is wrong for DNS service; needs the gz + // addreess SledAgentTypes::ServiceRequest { id: s.id(), name: name.to_string(), addresses: vec![address], - gz_addresses: vec![], + gz_addresses, service_type, } }) @@ -191,14 +206,14 @@ impl ServiceBalancer { desired_count: u32, ) -> Result<(), Error> { // Provision the services within the database. - let new_services = self + let services = self .nexus .datastore() .ensure_rack_service(opctx, self.nexus.rack_id, kind, desired_count) .await?; // Actually instantiate those services. - self.instantiate_services(opctx, new_services).await + self.instantiate_services(opctx, services).await } async fn ensure_dns_service( @@ -207,14 +222,14 @@ impl ServiceBalancer { desired_count: u32, ) -> Result<(), Error> { // Provision the services within the database. - let new_services = self + let services = self .nexus .datastore() .ensure_dns_service(opctx, self.nexus.rack_subnet, desired_count) .await?; // Actually instantiate those services. - self.instantiate_services(opctx, new_services).await + self.instantiate_services(opctx, services).await } // TODO: Consider using sagas to ensure the rollout of services happens. 
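// Editorial sketch, not part of the patch: the `gz_addresses` hack above
// (bumping the last octet of the service address) could instead be derived
// from the rack's reserved DNS subnets, mirroring how `ensure_dns_service`
// picks the DNS service address in the first place. This assumes `DnsSubnet`
// exposes a `gz_address()` accessor analogous to the `dns_address()` accessor
// used elsewhere in this change.
use omicron_common::address::{Ipv6Subnet, ReservedRackSubnet, RACK_PREFIX};
use std::net::Ipv6Addr;

fn gz_address_for_dns_service(
    rack_subnet: Ipv6Subnet<RACK_PREFIX>,
    dns_service_ip: Ipv6Addr,
) -> Option<Ipv6Addr> {
    // Find the reserved DNS subnet whose DNS address matches this service,
    // then hand back that subnet's global-zone address.
    ReservedRackSubnet(rack_subnet)
        .get_dns_subnets()
        .into_iter()
        .find(|subnet| subnet.dns_address().ip() == dns_service_ip)
        .map(|subnet| subnet.gz_address().ip())
}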
@@ -272,6 +287,7 @@ impl ServiceBalancer { ) -> Result<(), Error> { let mut sled_clients = HashMap::new(); + // TODO: We could issue these requests concurrently for (sled, zpool, dataset) in &datasets { let sled_client = { match sled_clients.get(&sled.id()) { diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 81afcc25fdc..53ad724560d 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -480,11 +480,6 @@ impl DataStore { kind.clone(), )?; - eprintln!( - "Observed sleds/services: {:?}", - sleds_and_maybe_svcs - ); - // Split the set of returned sleds into "those with" and "those // without" the requested service. let (sleds_with_svc, sleds_without_svc): (Vec<_>, Vec<_>) = @@ -506,8 +501,6 @@ impl DataStore { }) .collect(); - eprintln!("Observed services: {:?}", svcs); - // Add services to sleds, in-order, until we've met a // number sufficient for our redundancy. // @@ -572,7 +565,6 @@ impl DataStore { .transaction(move |conn| { let mut svcs = Self::dns_service_list_sync(conn)?; - eprintln!("Observed DNS services: {:?}", svcs); // Get all subnets not allocated to existing services. let mut usable_dns_subnets = ReservedRackSubnet(rack_subnet) .get_dns_subnets() @@ -586,7 +578,6 @@ impl DataStore { }) .collect::>() .into_iter(); - eprintln!("Usable DNS services: {:?}", usable_dns_subnets); // Get all sleds which aren't already running DNS services. let mut target_sleds = @@ -714,11 +705,6 @@ impl DataStore { kind.clone(), )?; - eprintln!( - "Observed datasets: {:?}", - sleds_zpools_and_maybe_datasets - ); - // Split the set of returned zpools into "those with" and "those // without" the requested dataset. let (zpools_with_dataset, zpools_without_dataset): ( @@ -732,8 +718,6 @@ impl DataStore { .map(|(sled, zpool, _)| (sled, zpool)) .peekable(); - eprintln!("Dataset targets: {:?}", zpools_without_dataset); - let mut datasets: Vec<_> = zpools_with_dataset .into_iter() .map(|(sled, zpool, maybe_dataset)| { @@ -744,7 +728,6 @@ impl DataStore { ) }) .collect(); - eprintln!("Existing datasets: {:?}", datasets); // Add datasets to zpools, in-order, until we've met a // number sufficient for our redundancy. diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index e7f71810fdf..3bae6521a4c 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -486,7 +486,7 @@ impl ServiceManager { // that removal implicitly. 
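The DNS placement pass above starts from the rack's reserved DNS subnets (`ReservedRackSubnet(rack_subnet).get_dns_subnets()`) and keeps only those not already claimed by an existing internal DNS service before assigning new ones. A reduced sketch of that filter, with small integers standing in for the subnet type:

// Keep only the reserved DNS subnets that no existing DNS service occupies;
// new DNS zones are then drawn from this remainder.
fn usable_dns_subnets(reserved: &[u16], in_use: &[u16]) -> Vec<u16> {
    reserved.iter().copied().filter(|s| !in_use.contains(s)).collect()
}

fn main() {
    // Suppose the rack reserves three DNS subnets and one is already taken.
    assert_eq!(usable_dns_subnets(&[1, 2, 3], &[2]), vec![1u16, 3]);
}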
warn!( self.log, - "Cannot request services on this sled, differing configurations: {:?}", + "Cannot request services on this sled, differing configurations: {:#?}", known_set.symmetric_difference(&requested_set) ); return Err(Error::ServicesAlreadyConfigured); From b07322c2a5132574097691ce47e5937558b24bd0 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 18:00:48 -0400 Subject: [PATCH 11/35] clippy, fmt --- nexus/src/app/background/services.rs | 11 ++++------- nexus/src/db/datastore.rs | 25 +++++++++---------------- nexus/src/db/model/dataset_kind.rs | 2 +- nexus/src/db/model/service_kind.rs | 2 +- nexus/src/lib.rs | 4 +--- sled-agent/src/server.rs | 2 +- sled-agent/src/storage_manager.rs | 4 +--- 7 files changed, 18 insertions(+), 32 deletions(-) diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index 53628f5905c..16ef56fe77b 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -122,8 +122,7 @@ impl ServiceBalancer { let address = Ipv6Addr::from(s.ip); let (name, service_type) = Self::get_service_name_and_type( - address, - s.kind.clone(), + address, s.kind, ); // TODO: This is hacky, specifically to inject @@ -137,11 +136,9 @@ impl ServiceBalancer { _ => vec![], }; - // TODO: this is wrong for DNS service; needs the gz - // addreess SledAgentTypes::ServiceRequest { id: s.id(), - name: name.to_string(), + name, addresses: vec![address], gz_addresses, service_type, @@ -249,7 +246,7 @@ impl ServiceBalancer { ServiceRedundancy::PerRack(desired_count) => { self.ensure_rack_service( opctx, - expected_svc.kind.clone(), + expected_svc.kind, desired_count, ) .await?; @@ -335,7 +332,7 @@ impl ServiceBalancer { info!(self.log, "Ensuring dataset {:?} exists", expected_dataset); self.ensure_rack_dataset( opctx, - expected_dataset.kind.clone(), + expected_dataset.kind, expected_dataset.redundancy, ) .await? diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 6249c5975b0..54985c276a9 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -449,11 +449,9 @@ impl DataStore { db::schema::sled::table .filter(sled_dsl::time_deleted.is_null()) .filter(sled_dsl::rack_id.eq(rack_id)) - .left_outer_join( - db::schema::service::table.on(svc_dsl::sled_id - .eq(sled_dsl::id) - .and(svc_dsl::kind.eq(kind.clone()))), - ) + .left_outer_join(db::schema::service::table.on( + svc_dsl::sled_id.eq(sled_dsl::id).and(svc_dsl::kind.eq(kind)), + )) .select(<(Sled, Option)>::as_select()) .get_results(conn) } @@ -476,11 +474,8 @@ impl DataStore { self.pool() .transaction(move |conn| { - let sleds_and_maybe_svcs = Self::sled_and_service_list_sync( - conn, - rack_id, - kind.clone(), - )?; + let sleds_and_maybe_svcs = + Self::sled_and_service_list_sync(conn, rack_id, kind)?; // Split the set of returned sleds into "those with" and "those // without" the requested service. @@ -522,7 +517,7 @@ impl DataStore { svc_id, sled.id(), address, - kind.clone(), + kind, ); // TODO: Can we insert all the services at the same time? 
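The loop above only has to top the rack up: sleds already running the service count toward the redundancy target, and the remainder is drawn, in order, from the sleds without it (each pick also allocating an address and inserting a Service row). Reduced to plain IDs, the selection step is:

// Choose additional sleds so the total number running the service reaches
// `desired_count`; existing placements are left untouched.
fn additional_sleds(
    sleds_with_svc: &[u32],
    sleds_without_svc: &[u32],
    desired_count: usize,
) -> Vec<u32> {
    let needed = desired_count.saturating_sub(sleds_with_svc.len());
    sleds_without_svc.iter().copied().take(needed).collect()
}

fn main() {
    // One sled already runs the service and three copies are desired.
    assert_eq!(additional_sleds(&[7], &[1, 2, 3], 3), vec![1u32, 2]);
}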
@@ -675,7 +670,7 @@ impl DataStore { .left_outer_join( db::schema::dataset::table.on(dataset_dsl::pool_id .eq(zpool_dsl::id) - .and(dataset_dsl::kind.eq(kind.clone())) + .and(dataset_dsl::kind.eq(kind)) .and(dataset_dsl::time_deleted.is_null())), ) .select(<(Sled, Zpool, Option)>::as_select()) @@ -702,9 +697,7 @@ impl DataStore { .transaction(move |conn| { let sleds_zpools_and_maybe_datasets = Self::sled_zpool_and_dataset_list_sync( - conn, - rack_id, - kind.clone(), + conn, rack_id, kind, )?; // Split the set of returned zpools into "those with" and "those @@ -772,7 +765,7 @@ impl DataStore { dataset_id, zpool.id(), address, - kind.clone(), + kind, ); // TODO: Can we insert all the datasets at the same time? diff --git a/nexus/src/db/model/dataset_kind.rs b/nexus/src/db/model/dataset_kind.rs index bd85972b3dc..ef004bef9bf 100644 --- a/nexus/src/db/model/dataset_kind.rs +++ b/nexus/src/db/model/dataset_kind.rs @@ -13,7 +13,7 @@ impl_enum_type!( #[diesel(postgres_type(name = "dataset_kind"))] pub struct DatasetKindEnum; - #[derive(Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] #[diesel(sql_type = DatasetKindEnum)] pub enum DatasetKind; diff --git a/nexus/src/db/model/service_kind.rs b/nexus/src/db/model/service_kind.rs index b9a3e96c6f6..0cbb0d0f658 100644 --- a/nexus/src/db/model/service_kind.rs +++ b/nexus/src/db/model/service_kind.rs @@ -12,7 +12,7 @@ impl_enum_type!( #[diesel(postgres_type(name = "service_kind"))] pub struct ServiceKindEnum; - #[derive(Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] #[diesel(sql_type = ServiceKindEnum)] pub enum ServiceKind; diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 1f11a323cc5..1a461a61559 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -121,9 +121,7 @@ pub struct Server { } impl Server { - pub async fn start<'a>( - internal: InternalServer<'a>, - ) -> Result { + pub async fn start(internal: InternalServer<'_>) -> Result { let apictx = internal.apictx; let http_server_internal = internal.http_server_internal; let log = internal.log; diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 6273e1f2a2f..97920925789 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -86,7 +86,7 @@ impl Server { let nexus_client = lazy_nexus_client .get() .await - .map_err(|err| BackoffError::transient(err.to_string()))?; + .map_err(|err| BackoffError::transient(err))?; nexus_client .cpapi_sled_agents_post( &sled_id, diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 24bf14ccfa1..d559ee70589 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -637,9 +637,7 @@ impl StorageWorker { lazy_nexus_client .get() .await - .map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })? + .map_err(|e| backoff::BackoffError::transient(e))? 
.zpool_put(&sled_id, &pool_id, &zpool_request) .await .map_err(|e| { From 7f41e42cf63fdbb4d64554a438c4fa3ed514655a Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 19:02:23 -0400 Subject: [PATCH 12/35] Strongly-typed DNS service names --- internal-dns-client/src/names.rs | 30 ++++++++++++++++++++++++++-- nexus/src/context.rs | 4 ++-- sled-agent/src/nexus.rs | 4 ++-- sled-agent/src/rack_setup/service.rs | 9 ++++----- 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/internal-dns-client/src/names.rs b/internal-dns-client/src/names.rs index 6384ec9e503..66e356f46cc 100644 --- a/internal-dns-client/src/names.rs +++ b/internal-dns-client/src/names.rs @@ -7,17 +7,43 @@ use uuid::Uuid; const DNS_ZONE: &str = "control-plane.oxide.internal"; +pub enum ServiceName { + Nexus, + Cockroach, +} + +impl fmt::Display for ServiceName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + ServiceName::Nexus => write!(f, "nexus"), + ServiceName::Cockroach => write!(f, "cockroachdb"), + } + } +} + +pub enum BackendName { + SledAgent, +} + +impl fmt::Display for BackendName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + BackendName::SledAgent => write!(f, "sledagent"), + } + } +} + pub enum SRV { /// A service identified and accessed by name, such as "nexus", "CRDB", etc. /// /// This is used in cases where services are interchangeable. - Service(String), + Service(ServiceName), /// A service identified by name and a unique identifier. /// /// This is used in cases where services are not interchangeable, such as /// for the Sled agent. - Backend(String, Uuid), + Backend(BackendName, Uuid), } impl fmt::Display for SRV { diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 636b2bde30f..2677df0c3c9 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -18,7 +18,7 @@ use authn::external::session_cookie::HttpAuthnSessionCookie; use authn::external::spoof::HttpAuthnSpoof; use authn::external::HttpAuthnScheme; use chrono::{DateTime, Duration, Utc}; -use internal_dns_client::names::SRV; +use internal_dns_client::names::{ServiceName, SRV}; use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, COCKROACH_PORT}; use omicron_common::api::external::Error; use omicron_common::nexus_config; @@ -155,7 +155,7 @@ impl ServerContext { info!(log, "Accessing DB url from DNS"); let response = resolver .lookup_ip( - &SRV::Service("cockroachdb".to_string()).to_string(), + &SRV::Service(ServiceName::Cockroach).to_string(), ) .await .map_err(|e| format!("Failed to lookup IP: {}", e))?; diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 00e87fd6a1b..3dc40369219 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -7,7 +7,7 @@ pub use crate::mocks::MockNexusClient as NexusClient; #[cfg(not(test))] pub use nexus_client::Client as NexusClient; -use internal_dns_client::names::SRV; +use internal_dns_client::names::{ServiceName, SRV}; use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, NEXUS_INTERNAL_PORT}; use slog::Logger; use std::net::Ipv6Addr; @@ -49,7 +49,7 @@ impl LazyNexusClient { internal_dns_client::multiclient::create_resolver(az_subnet) .map_err(|e| format!("Failed to create DNS resolver: {}", e))?; let response = resolver - .lookup_ip(&SRV::Service("nexus".to_string()).to_string()) + .lookup_ip(&SRV::Service(ServiceName::Nexus).to_string()) .await .map_err(|e| format!("Failed to lookup Nexus IP: {}", e))?; let address = response.iter().next().ok_or_else(|| { diff --git 
a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 30b0e867b79..802fac5f304 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -15,7 +15,7 @@ use crate::rack_setup::plan::service::{ use crate::rack_setup::plan::sled::{ Plan as SledPlan, PlanError as SledPlanError, }; -use internal_dns_client::names::{AAAA, SRV}; +use internal_dns_client::names::{ServiceName, AAAA, SRV}; use nexus_client::{ types as NexusTypes, Client as NexusClient, Error as NexusError, }; @@ -225,8 +225,7 @@ impl ServiceInner { .iter() .map(|dataset| (AAAA::Zone(dataset.id), dataset.address)) .collect::>(); - let srv_key = SRV::Service("cockroachdb".into()); - + let srv_key = SRV::Service(ServiceName::Cockroach); self.dns_servers .get() .expect("DNS servers must be initialized first") @@ -304,7 +303,7 @@ impl ServiceInner { ) }) .collect::>(); - let srv_key = SRV::Service("nexus".into()); + let srv_key = SRV::Service(ServiceName::Nexus); self.dns_servers .get() .expect("DNS servers must be initialized first") @@ -370,7 +369,7 @@ impl ServiceInner { ) .expect("Failed to create DNS resolver"); let response = resolver - .lookup_ip(&SRV::Service("nexus".to_string()).to_string()) + .lookup_ip(&SRV::Service(ServiceName::Nexus).to_string()) .await .expect("Failed to lookup IP"); From a68de334aa30e39ce3ccb3510c4cdeb5d9666d08 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 22:58:34 -0400 Subject: [PATCH 13/35] Populate DNS records --- common/src/address.rs | 1 + internal-dns-client/src/multiclient.rs | 56 ++++++++++++++++++-- internal-dns-client/src/names.rs | 15 +++++- nexus/src/app/background/services.rs | 41 +++++++++++++-- nexus/src/app/mod.rs | 6 ++- nexus/src/app/sled.rs | 4 +- nexus/src/db/datastore.rs | 6 +-- nexus/src/db/ipv6.rs | 2 + nexus/src/db/model/dataset.rs | 34 +++++++++--- nexus/src/db/model/service.rs | 28 +++++++++- nexus/src/internal_api/params.rs | 2 +- sled-agent/src/params.rs | 61 +++++++++++++++++++--- sled-agent/src/rack_setup/service.rs | 72 +++++--------------------- 13 files changed, 237 insertions(+), 91 deletions(-) diff --git a/common/src/address.rs b/common/src/address.rs index 7284ba4cc64..32fef1c3994 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -36,6 +36,7 @@ pub const PROPOLIS_PORT: u16 = 12400; pub const COCKROACH_PORT: u16 = 32221; pub const CRUCIBLE_PORT: u16 = 32345; pub const CLICKHOUSE_PORT: u16 = 8123; +pub const OXIMETER_PORT: u16 = 12223; pub const NEXUS_EXTERNAL_PORT: u16 = 12220; pub const NEXUS_INTERNAL_PORT: u16 = 12221; diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 24c8817c274..de14055197b 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -21,9 +21,16 @@ type DnsError = crate::Error; /// A connection used to update multiple DNS servers. pub struct Updater { + log: Logger, clients: Vec, } +pub trait Service { + fn aaaa(&self) -> crate::names::AAAA; + fn srv(&self) -> crate::names::SRV; + fn address(&self) -> SocketAddrV6; +} + impl Updater { /// Creates a new "Updater", capable of communicating with all /// DNS servers within the AZ. @@ -41,15 +48,56 @@ impl Updater { }) .collect::>(); - Self { clients } + Self { log, clients } + } + + /// Inserts all service records into the DNS server. + /// + /// This method is most efficient when records are sorted by + /// SRV key. 
+ pub async fn insert_dns_records( + &self, + records: &Vec, + ) -> Result<(), DnsError> { + let mut records = records.iter().peekable(); + + while let Some(record) = records.next() { + let srv = record.srv(); + + match &srv { + &crate::names::SRV::Service(_) => { + let mut aaaa = vec![]; + while let Some(record) = records.peek() { + if record.srv() == srv { + let record = records.next().unwrap(); + aaaa.push((record.aaaa(), record.address())); + } else { + break; + } + } + + self.insert_dns_records_internal( + aaaa, + srv, + ).await?; + }, + &crate::names::SRV::Backend(_, _) => { + let aaaa = vec![(record.aaaa(), record.address())]; + self.insert_dns_records_internal( + aaaa, + record.srv(), + ).await?; + }, + }; + } + Ok(()) } /// Utility function to insert: /// - A set of uniquely-named AAAA records, each corresponding to an address /// - An SRV record, pointing to each of the AAAA records. - pub async fn insert_dns_records( + async fn insert_dns_records_internal( &self, - log: &Logger, aaaa: Vec<(crate::names::AAAA, SocketAddrV6)>, srv_key: crate::names::SRV, ) -> Result<(), DnsError> { @@ -84,7 +132,7 @@ impl Updater { Ok::<(), BackoffError>(()) }; let log_failure = |error, _| { - warn!(log, "Failed to set DNS records"; "error" => ?error); + warn!(self.log, "Failed to set DNS records"; "error" => ?error); }; retry_notify(internal_service_policy(), set_record, log_failure) diff --git a/internal-dns-client/src/names.rs b/internal-dns-client/src/names.rs index 66e356f46cc..53c1504d168 100644 --- a/internal-dns-client/src/names.rs +++ b/internal-dns-client/src/names.rs @@ -7,32 +7,43 @@ use uuid::Uuid; const DNS_ZONE: &str = "control-plane.oxide.internal"; +#[derive(Debug, PartialEq, PartialOrd)] pub enum ServiceName { - Nexus, + Clickhouse, Cockroach, + InternalDNS, + Nexus, + Oximeter, } impl fmt::Display for ServiceName { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match &self { + ServiceName::Clickhouse => write!(f, "clickhouse"), + ServiceName::Cockroach => write!(f, "cockroach"), + ServiceName::InternalDNS => write!(f, "internalDNS"), ServiceName::Nexus => write!(f, "nexus"), - ServiceName::Cockroach => write!(f, "cockroachdb"), + ServiceName::Oximeter => write!(f, "oximeter"), } } } +#[derive(Debug, PartialEq, PartialOrd)] pub enum BackendName { + Crucible, SledAgent, } impl fmt::Display for BackendName { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match &self { + BackendName::Crucible => write!(f, "crucible"), BackendName::SledAgent => write!(f, "sledagent"), } } } +#[derive(Debug, PartialEq, PartialOrd)] pub enum SRV { /// A service identified and accessed by name, such as "nexus", "CRDB", etc. 
/// diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index 16ef56fe77b..33835fa7087 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -14,6 +14,10 @@ use crate::db::model::ServiceKind; use crate::db::model::Sled; use crate::db::model::Zpool; use crate::Nexus; +use internal_dns_client::multiclient::{ + Service as DnsService, + Updater as DnsUpdater +}; use omicron_common::address::{ DNS_PORT, DNS_REDUNDANCY, DNS_SERVER_PORT, NEXUS_EXTERNAL_PORT, NEXUS_INTERNAL_PORT, @@ -82,11 +86,21 @@ const EXPECTED_DATASETS: [ExpectedDataset; 3] = [ pub struct ServiceBalancer { log: Logger, nexus: Arc, + dns_updater: DnsUpdater, } impl ServiceBalancer { pub fn new(log: Logger, nexus: Arc) -> Self { - Self { log, nexus } + let dns_updater = DnsUpdater::new( + nexus.az_subnet(), + log.new(o!("component" => "DNS Updater")), + ); + + Self { + log, + nexus, + dns_updater, + } } // Reaches out to all sled agents implied in "services", and @@ -94,7 +108,7 @@ impl ServiceBalancer { async fn instantiate_services( &self, opctx: &OpContext, - services: Vec, + mut services: Vec, ) -> Result<(), Error> { let mut sled_ids = HashSet::new(); for svc in &services { @@ -148,6 +162,15 @@ impl ServiceBalancer { }) .await?; } + + // Putting records of the same SRV right next to each other isn't + // strictly necessary, but doing so makes the record insertion more + // efficient. + services.sort_by(|a, b| a.srv().partial_cmp(&b.srv()).unwrap()); + self.dns_updater.insert_dns_records( + &services + ).await.map_err(|e| Error::internal_error(&e.to_string()))?; + Ok(()) } @@ -273,7 +296,7 @@ impl ServiceBalancer { .await?; // Actually instantiate those datasets. - self.instantiate_datasets(new_datasets).await + self.instantiate_datasets(new_datasets, kind).await } // Reaches out to all sled agents implied in "services", and @@ -281,7 +304,12 @@ impl ServiceBalancer { async fn instantiate_datasets( &self, datasets: Vec<(Sled, Zpool, Dataset)>, + kind: DatasetKind, ) -> Result<(), Error> { + if datasets.is_empty() { + return Ok(()); + } + let mut sled_clients = HashMap::new(); // TODO: We could issue these requests concurrently @@ -298,9 +326,10 @@ impl ServiceBalancer { } }; - let dataset_kind = match dataset.kind { + let dataset_kind = match kind { // TODO: This set of "all addresses" isn't right. // TODO: ... should we even be using "all addresses" to contact CRDB? + // Can it just rely on DNS, somehow? DatasetKind::Cockroach => { SledAgentTypes::DatasetKind::CockroachDb(vec![]) } @@ -321,6 +350,10 @@ impl ServiceBalancer { .await?; } + self.dns_updater.insert_dns_records( + &datasets.into_iter().map(|(_, _, dataset)| dataset).collect() + ).await.map_err(|e| Error::internal_error(&e.to_string()))?; + Ok(()) } diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 2f12e461b73..8b6cc606802 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -14,7 +14,7 @@ use crate::populate::PopulateArgs; use crate::populate::PopulateStatus; use crate::saga_interface::SagaContext; use anyhow::anyhow; -use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; +use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, RACK_PREFIX}; use omicron_common::api::external::Error; use slog::Logger; use std::sync::Arc; @@ -210,6 +210,10 @@ impl Nexus { nexus } + pub fn az_subnet(&self) -> Ipv6Subnet { + Ipv6Subnet::::new(self.rack_subnet.net().ip()) + } + /// Return the tunable configuration parameters, e.g. for use in tests. 
pub fn tunables(&self) -> &config::Tunables { &self.tunables diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index e4fc616f095..0e01112c532 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -16,7 +16,7 @@ use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use sled_agent_client::Client as SledAgentClient; -use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::net::{Ipv6Addr, SocketAddrV6}; use std::sync::Arc; use uuid::Uuid; @@ -135,7 +135,7 @@ impl super::Nexus { &self, id: Uuid, zpool_id: Uuid, - address: SocketAddr, + address: SocketAddrV6, kind: DatasetKind, ) -> Result<(), Error> { info!(self.log, "upserting dataset"; "zpool_id" => zpool_id.to_string(), "dataset_id" => id.to_string(), "address" => address.to_string()); diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 54985c276a9..6485cfd3d42 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -95,7 +95,7 @@ use omicron_common::api::external::{ use omicron_common::bail_unless; use sled_agent_client::types as sled_client_types; use std::convert::{TryFrom, TryInto}; -use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::net::{Ipv6Addr, SocketAddrV6}; use std::sync::Arc; use uuid::Uuid; @@ -753,12 +753,12 @@ impl DataStore { TxnError::CustomError(DatasetError::Other(e)) }) .map(|ip| { - SocketAddr::V6(SocketAddrV6::new( + SocketAddrV6::new( ip, kind.port(), 0, 0, - )) + ) })?; let dataset = db::model::Dataset::new( diff --git a/nexus/src/db/ipv6.rs b/nexus/src/db/ipv6.rs index 2b494100825..41855e0da0c 100644 --- a/nexus/src/db/ipv6.rs +++ b/nexus/src/db/ipv6.rs @@ -16,9 +16,11 @@ use diesel::sql_types::Inet; use ipnetwork::IpNetwork; use ipnetwork::Ipv6Network; use omicron_common::api::external::Error; +use serde::{Deserialize, Serialize}; #[derive( Clone, Copy, AsExpression, FromSqlRow, PartialEq, Ord, PartialOrd, Eq, + Deserialize, Serialize, )] #[diesel(sql_type = Inet)] pub struct Ipv6Addr(std::net::Ipv6Addr); diff --git a/nexus/src/db/model/dataset.rs b/nexus/src/db/model/dataset.rs index fa4e238fb47..652cd7ede0b 100644 --- a/nexus/src/db/model/dataset.rs +++ b/nexus/src/db/model/dataset.rs @@ -3,12 +3,15 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use super::{DatasetKind, Generation, Region, SqlU16}; +use crate::db::identity::Asset; use crate::db::collection_insert::DatastoreCollection; +use crate::db::ipv6; use crate::db::schema::{dataset, region}; use chrono::{DateTime, Utc}; use db_macros::Asset; +use internal_dns_client::names::{AAAA, SRV, ServiceName, BackendName}; use serde::{Deserialize, Serialize}; -use std::net::SocketAddr; +use std::net::{Ipv6Addr, SocketAddrV6}; use uuid::Uuid; /// Database representation of a Dataset. 
@@ -35,7 +38,7 @@ pub struct Dataset { pub pool_id: Uuid, - ip: ipnetwork::IpNetwork, + ip: ipv6::Ipv6Addr, port: SqlU16, pub kind: DatasetKind, @@ -43,11 +46,10 @@ pub struct Dataset { } impl Dataset { - // TODO: Only operate on SocketAddrV6 pub fn new( id: Uuid, pool_id: Uuid, - addr: SocketAddr, + addr: SocketAddrV6, kind: DatasetKind, ) -> Self { let size_used = match kind { @@ -66,12 +68,30 @@ impl Dataset { } } - pub fn address(&self) -> SocketAddr { + pub fn address(&self) -> SocketAddrV6 { self.address_with_port(self.port.into()) } - pub fn address_with_port(&self, port: u16) -> SocketAddr { - SocketAddr::new(self.ip.ip(), port) + pub fn address_with_port(&self, port: u16) -> SocketAddrV6 { + SocketAddrV6::new(Ipv6Addr::from(self.ip), port, 0, 0) + } +} + +impl internal_dns_client::multiclient::Service for Dataset { + fn aaaa(&self) -> AAAA { + AAAA::Zone(self.id()) + } + + fn srv(&self) -> SRV { + match self.kind { + DatasetKind::Crucible => SRV::Backend(BackendName::Crucible, self.id()), + DatasetKind::Clickhouse => SRV::Service(ServiceName::Clickhouse), + DatasetKind::Cockroach => SRV::Service(ServiceName::Cockroach), + } + } + + fn address(&self) -> SocketAddrV6 { + self.address() } } diff --git a/nexus/src/db/model/service.rs b/nexus/src/db/model/service.rs index 0762db538b1..0ff7987c464 100644 --- a/nexus/src/db/model/service.rs +++ b/nexus/src/db/model/service.rs @@ -4,9 +4,12 @@ use super::ServiceKind; use crate::db::ipv6; +use crate::db::identity::Asset; use crate::db::schema::service; use db_macros::Asset; -use std::net::Ipv6Addr; +use internal_dns_client::names::{AAAA, SRV, ServiceName}; +use omicron_common::address::{DNS_SERVER_PORT, NEXUS_INTERNAL_PORT, OXIMETER_PORT}; +use std::net::{Ipv6Addr, SocketAddrV6}; use uuid::Uuid; /// Representation of services which may run on Sleds. @@ -36,3 +39,26 @@ impl Service { } } } + +impl internal_dns_client::multiclient::Service for Service { + fn aaaa(&self) -> AAAA { + AAAA::Zone(self.id()) + } + + fn srv(&self) -> SRV { + match self.kind { + ServiceKind::InternalDNS => SRV::Service(ServiceName::InternalDNS), + ServiceKind::Nexus => SRV::Service(ServiceName::Nexus), + ServiceKind::Oximeter => SRV::Service(ServiceName::Oximeter), + } + } + + fn address(&self) -> SocketAddrV6 { + let port = match self.kind { + ServiceKind::InternalDNS => DNS_SERVER_PORT, + ServiceKind::Nexus => NEXUS_INTERNAL_PORT, + ServiceKind::Oximeter => OXIMETER_PORT, + }; + SocketAddrV6::new(Ipv6Addr::from(self.ip), port, 0, 0) + } +} diff --git a/nexus/src/internal_api/params.rs b/nexus/src/internal_api/params.rs index 7dda7610573..32199a08505 100644 --- a/nexus/src/internal_api/params.rs +++ b/nexus/src/internal_api/params.rs @@ -74,7 +74,7 @@ impl FromStr for DatasetKind { pub struct DatasetPutRequest { /// Address on which a service is responding to requests for the /// dataset. - pub address: SocketAddr, + pub address: SocketAddrV6, /// Type of dataset being inserted. pub kind: DatasetKind, diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 32fd6ab6248..261ec9ecb16 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
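The `multiclient::Service` impl above boils each record source down to three answers for the DNS updater: a unique AAAA name, the SRV key the record is grouped under, and the socket address to publish. A toy version of that shape, with plain strings in place of the real AAAA/SRV types:

use std::net::{Ipv6Addr, SocketAddrV6};

// What the updater needs from anything that wants a DNS record.
trait DnsRecord {
    fn aaaa(&self) -> String;          // unique per instance
    fn srv(&self) -> String;           // shared by interchangeable instances
    fn address(&self) -> SocketAddrV6; // where the record should point
}

struct ToyDataset {
    id: u32,
    ip: Ipv6Addr,
    port: u16,
}

impl DnsRecord for ToyDataset {
    fn aaaa(&self) -> String {
        format!("dataset-{}", self.id)
    }
    fn srv(&self) -> String {
        "cockroach".to_string()
    }
    fn address(&self) -> SocketAddrV6 {
        SocketAddrV6::new(self.ip, self.port, 0, 0)
    }
}

fn main() {
    let d = ToyDataset { id: 7, ip: Ipv6Addr::LOCALHOST, port: 8080 };
    println!("{} -> {} ({})", d.srv(), d.aaaa(), d.address());
}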
+use internal_dns_client::names::{BackendName, ServiceName, AAAA, SRV}; +use omicron_common::address::OXIMETER_PORT; use omicron_common::api::external; use omicron_common::api::internal::nexus::{ DiskRuntimeState, InstanceRuntimeState, @@ -252,14 +254,28 @@ pub struct DatasetEnsureBody { pub dataset_kind: DatasetKind, // The address on which the zone will listen for requests. pub address: SocketAddrV6, - // NOTE: We could insert a UUID here, if we want that to be set by the - // caller explicitly? Currently, the lack of a UUID implies that - // "at most one dataset type" exists within a zpool. - // - // It's unclear if this is actually necessary - making this change - // would also require the RSS to query existing datasets before - // requesting new ones (after all, we generally wouldn't want to - // create two CRDB datasets with different UUIDs on the same zpool). +} + +impl internal_dns_client::multiclient::Service for DatasetEnsureBody { + fn aaaa(&self) -> AAAA { + AAAA::Zone(self.id) + } + + fn srv(&self) -> SRV { + match self.dataset_kind { + DatasetKind::Crucible => { + SRV::Backend(BackendName::Crucible, self.id) + } + DatasetKind::Clickhouse => SRV::Service(ServiceName::Clickhouse), + DatasetKind::CockroachDb { .. } => { + SRV::Service(ServiceName::Cockroach) + } + } + } + + fn address(&self) -> SocketAddrV6 { + self.address + } } impl From for sled_agent_client::types::DatasetEnsureBody { @@ -331,6 +347,35 @@ pub struct ServiceRequest { pub service_type: ServiceType, } +impl internal_dns_client::multiclient::Service for ServiceRequest { + fn aaaa(&self) -> AAAA { + AAAA::Zone(self.id) + } + + fn srv(&self) -> SRV { + match self.service_type { + ServiceType::InternalDns { .. } => { + SRV::Service(ServiceName::InternalDNS) + } + ServiceType::Nexus { .. } => SRV::Service(ServiceName::Nexus), + ServiceType::Oximeter => SRV::Service(ServiceName::Oximeter), + } + } + + fn address(&self) -> SocketAddrV6 { + match self.service_type { + ServiceType::InternalDns { server_address, .. } => server_address, + ServiceType::Nexus { internal_address, .. } => internal_address, + ServiceType::Oximeter => SocketAddrV6::new( + Ipv6Addr::from(self.addresses[0]), + OXIMETER_PORT, + 0, + 0, + ), + } + } +} + impl From for sled_agent_client::types::ServiceRequest { fn from(s: ServiceRequest) -> Self { Self { diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 802fac5f304..3b2f7c4aa4d 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -15,7 +15,7 @@ use crate::rack_setup::plan::service::{ use crate::rack_setup::plan::sled::{ Plan as SledPlan, PlanError as SledPlanError, }; -use internal_dns_client::names::{ServiceName, AAAA, SRV}; +use internal_dns_client::names::{ServiceName, SRV}; use nexus_client::{ types as NexusTypes, Client as NexusClient, Error as NexusError, }; @@ -168,22 +168,11 @@ impl ServiceInner { } } - async fn initialize_crdb( + async fn initialize_datasets( &self, sled_address: SocketAddrV6, datasets: &Vec, ) -> Result<(), SetupServiceError> { - if datasets.iter().any(|dataset| { - !matches!( - dataset.dataset_kind, - crate::params::DatasetKind::CockroachDb { .. } - ) - }) { - return Err(SetupServiceError::BadConfig( - "RSS should only initialize CRDB services".into(), - )); - } - let dur = std::time::Duration::from_secs(60); let client = reqwest::ClientBuilder::new() .connect_timeout(dur) @@ -218,18 +207,10 @@ impl ServiceInner { } // Initialize DNS records for these datasets. 
- // - // CRDB is treated as a service, since they are interchangeable. - - let aaaa = datasets - .iter() - .map(|dataset| (AAAA::Zone(dataset.id), dataset.address)) - .collect::>(); - let srv_key = SRV::Service(ServiceName::Cockroach); self.dns_servers .get() .expect("DNS servers must be initialized first") - .insert_dns_records(&self.log, aaaa, srv_key) + .insert_dns_records(datasets) .await?; Ok(()) @@ -272,42 +253,11 @@ impl ServiceInner { retry_notify(internal_service_policy(), services_put, log_failure) .await?; - // Initialize DNS records for the Nexus service. - let services: Vec<_> = services - .iter() - .filter(|svc| { - matches!( - svc.service_type, - crate::params::ServiceType::Nexus { .. } - ) - }) - .collect(); - - // Early-exit for non-Nexus case - if services.is_empty() { - return Ok(()); - } - - // Otherwise, insert DNS records for Nexus - let aaaa = services - .iter() - .map(|service| { - ( - AAAA::Zone(service.id), - SocketAddrV6::new( - service.addresses[0], - NEXUS_INTERNAL_PORT, - 0, - 0, - ), - ) - }) - .collect::>(); - let srv_key = SRV::Service(ServiceName::Nexus); + // Insert DNS records self.dns_servers .get() .expect("DNS servers must be initialized first") - .insert_dns_records(&self.log, aaaa, srv_key) + .insert_dns_records(services) .await?; Ok(()) @@ -609,8 +559,11 @@ impl ServiceInner { // Issue the crdb initialization requests to all sleds. futures::future::join_all(service_plan.services.iter().map( |(sled_address, services_request)| async move { - self.initialize_crdb(*sled_address, &services_request.datasets) - .await?; + self.initialize_datasets( + *sled_address, + &services_request.datasets, + ) + .await?; Ok(()) }, )) @@ -622,9 +575,12 @@ impl ServiceInner { // Issue service initialization requests. // - // Note that this must happen *after* the dataset initialization, + // NOTE: This must happen *after* the dataset initialization, // to ensure that CockroachDB has been initialized before Nexus // starts. + // + // If Nexus was more resilient to concurrent initialization + // of CRDB, this requirement could be relaxed. 
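The ordering constraint spelled out above is enforced simply by awaiting the entire dataset phase before starting the service phase, while everything inside each phase still goes out to all sleds at once via `join_all`. A stripped-down sketch of that two-phase shape; the sled names, helper functions, and tokio/futures scaffolding are illustrative, not the RSS's actual types:

use futures::future::join_all;

async fn init_datasets(sled: &str) -> Result<(), String> {
    println!("datasets ready on {sled}");
    Ok(())
}

async fn init_services(sled: &str) -> Result<(), String> {
    println!("services started on {sled}");
    Ok(())
}

#[tokio::main]
async fn main() -> Result<(), String> {
    let sleds = ["sled-a", "sled-b"];

    // Phase 1: datasets, all sleds concurrently.
    join_all(sleds.iter().copied().map(init_datasets))
        .await
        .into_iter()
        .collect::<Result<Vec<_>, _>>()?;

    // Phase 2: services, only after every dataset request has finished,
    // so CockroachDB exists before Nexus comes up against it.
    join_all(sleds.iter().copied().map(init_services))
        .await
        .into_iter()
        .collect::<Result<Vec<_>, _>>()?;

    Ok(())
}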
futures::future::join_all(service_plan.services.iter().map( |(sled_address, services_request)| async move { // With the current implementation of "initialize_services", From 746114bdb04e7476184f3360aaded2a3cef174d9 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 23:55:23 -0400 Subject: [PATCH 14/35] Fix dns client bug, start shortening timeouts --- internal-dns-client/src/multiclient.rs | 18 +++++++----------- nexus/src/app/background/services.rs | 25 ++++++++++++------------- nexus/src/db/datastore.rs | 9 +-------- nexus/src/db/ipv6.rs | 12 ++++++++++-- nexus/src/db/model/dataset.rs | 8 +++++--- nexus/src/db/model/service.rs | 8 +++++--- sled-agent/src/rack_setup/service.rs | 10 ++++------ sled-agent/src/storage_manager.rs | 4 +++- 8 files changed, 47 insertions(+), 47 deletions(-) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index de14055197b..d91a58d4d73 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -63,10 +63,11 @@ impl Updater { while let Some(record) = records.next() { let srv = record.srv(); + info!(self.log, "Inserting DNS record: {:?}", srv); match &srv { &crate::names::SRV::Service(_) => { - let mut aaaa = vec![]; + let mut aaaa = vec![(record.aaaa(), record.address())]; while let Some(record) = records.peek() { if record.srv() == srv { let record = records.next().unwrap(); @@ -76,18 +77,13 @@ impl Updater { } } - self.insert_dns_records_internal( - aaaa, - srv, - ).await?; - }, + self.insert_dns_records_internal(aaaa, srv).await?; + } &crate::names::SRV::Backend(_, _) => { let aaaa = vec![(record.aaaa(), record.address())]; - self.insert_dns_records_internal( - aaaa, - record.srv(), - ).await?; - }, + self.insert_dns_records_internal(aaaa, record.srv()) + .await?; + } }; } Ok(()) diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index 33835fa7087..eca4d336607 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -15,8 +15,7 @@ use crate::db::model::Sled; use crate::db::model::Zpool; use crate::Nexus; use internal_dns_client::multiclient::{ - Service as DnsService, - Updater as DnsUpdater + Service as DnsService, Updater as DnsUpdater, }; use omicron_common::address::{ DNS_PORT, DNS_REDUNDANCY, DNS_SERVER_PORT, NEXUS_EXTERNAL_PORT, @@ -96,11 +95,7 @@ impl ServiceBalancer { log.new(o!("component" => "DNS Updater")), ); - Self { - log, - nexus, - dns_updater, - } + Self { log, nexus, dns_updater } } // Reaches out to all sled agents implied in "services", and @@ -167,9 +162,10 @@ impl ServiceBalancer { // strictly necessary, but doing so makes the record insertion more // efficient. 
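The sort that follows exists only so the Updater can batch efficiently: records sharing an SRV key end up adjacent, and one insertion then carries the whole AAAA set for that key. The same consecutive-grouping idea over plain (key, address) pairs, simplified from the peekable-iterator version in the Updater:

// Group already-sorted (srv_key, address) pairs so each key is pushed to the
// DNS servers once, with all of its AAAA records in a single batch.
fn group_consecutive(records: &[(String, String)]) -> Vec<(String, Vec<String>)> {
    let mut grouped: Vec<(String, Vec<String>)> = Vec::new();
    for (key, addr) in records {
        let same_key = grouped.last().map(|(k, _)| k == key).unwrap_or(false);
        if same_key {
            grouped.last_mut().unwrap().1.push(addr.clone());
        } else {
            grouped.push((key.clone(), vec![addr.clone()]));
        }
    }
    grouped
}

fn main() {
    let mut records = vec![
        ("nexus".to_string(), "fd00::5".to_string()),
        ("cockroach".to_string(), "fd00::3".to_string()),
        ("nexus".to_string(), "fd00::4".to_string()),
    ];
    records.sort();
    // [("cockroach", ["fd00::3"]), ("nexus", ["fd00::4", "fd00::5"])]
    println!("{:?}", group_consecutive(&records));
}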
services.sort_by(|a, b| a.srv().partial_cmp(&b.srv()).unwrap()); - self.dns_updater.insert_dns_records( - &services - ).await.map_err(|e| Error::internal_error(&e.to_string()))?; + self.dns_updater + .insert_dns_records(&services) + .await + .map_err(|e| Error::internal_error(&e.to_string()))?; Ok(()) } @@ -350,9 +346,12 @@ impl ServiceBalancer { .await?; } - self.dns_updater.insert_dns_records( - &datasets.into_iter().map(|(_, _, dataset)| dataset).collect() - ).await.map_err(|e| Error::internal_error(&e.to_string()))?; + self.dns_updater + .insert_dns_records( + &datasets.into_iter().map(|(_, _, dataset)| dataset).collect(), + ) + .await + .map_err(|e| Error::internal_error(&e.to_string()))?; Ok(()) } diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 6485cfd3d42..3ea27b32c7b 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -752,14 +752,7 @@ impl DataStore { .map_err(|e| { TxnError::CustomError(DatasetError::Other(e)) }) - .map(|ip| { - SocketAddrV6::new( - ip, - kind.port(), - 0, - 0, - ) - })?; + .map(|ip| SocketAddrV6::new(ip, kind.port(), 0, 0))?; let dataset = db::model::Dataset::new( dataset_id, diff --git a/nexus/src/db/ipv6.rs b/nexus/src/db/ipv6.rs index 41855e0da0c..60f7c0558c6 100644 --- a/nexus/src/db/ipv6.rs +++ b/nexus/src/db/ipv6.rs @@ -19,8 +19,16 @@ use omicron_common::api::external::Error; use serde::{Deserialize, Serialize}; #[derive( - Clone, Copy, AsExpression, FromSqlRow, PartialEq, Ord, PartialOrd, Eq, - Deserialize, Serialize, + Clone, + Copy, + AsExpression, + FromSqlRow, + PartialEq, + Ord, + PartialOrd, + Eq, + Deserialize, + Serialize, )] #[diesel(sql_type = Inet)] pub struct Ipv6Addr(std::net::Ipv6Addr); diff --git a/nexus/src/db/model/dataset.rs b/nexus/src/db/model/dataset.rs index 652cd7ede0b..4b2b294542a 100644 --- a/nexus/src/db/model/dataset.rs +++ b/nexus/src/db/model/dataset.rs @@ -3,13 +3,13 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use super::{DatasetKind, Generation, Region, SqlU16}; -use crate::db::identity::Asset; use crate::db::collection_insert::DatastoreCollection; +use crate::db::identity::Asset; use crate::db::ipv6; use crate::db::schema::{dataset, region}; use chrono::{DateTime, Utc}; use db_macros::Asset; -use internal_dns_client::names::{AAAA, SRV, ServiceName, BackendName}; +use internal_dns_client::names::{BackendName, ServiceName, AAAA, SRV}; use serde::{Deserialize, Serialize}; use std::net::{Ipv6Addr, SocketAddrV6}; use uuid::Uuid; @@ -84,7 +84,9 @@ impl internal_dns_client::multiclient::Service for Dataset { fn srv(&self) -> SRV { match self.kind { - DatasetKind::Crucible => SRV::Backend(BackendName::Crucible, self.id()), + DatasetKind::Crucible => { + SRV::Backend(BackendName::Crucible, self.id()) + } DatasetKind::Clickhouse => SRV::Service(ServiceName::Clickhouse), DatasetKind::Cockroach => SRV::Service(ServiceName::Cockroach), } diff --git a/nexus/src/db/model/service.rs b/nexus/src/db/model/service.rs index 0ff7987c464..6f05011d415 100644 --- a/nexus/src/db/model/service.rs +++ b/nexus/src/db/model/service.rs @@ -3,12 +3,14 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use super::ServiceKind; -use crate::db::ipv6; use crate::db::identity::Asset; +use crate::db::ipv6; use crate::db::schema::service; use db_macros::Asset; -use internal_dns_client::names::{AAAA, SRV, ServiceName}; -use omicron_common::address::{DNS_SERVER_PORT, NEXUS_INTERNAL_PORT, OXIMETER_PORT}; +use internal_dns_client::names::{ServiceName, AAAA, SRV}; +use omicron_common::address::{ + DNS_SERVER_PORT, NEXUS_INTERNAL_PORT, OXIMETER_PORT, +}; use std::net::{Ipv6Addr, SocketAddrV6}; use uuid::Uuid; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 3b2f7c4aa4d..862c0e05e21 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -253,12 +253,10 @@ impl ServiceInner { retry_notify(internal_service_policy(), services_put, log_failure) .await?; - // Insert DNS records - self.dns_servers - .get() - .expect("DNS servers must be initialized first") - .insert_dns_records(services) - .await?; + // Insert DNS records, if the DNS servers have been initialized + if let Some(dns_servers) = self.dns_servers.get() { + dns_servers.insert_dns_records(services).await?; + } Ok(()) } diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index d559ee70589..d010a9bfff5 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -655,7 +655,9 @@ impl StorageWorker { }; nexus_notifications.push( backoff::retry_notify( - backoff::internal_service_policy(), + backoff::internal_service_policy_with_max( + std::time::Duration::from_secs(1), + ), notify_nexus, log_post_failure, ) From 1b019b1586347f3c42cdd82de3afa3e7f03860eb Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 16 Jun 2022 00:07:05 -0400 Subject: [PATCH 15/35] clippy --- sled-agent/src/params.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 261ec9ecb16..bd74b1ca324 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -366,12 +366,9 @@ impl internal_dns_client::multiclient::Service for ServiceRequest { match self.service_type { ServiceType::InternalDns { server_address, .. } => server_address, ServiceType::Nexus { internal_address, .. 
} => internal_address, - ServiceType::Oximeter => SocketAddrV6::new( - Ipv6Addr::from(self.addresses[0]), - OXIMETER_PORT, - 0, - 0, - ), + ServiceType::Oximeter => { + SocketAddrV6::new(self.addresses[0], OXIMETER_PORT, 0, 0) + } } } } From 94b4b46723e5906f7340cbe45791b8cf5aec46d2 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 16 Jun 2022 01:28:46 -0400 Subject: [PATCH 16/35] Concurrent provisioning --- nexus/src/app/background/services.rs | 259 ++++++++++++---------- nexus/src/db/datastore.rs | 16 +- nexus/tests/integration_tests/datasets.rs | 8 +- sled-agent/src/rack_setup/service.rs | 11 +- sled-agent/src/server.rs | 4 +- sled-agent/src/services.rs | 4 +- sled-agent/src/sled_agent.rs | 2 +- sled-agent/src/storage_manager.rs | 62 +----- 8 files changed, 167 insertions(+), 199 deletions(-) diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index eca4d336607..7178ae7930b 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -14,6 +14,7 @@ use crate::db::model::ServiceKind; use crate::db::model::Sled; use crate::db::model::Zpool; use crate::Nexus; +use futures::stream::{self, StreamExt, TryStreamExt}; use internal_dns_client::multiclient::{ Service as DnsService, Updater as DnsUpdater, }; @@ -112,51 +113,58 @@ impl ServiceBalancer { // For all sleds requiring an update, request all services be // instantiated. - for sled_id in &sled_ids { - // TODO: This interface kinda sucks; ideally we would - // only insert the *new* services. - // - // Inserting the old ones too is costing us an extra query. - let services = - self.nexus.datastore().service_list(opctx, *sled_id).await?; - let sled_client = self.nexus.sled_client(sled_id).await?; - - info!(self.log, "instantiate_services: {:?}", services); - - sled_client - .services_put(&SledAgentTypes::ServiceEnsureBody { - services: services - .iter() - .map(|s| { - let address = Ipv6Addr::from(s.ip); - let (name, service_type) = - Self::get_service_name_and_type( - address, s.kind, - ); - - // TODO: This is hacky, specifically to inject - // global zone addresses in the DNS service. - let gz_addresses = match &s.kind { - ServiceKind::InternalDNS => { - let mut octets = address.octets(); - octets[15] = octets[15] + 1; - vec![Ipv6Addr::from(octets)] + stream::iter(&sled_ids) + .map(Ok::<_, Error>) + .try_for_each_concurrent(None, |sled_id| async { + // TODO: This interface kinda sucks; ideally we would + // only insert the *new* services. + // + // Inserting the old ones too is costing us an extra query. + let services = self + .nexus + .datastore() + .service_list(opctx, *sled_id) + .await?; + let sled_client = self.nexus.sled_client(sled_id).await?; + + info!(self.log, "instantiate_services: {:?}", services); + + sled_client + .services_put(&SledAgentTypes::ServiceEnsureBody { + services: services + .iter() + .map(|s| { + let address = Ipv6Addr::from(s.ip); + let (name, service_type) = + Self::get_service_name_and_type( + address, s.kind, + ); + + // TODO: This is hacky, specifically to inject + // global zone addresses in the DNS service. 
+ let gz_addresses = match &s.kind { + ServiceKind::InternalDNS => { + let mut octets = address.octets(); + octets[15] = octets[15] + 1; + vec![Ipv6Addr::from(octets)] + } + _ => vec![], + }; + + SledAgentTypes::ServiceRequest { + id: s.id(), + name, + addresses: vec![address], + gz_addresses, + service_type, } - _ => vec![], - }; - - SledAgentTypes::ServiceRequest { - id: s.id(), - name, - addresses: vec![address], - gz_addresses, - service_type, - } - }) - .collect(), - }) - .await?; - } + }) + .collect(), + }) + .await?; + Ok(()) + }) + .await?; // Putting records of the same SRV right next to each other isn't // strictly necessary, but doing so makes the record insertion more @@ -215,40 +223,33 @@ impl ServiceBalancer { } } - async fn ensure_rack_service( + // Provision the services within the database. + async fn provision_rack_service( &self, opctx: &OpContext, kind: ServiceKind, desired_count: u32, - ) -> Result<(), Error> { - // Provision the services within the database. - let services = self - .nexus + ) -> Result, Error> { + self.nexus .datastore() .ensure_rack_service(opctx, self.nexus.rack_id, kind, desired_count) - .await?; - - // Actually instantiate those services. - self.instantiate_services(opctx, services).await + .await } - async fn ensure_dns_service( + // Provision the services within the database. + async fn provision_dns_service( &self, opctx: &OpContext, desired_count: u32, - ) -> Result<(), Error> { - // Provision the services within the database. - let services = self - .nexus + ) -> Result, Error> { + self.nexus .datastore() .ensure_dns_service(opctx, self.nexus.rack_subnet, desired_count) - .await?; - - // Actually instantiate those services. - self.instantiate_services(opctx, services).await + .await } - // TODO: Consider using sagas to ensure the rollout of services happens. + // TODO: Consider using sagas to ensure the rollout of services. + // // Not using sagas *happens* to be fine because these operations are // re-tried periodically, but that's kind forcing a dependency on the // caller. @@ -256,25 +257,34 @@ impl ServiceBalancer { &self, opctx: &OpContext, ) -> Result<(), Error> { - // NOTE: If any sleds host DNS + other redudant services, we send - // redundant requests. We could propagate the service list up to a - // higher level, and do instantiation after all services complete? + // Provision services within the database. + let mut svcs = vec![]; for expected_svc in &EXPECTED_SERVICES { info!(self.log, "Ensuring service {:?} exists", expected_svc); match expected_svc.redundancy { ServiceRedundancy::PerRack(desired_count) => { - self.ensure_rack_service( - opctx, - expected_svc.kind, - desired_count, - ) - .await?; + svcs.extend_from_slice( + &self + .provision_rack_service( + opctx, + expected_svc.kind, + desired_count, + ) + .await?, + ); } ServiceRedundancy::DnsPerAz(desired_count) => { - self.ensure_dns_service(opctx, desired_count).await?; + svcs.extend_from_slice( + &self + .provision_dns_service(opctx, desired_count) + .await?, + ); } } } + + // Ensure services exist on the target sleds. + self.instantiate_services(opctx, svcs).await?; Ok(()) } @@ -306,46 +316,50 @@ impl ServiceBalancer { return Ok(()); } + // Ensure that there is one connection per sled. 
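Patch 16's switch from a sequential loop to `stream::iter(...).try_for_each_concurrent(None, ...)` means every affected sled gets its request at the same time, and the first error short-circuits the remaining work. A minimal standalone version of that fan-out; the sled IDs and `push_config` helper are made up, and `None` leaves the concurrency unbounded just as in the patch:

use futures::stream::{self, StreamExt, TryStreamExt};

async fn push_config(sled_id: u32) -> Result<(), String> {
    println!("configured sled {sled_id}");
    Ok(())
}

#[tokio::main]
async fn main() -> Result<(), String> {
    let sled_ids = vec![1u32, 2, 3];

    stream::iter(&sled_ids)
        .map(Ok::<_, String>)
        .try_for_each_concurrent(None, |sled_id| async move {
            push_config(*sled_id).await
        })
        .await
}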
let mut sled_clients = HashMap::new(); + for (sled, _, _) in &datasets { + if sled_clients.get(&sled.id()).is_none() { + let sled_client = self.nexus.sled_client(&sled.id()).await?; + sled_clients.insert(sled.id(), sled_client); + } + } - // TODO: We could issue these requests concurrently - for (sled, zpool, dataset) in &datasets { - let sled_client = { - match sled_clients.get(&sled.id()) { - Some(client) => client, - None => { - let sled_client = - self.nexus.sled_client(&sled.id()).await?; - sled_clients.insert(sled.id(), sled_client); - sled_clients.get(&sled.id()).unwrap() + // Issue all dataset instantiation requests concurrently. + stream::iter(&datasets) + .map(Ok::<_, Error>) + .try_for_each_concurrent(None, |(sled, zpool, dataset)| async { + let sled_client = sled_clients.get(&sled.id()).unwrap(); + + let dataset_kind = match kind { + // TODO: This set of "all addresses" isn't right. + // TODO: ... should we even be using "all addresses" to contact CRDB? + // Can it just rely on DNS, somehow? + DatasetKind::Cockroach => { + SledAgentTypes::DatasetKind::CockroachDb(vec![]) } - } - }; - - let dataset_kind = match kind { - // TODO: This set of "all addresses" isn't right. - // TODO: ... should we even be using "all addresses" to contact CRDB? - // Can it just rely on DNS, somehow? - DatasetKind::Cockroach => { - SledAgentTypes::DatasetKind::CockroachDb(vec![]) - } - DatasetKind::Crucible => SledAgentTypes::DatasetKind::Crucible, - DatasetKind::Clickhouse => { - SledAgentTypes::DatasetKind::Clickhouse - } - }; - - // Instantiate each dataset. - sled_client - .filesystem_put(&SledAgentTypes::DatasetEnsureBody { - id: dataset.id(), - zpool_id: zpool.id(), - dataset_kind, - address: dataset.address().to_string(), - }) - .await?; - } + DatasetKind::Crucible => { + SledAgentTypes::DatasetKind::Crucible + } + DatasetKind::Clickhouse => { + SledAgentTypes::DatasetKind::Clickhouse + } + }; + + // Instantiate each dataset. + sled_client + .filesystem_put(&SledAgentTypes::DatasetEnsureBody { + id: dataset.id(), + zpool_id: zpool.id(), + dataset_kind, + address: dataset.address().to_string(), + }) + .await?; + Ok(()) + }) + .await?; + // Ensure all DNS records are updated for the created datasets. self.dns_updater .insert_dns_records( &datasets.into_iter().map(|(_, _, dataset)| dataset).collect(), @@ -360,16 +374,23 @@ impl ServiceBalancer { &self, opctx: &OpContext, ) -> Result<(), Error> { - for expected_dataset in &EXPECTED_DATASETS { - info!(self.log, "Ensuring dataset {:?} exists", expected_dataset); - self.ensure_rack_dataset( - opctx, - expected_dataset.kind, - expected_dataset.redundancy, - ) - .await? - } - Ok(()) + // Provision all dataset types concurrently. 
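Building the client map up front, as above, keeps the concurrent loop that follows from constructing duplicate connections to the same sled. The same memoization with plain types; a String stands in for the sled-agent client:

use std::collections::HashMap;

// At most one "client" per sled, reused for every dataset on that sled.
fn clients_for<'a>(
    sled_ids: impl Iterator<Item = &'a u32>,
) -> HashMap<u32, String> {
    let mut clients = HashMap::new();
    for id in sled_ids {
        clients.entry(*id).or_insert_with(|| format!("client-for-sled-{id}"));
    }
    clients
}

fn main() {
    // Three datasets, two of them on the same sled: only two clients.
    let datasets = [(1u32, "crucible"), (1, "cockroach"), (2, "clickhouse")];
    let clients = clients_for(datasets.iter().map(|(sled, _)| sled));
    assert_eq!(clients.len(), 2);
}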
+ stream::iter(&EXPECTED_DATASETS) + .map(Ok::<_, Error>) + .try_for_each_concurrent(None, |expected_dataset| async move { + info!( + self.log, + "Ensuring dataset {:?} exists", expected_dataset + ); + self.ensure_rack_dataset( + opctx, + expected_dataset.kind, + expected_dataset.redundancy, + ) + .await?; + Ok(()) + }) + .await } // Provides a single point-in-time evaluation and adjustment of diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 3ea27b32c7b..a21895938f3 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -4459,9 +4459,7 @@ mod test { }; use omicron_test_utils::dev; use std::collections::{HashMap, HashSet}; - use std::net::Ipv6Addr; - use std::net::SocketAddrV6; - use std::net::{IpAddr, Ipv4Addr, SocketAddr}; + use std::net::{Ipv6Addr, SocketAddrV6}; use std::sync::Arc; use uuid::Uuid; @@ -4677,8 +4675,7 @@ mod test { // ... and datasets within that zpool. let dataset_count = REGION_REDUNDANCY_THRESHOLD * 2; - let bogus_addr = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let dataset_ids: Vec = (0..dataset_count).map(|_| Uuid::new_v4()).collect(); for id in &dataset_ids { @@ -4759,8 +4756,7 @@ mod test { // ... and datasets within that zpool. let dataset_count = REGION_REDUNDANCY_THRESHOLD; - let bogus_addr = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let dataset_ids: Vec = (0..dataset_count).map(|_| Uuid::new_v4()).collect(); for id in &dataset_ids { @@ -4826,8 +4822,7 @@ mod test { // ... and datasets within that zpool. let dataset_count = REGION_REDUNDANCY_THRESHOLD - 1; - let bogus_addr = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let dataset_ids: Vec = (0..dataset_count).map(|_| Uuid::new_v4()).collect(); for id in &dataset_ids { @@ -4878,8 +4873,7 @@ mod test { // ... and datasets within that zpool. 
let dataset_count = REGION_REDUNDANCY_THRESHOLD; - let bogus_addr = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let dataset_ids: Vec = (0..dataset_count).map(|_| Uuid::new_v4()).collect(); for id in &dataset_ids { diff --git a/nexus/tests/integration_tests/datasets.rs b/nexus/tests/integration_tests/datasets.rs index ebc89f71378..d65a7fa1f81 100644 --- a/nexus/tests/integration_tests/datasets.rs +++ b/nexus/tests/integration_tests/datasets.rs @@ -8,7 +8,7 @@ use omicron_common::api::external::ByteCount; use omicron_nexus::internal_api::params::{ DatasetKind, DatasetPutRequest, ZpoolPutRequest, }; -use std::net::{IpAddr, Ipv4Addr, SocketAddr}; +use std::net::{Ipv6Addr, SocketAddrV6}; use uuid::Uuid; use nexus_test_utils::{ControlPlaneTestContext, SLED_AGENT_UUID}; @@ -36,8 +36,7 @@ async fn test_dataset_put_success(cptestctx: &ControlPlaneTestContext) { .await .unwrap(); - let address = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let address = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let kind = DatasetKind::Crucible; let request = DatasetPutRequest { address, kind }; let dataset_id = Uuid::new_v4(); @@ -69,8 +68,7 @@ async fn test_dataset_put_bad_zpool_returns_not_found( let dataset_put_url = format!("/zpools/{}/dataset/{}", zpool_id, dataset_id); - let address = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let address = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let kind = DatasetKind::Crucible; let request = DatasetPutRequest { address, kind }; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 862c0e05e21..29b579484bf 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -21,7 +21,8 @@ use nexus_client::{ }; use omicron_common::address::{get_sled_address, NEXUS_INTERNAL_PORT}; use omicron_common::backoff::{ - internal_service_policy, retry_notify, BackoffError, + internal_service_policy, internal_service_policy_with_max, retry_notify, + BackoffError, }; use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, @@ -398,8 +399,12 @@ impl ServiceInner { info!(self.log, "Failed to handoff to nexus: {err}"); }; - retry_notify(internal_service_policy(), notify_nexus, log_failure) - .await?; + retry_notify( + internal_service_policy_with_max(std::time::Duration::from_secs(1)), + notify_nexus, + log_failure, + ) + .await?; info!(self.log, "Handoff to Nexus is complete"); Ok(()) diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 97920925789..6725a8351e7 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -100,12 +100,12 @@ impl Server { let log_notification_failure = |err, delay| { warn!( log, - "failed to contact nexus: {}, will retry in {:?}", err, delay; + "failed to notify nexus about sled agent: {}, will retry in {:?}", err, delay; ); }; retry_notify( internal_service_policy_with_max( - std::time::Duration::from_secs(5), + std::time::Duration::from_secs(1), ), notify_nexus, log_notification_failure, diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index fd89dc686a1..2900bf7761c 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -202,11 +202,11 @@ impl ServiceManager { existing_zones: &mut Vec, services: &Vec, ) -> Result<(), Error> { - info!(self.log, "Ensuring services are initialized: {:?}", services); // 
TODO(https://github.com/oxidecomputer/omicron/issues/726): // As long as we ensure the requests don't overlap, we could // parallelize this request. for service in services { + info!(self.log, "Ensuring service is initialized: {:?}", service); // Before we bother allocating anything for this request, check if // this service has already been created. let expected_zone_name = @@ -332,8 +332,6 @@ impl ServiceManager { subnet: Ipv6Subnet::::new( self.underlay_address, ), - // TODO: Switch to inferring this URL by DNS. - // "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" database: nexus_config::Database::FromDns, }; diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index c7e8faa391c..11212cf8e44 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -324,7 +324,7 @@ impl SledAgent { .lazy_nexus_client .get() .await - // TODO: Handle error + // TODO: Handle error... or push out lazy nexus client. .unwrap(); crate::updates::download_artifact(artifact, &nexus_client).await?; Ok(()) diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index d010a9bfff5..bb0a21930cc 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -241,6 +241,9 @@ impl DatasetInfo { address: SocketAddrV6, do_format: bool, ) -> Result<(), Error> { + // TODO: Related to + // https://github.com/oxidecomputer/omicron/pull/1124 , should we + // avoid importing these manifests? match self.kind { DatasetKind::CockroachDb { .. } => { info!(log, "start_zone: Loading CRDB manifest"); @@ -317,7 +320,9 @@ impl DatasetInfo { warn!(log, "cockroachdb not yet alive"); }; backoff::retry_notify( - backoff::internal_service_policy(), + backoff::internal_service_policy_with_max( + std::time::Duration::from_secs(1), + ), check_health, log_failure, ) @@ -650,7 +655,7 @@ impl StorageWorker { let log_post_failure = move |_, delay| { warn!( log, - "failed to notify nexus, will retry in {:?}", delay; + "failed to notify nexus about zpool, will retry in {:?}", delay; ); }; nexus_notifications.push( @@ -665,59 +670,6 @@ impl StorageWorker { ); } - // Adds a "notification to nexus" to `nexus_notifications`, - // informing it about the addition of `datasets` to `pool_id`. - /* - fn add_datasets_notify( - &self, - nexus_notifications: &mut FuturesOrdered>>, - datasets: Vec<(Uuid, SocketAddrV6, DatasetKind)>, - pool_id: Uuid, - ) { - let lazy_nexus_client = self.lazy_nexus_client.clone(); - let notify_nexus = move || { - let lazy_nexus_client = lazy_nexus_client.clone(); - let datasets = datasets.clone(); - async move { - for (id, address, kind) in datasets { - let request = DatasetPutRequest { - address: address.to_string(), - kind: kind.into(), - }; - lazy_nexus_client - .get() - .await - .map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })? - .dataset_put(&pool_id, &id, &request) - .await - .map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })?; - } - - Ok(()) - } - }; - let log = self.log.clone(); - let log_post_failure = move |_, delay| { - warn!( - log, - "failed to notify nexus about datasets, will retry in {:?}", delay; - ); - }; - nexus_notifications.push( - backoff::retry_notify( - backoff::internal_service_policy(), - notify_nexus, - log_post_failure, - ) - .boxed(), - ); - } - */ - // TODO: a lot of these functions act on the `FuturesOrdered` - should // that just be a part of the "worker" struct? 
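This patch and the previous ones repeatedly swap `internal_service_policy()` for `internal_service_policy_with_max(Duration::from_secs(1))`. Assuming the capped variant behaves like the default policy with a ceiling on the delay, the effect is that a dependency which comes up late keeps being re-polled about once a second rather than after an ever-growing backoff. A toy illustration of the delay schedule; the names are illustrative, not the real omicron_common::backoff API:

use std::time::Duration;

// Exponential growth clamped at `max`: what a capped retry policy does to
// the wait between attempts.
fn next_delay(current: Duration, max: Duration) -> Duration {
    (current * 2).min(max)
}

fn main() {
    let max = Duration::from_secs(1);
    let mut delay = Duration::from_millis(50);
    for attempt in 1..=6 {
        println!("attempt {attempt}: waiting {delay:?} before retrying");
        delay = next_delay(delay, max);
    }
    // 50ms, 100ms, 200ms, 400ms, 800ms, then pinned at 1s.
}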
From a02e009b9a2ccbd325fe99c1fcf0b43fb7ff23d0 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 16 Jun 2022 11:25:29 -0400 Subject: [PATCH 17/35] Dynamic oximeter config --- Cargo.lock | 2 + internal-dns-client/Cargo.toml | 1 + internal-dns-client/src/multiclient.rs | 64 +++++++++++++++++++++++++- internal-dns-client/src/names.rs | 6 +-- oximeter/collector/Cargo.toml | 1 + oximeter/collector/config.toml | 7 --- oximeter/collector/src/bin/oximeter.rs | 13 +++++- oximeter/collector/src/lib.rs | 62 ++++++++++++++++--------- sled-agent/src/instance.rs | 3 +- sled-agent/src/instance_manager.rs | 6 ++- sled-agent/src/nexus.rs | 43 ++++++++--------- sled-agent/src/server.rs | 5 +- sled-agent/src/services.rs | 48 +++++++++++++++++-- sled-agent/src/storage_manager.rs | 4 +- smf/oximeter/config.toml | 8 ---- smf/oximeter/manifest.xml | 7 ++- 16 files changed, 205 insertions(+), 75 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4c672006255..25bd98d2e97 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2346,6 +2346,7 @@ dependencies = [ "serde", "serde_json", "slog", + "thiserror", "trust-dns-proto", "trust-dns-resolver", "uuid", @@ -3440,6 +3441,7 @@ dependencies = [ "clap 3.2.5", "dropshot", "expectorate", + "internal-dns-client", "nexus-client 0.1.0", "omicron-common 0.1.0", "omicron-test-utils", diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml index 3303ddfc44c..9572b53b40f 100644 --- a/internal-dns-client/Cargo.toml +++ b/internal-dns-client/Cargo.toml @@ -12,6 +12,7 @@ reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } serde = { version = "1.0", features = [ "derive" ] } serde_json = "1.0" slog = { version = "2.5.0", features = [ "max_level_trace", "release_max_level_debug" ] } +thiserror = "1.0" trust-dns-proto = "0.21" trust-dns-resolver = "0.21" uuid = { version = "1.1.0", features = [ "v4", "serde" ] } diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index d91a58d4d73..19336bd3d17 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -11,7 +11,7 @@ use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, }; use slog::{info, warn, Logger}; -use std::net::{SocketAddr, SocketAddrV6}; +use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use trust_dns_resolver::config::{ NameServerConfig, Protocol, ResolverConfig, ResolverOpts, }; @@ -197,3 +197,65 @@ pub fn create_resolver( } TokioAsyncResolver::tokio(rc, ResolverOpts::default()) } + +#[derive(Debug, Clone, thiserror::Error)] +pub enum ResolveError { + #[error(transparent)] + Resolve(#[from] trust_dns_resolver::error::ResolveError), + + #[error("Record not found for SRV key: {0}")] + NotFound(crate::names::SRV), +} + +/// A wrapper around a DNS resolver, providing a way to conveniently +/// look up IP addresses of services based on their SRV keys. +pub struct Resolver { + inner: TokioAsyncResolver, +} + +impl Resolver { + /// Creates a DNS resolver, looking up DNS server addresses based on + /// the provided subnet. + pub fn new(subnet: Ipv6Subnet) -> Result { + Ok(Self { inner: create_resolver(subnet)? }) + } + + /// Convenience wrapper for [`Resolver::new`] which determines the subnet + /// based on a provided IP address. + pub fn new_from_ip(address: Ipv6Addr) -> Result { + let subnet = Ipv6Subnet::::new(address); + + Resolver::new(subnet) + } + + /// Looks up a single [`Ipv6Addr`] based on the SRV name. + /// Returns an error if the record does not exist. 
+ // TODO: There are lots of ways this API can expand: Caching, + // actually respecting TTL, looking up ports, etc. + // + // For now, however, it serves as a very simple "get everyone using DNS" + // API that can be improved upon later. + pub async fn lookup_ipv6( + &self, + srv: crate::names::SRV, + ) -> Result { + let response = self.inner.ipv6_lookup(&srv.to_string()).await?; + let address = response + .iter() + .next() + .ok_or_else(|| ResolveError::NotFound(srv))?; + Ok(*address) + } + + pub async fn lookup_ip( + &self, + srv: crate::names::SRV, + ) -> Result { + let response = self.inner.lookup_ip(&srv.to_string()).await?; + let address = response + .iter() + .next() + .ok_or_else(|| ResolveError::NotFound(srv))?; + Ok(address) + } +} diff --git a/internal-dns-client/src/names.rs b/internal-dns-client/src/names.rs index 53c1504d168..d920ef77fbd 100644 --- a/internal-dns-client/src/names.rs +++ b/internal-dns-client/src/names.rs @@ -7,7 +7,7 @@ use uuid::Uuid; const DNS_ZONE: &str = "control-plane.oxide.internal"; -#[derive(Debug, PartialEq, PartialOrd)] +#[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum ServiceName { Clickhouse, Cockroach, @@ -28,7 +28,7 @@ impl fmt::Display for ServiceName { } } -#[derive(Debug, PartialEq, PartialOrd)] +#[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum BackendName { Crucible, SledAgent, @@ -43,7 +43,7 @@ impl fmt::Display for BackendName { } } -#[derive(Debug, PartialEq, PartialOrd)] +#[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum SRV { /// A service identified and accessed by name, such as "nexus", "CRDB", etc. /// diff --git a/oximeter/collector/Cargo.toml b/oximeter/collector/Cargo.toml index 7e36050d9af..10fe6058c0a 100644 --- a/oximeter/collector/Cargo.toml +++ b/oximeter/collector/Cargo.toml @@ -8,6 +8,7 @@ license = "MPL-2.0" [dependencies] clap = { version = "3.2", features = ["derive"] } dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } +internal-dns-client = { path = "../../internal-dns-client" } nexus-client = { path = "../../nexus-client" } omicron-common = { path = "../../common" } oximeter = { path = "../oximeter" } diff --git a/oximeter/collector/config.toml b/oximeter/collector/config.toml index 6b03a3974d2..0e8557a71bf 100644 --- a/oximeter/collector/config.toml +++ b/oximeter/collector/config.toml @@ -1,16 +1,9 @@ # Example configuration file for running an oximeter collector server -id = "1da65e5b-210c-4859-a7d7-200c1e659972" -nexus_address = "127.0.0.1:12221" - [db] -address = "[::1]:8123" batch_size = 1000 batch_interval = 5 # In seconds [log] level = "debug" mode = "stderr-terminal" - -[dropshot] -bind_address = "[::1]:12223" diff --git a/oximeter/collector/src/bin/oximeter.rs b/oximeter/collector/src/bin/oximeter.rs index 19f9b5b3da0..b9ff5e42d6b 100644 --- a/oximeter/collector/src/bin/oximeter.rs +++ b/oximeter/collector/src/bin/oximeter.rs @@ -8,8 +8,10 @@ use clap::Parser; use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; -use oximeter_collector::{oximeter_api, Config, Oximeter}; +use oximeter_collector::{oximeter_api, Config, Oximeter, OximeterArguments}; +use std::net::SocketAddrV6; use std::path::PathBuf; +use uuid::Uuid; pub fn run_openapi() -> Result<(), String> { oximeter_api() @@ -36,6 +38,12 @@ struct Args { /// Path to TOML file with configuration for the server #[clap(name = "CONFIG_FILE", action)] config_file: PathBuf, + + #[clap(short, long, action)] + id: Uuid, + + #[clap(short, long, action)] + address: 
SocketAddrV6, } #[tokio::main] @@ -51,7 +59,8 @@ async fn do_run() -> Result<(), CmdError> { if args.openapi { run_openapi().map_err(CmdError::Failure) } else { - Oximeter::new(&config) + let args = OximeterArguments { id: args.id, address: args.address }; + Oximeter::new(&config, &args) .await .unwrap() .serve_forever() diff --git a/oximeter/collector/src/lib.rs b/oximeter/collector/src/lib.rs index 4e2f6ca4fda..6f19492e83e 100644 --- a/oximeter/collector/src/lib.rs +++ b/oximeter/collector/src/lib.rs @@ -11,6 +11,11 @@ use dropshot::{ HttpResponseUpdatedNoContent, HttpServer, HttpServerStarter, RequestContext, TypedBody, }; +use internal_dns_client::{ + multiclient::{ResolveError, Resolver}, + names::{ServiceName, SRV}, +}; +use omicron_common::address::{CLICKHOUSE_PORT, NEXUS_INTERNAL_PORT}; use omicron_common::api::internal::nexus::ProducerEndpoint; use omicron_common::backoff; use oximeter::types::{ProducerResults, ProducerResultsItem}; @@ -18,7 +23,7 @@ use oximeter_db::{Client, DbWrite}; use serde::{Deserialize, Serialize}; use slog::{debug, error, info, o, trace, warn, Drain, Logger}; use std::collections::{btree_map::Entry, BTreeMap}; -use std::net::SocketAddr; +use std::net::{SocketAddr, SocketAddrV6}; use std::path::Path; use std::sync::Arc; use std::time::Duration; @@ -37,6 +42,9 @@ pub enum Error { #[error(transparent)] Database(#[from] oximeter_db::Error), + + #[error(transparent)] + ResolveError(#[from] ResolveError), } // Messages for controlling a collection task @@ -231,9 +239,6 @@ async fn results_sink( /// Configuration for interacting with the metric database. #[derive(Debug, Clone, Copy, Deserialize, Serialize)] pub struct DbConfig { - /// Address of the ClickHouse server - pub address: SocketAddr, - /// Batch size of samples at which to insert pub batch_size: usize, @@ -259,6 +264,7 @@ impl OximeterAgent { pub async fn with_id( id: Uuid, db_config: DbConfig, + resolver: &Resolver, log: &Logger, ) -> Result { let (result_sender, result_receiver) = mpsc::channel(8); @@ -267,7 +273,11 @@ impl OximeterAgent { // Construct the ClickHouse client first, propagate an error if we can't reach the // database. - let client = Client::new(db_config.address, &log); + let db_address = SocketAddr::new( + resolver.lookup_ip(SRV::Service(ServiceName::Clickhouse)).await?, + CLICKHOUSE_PORT, + ); + let client = Client::new(db_address, &log); client.init_db().await?; // Spawn the task for aggregating and inserting all metrics @@ -334,18 +344,9 @@ impl OximeterAgent { /// Configuration used to initialize an oximeter server #[derive(Clone, Debug, Deserialize, Serialize)] pub struct Config { - /// An unique ID for this oximeter server - pub id: Uuid, - - /// The address used to connect to Nexus. - pub nexus_address: SocketAddr, - /// Configuration for working with ClickHouse pub db: DbConfig, - /// The internal Dropshot HTTP server configuration - pub dropshot: ConfigDropshot, - /// Logging configuration pub log: ConfigLogging, } @@ -360,6 +361,11 @@ impl Config { } } +pub struct OximeterArguments { + pub id: Uuid, + pub address: SocketAddrV6, +} + /// A server used to collect metrics from components in the control plane. pub struct Oximeter { _agent: Arc, @@ -371,7 +377,10 @@ impl Oximeter { /// /// This starts an HTTP server used to communicate with other agents in Omicron, especially /// Nexus. It also registers itself as a new `oximeter` instance with Nexus. 
- pub async fn new(config: &Config) -> Result { + pub async fn new( + config: &Config, + args: &OximeterArguments, + ) -> Result { let (drain, registration) = slog_dtrace::with_drain( config .log @@ -388,10 +397,13 @@ impl Oximeter { } info!(log, "starting oximeter server"); + let resolver = Resolver::new_from_ip(*args.address.ip())?; + let make_agent = || async { debug!(log, "creating ClickHouse client"); Ok(Arc::new( - OximeterAgent::with_id(config.id, config.db, &log).await?, + OximeterAgent::with_id(args.id, config.db, &resolver, &log) + .await?, )) }; let log_client_failure = |error, delay| { @@ -411,7 +423,10 @@ impl Oximeter { let dropshot_log = log.new(o!("component" => "dropshot")); let server = HttpServerStarter::new( - &config.dropshot, + &ConfigDropshot { + bind_address: SocketAddr::V6(args.address), + ..Default::default() + }, oximeter_api(), Arc::clone(&agent), &dropshot_log, @@ -423,10 +438,15 @@ impl Oximeter { let client = reqwest::Client::new(); let notify_nexus = || async { debug!(log, "contacting nexus"); + let nexus_address = resolver + .lookup_ipv6(SRV::Service(ServiceName::Nexus)) + .await + .map_err(|e| backoff::BackoffError::transient(e.to_string()))?; + client .post(format!( - "http://{}/metrics/collectors", - config.nexus_address + "http://[{}]:{}/metrics/collectors", + nexus_address, NEXUS_INTERNAL_PORT, )) .json(&nexus_client::types::OximeterInfo { address: server.local_addr().to_string(), @@ -434,9 +454,9 @@ impl Oximeter { }) .send() .await - .map_err(backoff::BackoffError::transient)? + .map_err(|e| backoff::BackoffError::transient(e.to_string()))? .error_for_status() - .map_err(backoff::BackoffError::transient) + .map_err(|e| backoff::BackoffError::transient(e.to_string())) }; let log_notification_failure = |error, delay| { warn!( diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 9d3b9cc0d17..16d0fffad30 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -831,7 +831,8 @@ mod test { ); let port_allocator = OptePortAllocator::new(); let lazy_nexus_client = - LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST); + LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST) + .unwrap(); let inst = Instance::new( log.clone(), diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index b89eab6473a..37805f868fd 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -271,7 +271,8 @@ mod test { async fn ensure_instance() { let log = logger(); let lazy_nexus_client = - LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST); + LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST) + .unwrap(); // Creation of the instance manager incurs some "global" system // checks: cleanup of existing zones + vnics. @@ -355,7 +356,8 @@ mod test { async fn ensure_instance_repeatedly() { let log = logger(); let lazy_nexus_client = - LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST); + LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST) + .unwrap(); // Instance Manager creation. 
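The nexus.rs diff that follows applies the same idea as the oximeter changes above: instead of a configured address, a component builds a Resolver from its own underlay IP and asks internal DNS for a peer by SRV name. A minimal sketch of that lookup flow, using only the types introduced in this patch (the function name and the choice of ClickHouse as the target are illustrative, not part of the patch):

use internal_dns_client::{
    multiclient::{ResolveError, Resolver},
    names::{ServiceName, SRV},
};
use omicron_common::address::CLICKHOUSE_PORT;
use std::net::{Ipv6Addr, SocketAddr};

// Mirrors the lookup in `OximeterAgent::with_id` above: derive the AZ
// subnet (and hence the DNS server addresses) from our own underlay IP,
// then resolve ClickHouse's SRV record and pair it with the well-known port.
async fn clickhouse_address(
    underlay_ip: Ipv6Addr,
) -> Result<SocketAddr, ResolveError> {
    let resolver = Resolver::new_from_ip(underlay_ip)?;
    let ip = resolver.lookup_ip(SRV::Service(ServiceName::Clickhouse)).await?;
    Ok(SocketAddr::new(ip, CLICKHOUSE_PORT))
}

LazyNexusClient in the next hunk follows the same pattern with ServiceName::Nexus and lookup_ipv6, deferring the lookup until a client is actually requested.
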
diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 3dc40369219..7c252cde417 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -7,16 +7,18 @@ pub use crate::mocks::MockNexusClient as NexusClient; #[cfg(not(test))] pub use nexus_client::Client as NexusClient; -use internal_dns_client::names::{ServiceName, SRV}; -use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, NEXUS_INTERNAL_PORT}; +use internal_dns_client::{ + multiclient::{ResolveError, Resolver}, + names::{ServiceName, SRV}, +}; +use omicron_common::address::NEXUS_INTERNAL_PORT; use slog::Logger; use std::net::Ipv6Addr; use std::sync::Arc; struct Inner { log: Logger, - addr: Ipv6Addr, - // TODO: We could also totally cache the resolver / observed IP here? + resolver: Resolver, } /// Wrapper around a [`NexusClient`] object, which allows deferring @@ -35,26 +37,21 @@ pub struct LazyNexusClient { } impl LazyNexusClient { - pub fn new(log: Logger, addr: Ipv6Addr) -> Self { - Self { inner: Arc::new(Inner { log, addr }) } + pub fn new(log: Logger, addr: Ipv6Addr) -> Result { + Ok(Self { + inner: Arc::new(Inner { + log, + resolver: Resolver::new_from_ip(addr)?, + }), + }) } - pub async fn get(&self) -> Result { - // TODO: Consider refactoring this: - // - Address as input - // - Lookup "nexus" DNS record - // - Result
as output - let az_subnet = Ipv6Subnet::::new(self.inner.addr); - let resolver = - internal_dns_client::multiclient::create_resolver(az_subnet) - .map_err(|e| format!("Failed to create DNS resolver: {}", e))?; - let response = resolver - .lookup_ip(&SRV::Service(ServiceName::Nexus).to_string()) - .await - .map_err(|e| format!("Failed to lookup Nexus IP: {}", e))?; - let address = response.iter().next().ok_or_else(|| { - "no addresses returned from DNS resolver".to_string() - })?; + pub async fn get(&self) -> Result { + let address = self + .inner + .resolver + .lookup_ipv6(SRV::Service(ServiceName::Nexus)) + .await?; Ok(NexusClient::new( &format!("http://[{}]:{}", address, NEXUS_INTERNAL_PORT), @@ -70,7 +67,7 @@ impl LazyNexusClient { #[cfg(test)] mockall::mock! { pub LazyNexusClient { - pub fn new(log: Logger, addr: Ipv6Addr) -> Self; + pub fn new(log: Logger, addr: Ipv6Addr) -> Result; pub async fn get(&self) -> Result; } impl Clone for LazyNexusClient { diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 6725a8351e7..bd73c6e4169 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -44,7 +44,8 @@ impl Server { let client_log = log.new(o!("component" => "NexusClient")); - let lazy_nexus_client = LazyNexusClient::new(client_log, *addr.ip()); + let lazy_nexus_client = LazyNexusClient::new(client_log, *addr.ip()) + .map_err(|e| e.to_string())?; let sled_agent = SledAgent::new( &config, @@ -86,7 +87,7 @@ impl Server { let nexus_client = lazy_nexus_client .get() .await - .map_err(|err| BackoffError::transient(err))?; + .map_err(|err| BackoffError::transient(err.to_string()))?; nexus_client .cpapi_sled_agents_post( &sled_id, diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 2900bf7761c..ea989c3eab0 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -12,7 +12,7 @@ use crate::illumos::zone::AddressRequest; use crate::params::{ServiceEnsureBody, ServiceRequest, ServiceType}; use crate::zone::Zones; use dropshot::ConfigDropshot; -use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; +use omicron_common::address::{Ipv6Subnet, OXIMETER_PORT, RACK_PREFIX}; use omicron_common::nexus_config::{ self, DeploymentConfig as NexusDeploymentConfig, }; @@ -427,8 +427,50 @@ impl ServiceManager { ServiceType::Oximeter => { info!(self.log, "Setting up oximeter service"); - // TODO: Implement with dynamic parameters, when address is - // dynamically assigned. 
+ let address = service.addresses[0]; + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!("config/id={}", service.id), + ]) + .map_err(|err| Error::ZoneCommand { + intent: "set server ID".to_string(), + err, + })?; + + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!( + "config/address=[{}]:{}", + address, OXIMETER_PORT, + ), + ]) + .map_err(|err| Error::ZoneCommand { + intent: "set server address".to_string(), + err, + })?; + + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &default_smf_name, + "refresh", + ]) + .map_err(|err| Error::ZoneCommand { + intent: format!( + "Refresh SMF manifest {}", + default_smf_name + ), + err, + })?; } } diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index bb0a21930cc..72a3f8c4327 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -642,7 +642,9 @@ impl StorageWorker { lazy_nexus_client .get() .await - .map_err(|e| backoff::BackoffError::transient(e))? + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })? .zpool_put(&sled_id, &pool_id, &zpool_request) .await .map_err(|e| { diff --git a/smf/oximeter/config.toml b/smf/oximeter/config.toml index 4a0095fdd00..ca14fe6ec8b 100644 --- a/smf/oximeter/config.toml +++ b/smf/oximeter/config.toml @@ -1,11 +1,6 @@ # Example configuration file for running an oximeter collector server -id = "1da65e5b-210c-4859-a7d7-200c1e659972" -# Internal address of nexus -nexus_address = "[fd00:1122:3344:0101::3]:12221" - [db] -address = "[fd00:1122:3344:0101::5]:8123" batch_size = 1000 batch_interval = 5 # In seconds @@ -14,6 +9,3 @@ level = "debug" mode = "file" path = "/dev/stdout" if_exists = "append" - -[dropshot] -bind_address = "[fd00:1122:3344:0101::4]:12223" diff --git a/smf/oximeter/manifest.xml b/smf/oximeter/manifest.xml index 47e3cb254f1..5e91cbfc96a 100644 --- a/smf/oximeter/manifest.xml +++ b/smf/oximeter/manifest.xml @@ -18,10 +18,15 @@ + + + + + From a5be4d0508a65a42ae33e93819216ebe7628b04c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 17 Jun 2022 14:10:18 -0400 Subject: [PATCH 18/35] Allow oximeter to use config-provided addresses --- nexus/test-utils/src/lib.rs | 17 ++++++------ oximeter/collector/src/lib.rs | 50 ++++++++++++++++++++++++++--------- 2 files changed, 46 insertions(+), 21 deletions(-) diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 72ece48526f..d4e234b9e8b 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -18,7 +18,7 @@ use oximeter_collector::Oximeter; use oximeter_producer::Server as ProducerServer; use slog::o; use slog::Logger; -use std::net::{IpAddr, Ipv6Addr, SocketAddr}; +use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use std::path::Path; use std::time::Duration; use uuid::Uuid; @@ -224,21 +224,20 @@ pub async fn start_oximeter( id: Uuid, ) -> Result { let db = oximeter_collector::DbConfig { - address: SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db_port), + address: Some(SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db_port)), batch_size: 10, batch_interval: 1, }; let config = oximeter_collector::Config { - id, - nexus_address, + nexus_address: Some(nexus_address), db, - dropshot: ConfigDropshot { - bind_address: SocketAddr::new(Ipv6Addr::LOCALHOST.into(), 0), - ..Default::default() - }, log: ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Error }, }; - 
Oximeter::new(&config).await.map_err(|e| e.to_string()) + let args = oximeter_collector::OximeterArguments { + id, + address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), + }; + Oximeter::new(&config, &args).await.map_err(|e| e.to_string()) } #[derive(Debug, Clone, oximeter::Target)] diff --git a/oximeter/collector/src/lib.rs b/oximeter/collector/src/lib.rs index 6f19492e83e..64a2af4c96a 100644 --- a/oximeter/collector/src/lib.rs +++ b/oximeter/collector/src/lib.rs @@ -239,6 +239,12 @@ async fn results_sink( /// Configuration for interacting with the metric database. #[derive(Debug, Clone, Copy, Deserialize, Serialize)] pub struct DbConfig { + /// Optional address of the ClickHouse server. + /// + /// If "None", will be inferred from DNS. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub address: Option, + /// Batch size of samples at which to insert pub batch_size: usize, @@ -273,10 +279,16 @@ impl OximeterAgent { // Construct the ClickHouse client first, propagate an error if we can't reach the // database. - let db_address = SocketAddr::new( - resolver.lookup_ip(SRV::Service(ServiceName::Clickhouse)).await?, - CLICKHOUSE_PORT, - ); + let db_address = if let Some(address) = db_config.address { + address + } else { + SocketAddr::new( + resolver + .lookup_ip(SRV::Service(ServiceName::Clickhouse)) + .await?, + CLICKHOUSE_PORT, + ) + }; let client = Client::new(db_address, &log); client.init_db().await?; @@ -344,6 +356,12 @@ impl OximeterAgent { /// Configuration used to initialize an oximeter server #[derive(Clone, Debug, Deserialize, Serialize)] pub struct Config { + /// The address used to connect to Nexus. + /// + /// If "None", will be inferred from DNS. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub nexus_address: Option, + /// Configuration for working with ClickHouse pub db: DbConfig, @@ -438,16 +456,24 @@ impl Oximeter { let client = reqwest::Client::new(); let notify_nexus = || async { debug!(log, "contacting nexus"); - let nexus_address = resolver - .lookup_ipv6(SRV::Service(ServiceName::Nexus)) - .await - .map_err(|e| backoff::BackoffError::transient(e.to_string()))?; + let nexus_address = if let Some(address) = config.nexus_address { + address + } else { + SocketAddr::V6(SocketAddrV6::new( + resolver + .lookup_ipv6(SRV::Service(ServiceName::Nexus)) + .await + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?, + NEXUS_INTERNAL_PORT, + 0, + 0, + )) + }; client - .post(format!( - "http://[{}]:{}/metrics/collectors", - nexus_address, NEXUS_INTERNAL_PORT, - )) + .post(format!("http://{}/metrics/collectors", nexus_address,)) .json(&nexus_client::types::OximeterInfo { address: server.local_addr().to_string(), collector_id: agent.id, From 59dc38273de962f2f53e1e1d40e84fbbbb6e5965 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 17 Jun 2022 14:26:32 -0400 Subject: [PATCH 19/35] Fix command-based tests --- oximeter/collector/src/bin/oximeter.rs | 51 +++++++++---------- .../tests/output/cmd-oximeter-noargs-stderr | 14 +++-- oximeter/collector/tests/test_commands.rs | 2 +- smf/oximeter/manifest.xml | 2 +- 4 files changed, 37 insertions(+), 32 deletions(-) diff --git a/oximeter/collector/src/bin/oximeter.rs b/oximeter/collector/src/bin/oximeter.rs index b9ff5e42d6b..bf54cf33fa0 100644 --- a/oximeter/collector/src/bin/oximeter.rs +++ b/oximeter/collector/src/bin/oximeter.rs @@ -26,24 +26,22 @@ pub fn run_openapi() -> Result<(), String> { /// Run an oximeter metric collection server in the Oxide Control Plane. 
#[derive(Parser)] #[clap(name = "oximeter", about = "See README.adoc for more information")] -struct Args { - #[clap( - short = 'O', - long = "openapi", - help = "Print the external OpenAPI Spec document and exit", - action - )] - openapi: bool, +enum Args { + /// Print the external OpenAPI Spec document and exit + Openapi, - /// Path to TOML file with configuration for the server - #[clap(name = "CONFIG_FILE", action)] - config_file: PathBuf, + /// Start an Oximeter server + Run { + /// Path to TOML file with configuration for the server + #[clap(name = "CONFIG_FILE", action)] + config_file: PathBuf, - #[clap(short, long, action)] - id: Uuid, + #[clap(short, long, action)] + id: Uuid, - #[clap(short, long, action)] - address: SocketAddrV6, + #[clap(short, long, action)] + address: SocketAddrV6, + }, } #[tokio::main] @@ -55,16 +53,17 @@ async fn main() { async fn do_run() -> Result<(), CmdError> { let args = Args::parse(); - let config = Config::from_file(args.config_file).unwrap(); - if args.openapi { - run_openapi().map_err(CmdError::Failure) - } else { - let args = OximeterArguments { id: args.id, address: args.address }; - Oximeter::new(&config, &args) - .await - .unwrap() - .serve_forever() - .await - .map_err(|e| CmdError::Failure(e.to_string())) + match args { + Args::Openapi => run_openapi().map_err(CmdError::Failure), + Args::Run { config_file, id, address } => { + let config = Config::from_file(config_file).unwrap(); + let args = OximeterArguments { id, address }; + Oximeter::new(&config, &args) + .await + .unwrap() + .serve_forever() + .await + .map_err(|e| CmdError::Failure(e.to_string())) + } } } diff --git a/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr b/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr index 1398febf119..dfb062bca75 100644 --- a/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr +++ b/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr @@ -1,7 +1,13 @@ -error: The following required arguments were not provided: - +oximeter +See README.adoc for more information USAGE: - oximeter [OPTIONS] + oximeter -For more information try --help +OPTIONS: + -h, --help Print help information + +SUBCOMMANDS: + help Print this message or the help of the given subcommand(s) + openapi Print the external OpenAPI Spec document and exit + run Start an Oximeter server diff --git a/oximeter/collector/tests/test_commands.rs b/oximeter/collector/tests/test_commands.rs index 7b910a5be4a..d3d66be0580 100644 --- a/oximeter/collector/tests/test_commands.rs +++ b/oximeter/collector/tests/test_commands.rs @@ -50,7 +50,7 @@ fn test_oximeter_openapi() { // But we do know where it is at compile time, so we load it then. 
let config = include_str!("../../collector/config.toml"); let config_path = write_config(config); - let exec = Exec::cmd(path_to_oximeter()).arg(&config_path).arg("--openapi"); + let exec = Exec::cmd(path_to_oximeter()).arg("openapi"); let (exit_status, stdout_text, stderr_text) = run_command(exec); fs::remove_file(&config_path).expect("failed to remove temporary file"); assert_exit_code(exit_status, EXIT_SUCCESS, &stderr_text); diff --git a/smf/oximeter/manifest.xml b/smf/oximeter/manifest.xml index 5e91cbfc96a..d16efd90d99 100644 --- a/smf/oximeter/manifest.xml +++ b/smf/oximeter/manifest.xml @@ -18,7 +18,7 @@ From 81bf2d4846ba9d20179aa9927cfd64a7fa2c82d4 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 07:27:12 -0400 Subject: [PATCH 20/35] Nexus lazily accessing timeseries DB --- internal-dns-client/src/multiclient.rs | 1 + nexus/src/app/mod.rs | 16 +++++++-- nexus/src/app/oximeter.rs | 50 ++++++++++++++++++++++++++ nexus/src/config.rs | 11 +++--- nexus/src/context.rs | 15 ++++---- nexus/test-utils/src/lib.rs | 8 ++++- smf/nexus/config-partial.toml | 4 --- 7 files changed, 83 insertions(+), 22 deletions(-) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 19336bd3d17..4c6edba5498 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -173,6 +173,7 @@ impl Updater { } } +// TODO: not pub? /// Creates a resolver using all internal DNS name servers. pub fn create_resolver( subnet: Ipv6Subnet, diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 8b6cc606802..84408965e44 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -4,6 +4,7 @@ //! Nexus, the service that operates much of the control plane in an Oxide fleet +use crate::app::oximeter::LazyTimeseriesClient; use crate::authn; use crate::authz; use crate::config; @@ -88,7 +89,7 @@ pub struct Nexus { background_task_runner: OnceCell, /// Client to the timeseries database. - timeseries_client: oximeter_db::Client, + timeseries_client: LazyTimeseriesClient, /// Contents of the trusted root role for the TUF repository. updates_config: Option, @@ -113,9 +114,10 @@ pub struct Nexus { impl Nexus { /// Create a new Nexus instance for the given rack id `rack_id` // TODO-polish revisit rack metadata - pub fn new_with_id( + pub async fn new_with_id( rack_id: Uuid, log: Logger, + resolver: internal_dns_client::multiclient::Resolver, pool: db::Pool, config: &config::Config, authz: Arc, @@ -135,8 +137,16 @@ impl Nexus { )), sec_store, )); + + // Connect to Clickhouse - but do so lazily. + // Clickhouse may not be executing when Nexus starts. let timeseries_client = - oximeter_db::Client::new(config.pkg.timeseries_db.address, &log); + if let Some(address) = &config.pkg.timeseries_db.address { + // If an address was provided, use it instead of DNS. + LazyTimeseriesClient::new_from_address(log.clone(), *address) + } else { + LazyTimeseriesClient::new_from_dns(log.clone(), resolver) + }; // TODO-cleanup We may want a first-class subsystem for managing startup // background tasks. 
It could use a Future for each one, a status enum diff --git a/nexus/src/app/oximeter.rs b/nexus/src/app/oximeter.rs index e270868f90c..7f6fb9b6ffd 100644 --- a/nexus/src/app/oximeter.rs +++ b/nexus/src/app/oximeter.rs @@ -9,6 +9,11 @@ use crate::context::OpContext; use crate::db; use crate::db::identity::Asset; use crate::internal_api::params::OximeterInfo; +use internal_dns_client::{ + multiclient::{ResolveError, Resolver}, + names::{ServiceName, SRV}, +}; +use omicron_common::address::CLICKHOUSE_PORT; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; @@ -19,12 +24,54 @@ use oximeter_client::Client as OximeterClient; use oximeter_db::TimeseriesSchema; use oximeter_db::TimeseriesSchemaPaginationParams; use oximeter_producer::register; +use slog::Logger; use std::convert::TryInto; use std::net::SocketAddr; use std::num::NonZeroU32; use std::time::Duration; use uuid::Uuid; +/// A client which knows how to connect to Clickhouse, but does so +/// only when a request is actually made. +/// +/// This allows callers to set up the mechanism of connection (by address +/// or DNS) separately from actually making that connection. This +/// is particularly useful in situations where configurations are parsed +/// prior to Clickhouse existing. +pub struct LazyTimeseriesClient { + log: Logger, + source: ClientSource, +} + +enum ClientSource { + FromDns { resolver: Resolver }, + FromIp { address: SocketAddr }, +} + +impl LazyTimeseriesClient { + pub fn new_from_dns(log: Logger, resolver: Resolver) -> Self { + Self { log, source: ClientSource::FromDns { resolver } } + } + + pub fn new_from_address(log: Logger, address: SocketAddr) -> Self { + Self { log, source: ClientSource::FromIp { address } } + } + + pub async fn get(&self) -> Result { + let address = match &self.source { + ClientSource::FromIp { address } => *address, + ClientSource::FromDns { resolver } => SocketAddr::new( + resolver + .lookup_ip(SRV::Service(ServiceName::Clickhouse)) + .await?, + CLICKHOUSE_PORT, + ), + }; + + Ok(oximeter_db::Client::new(address, &self.log)) + } +} + impl super::Nexus { /// Insert a new record of an Oximeter collector server. pub async fn upsert_oximeter_collector( @@ -160,6 +207,9 @@ impl super::Nexus { ) -> Result, Error> { opctx.authorize(authz::Action::Read, &authz::FLEET).await?; self.timeseries_client + .get() + .await + .map_err(|e| Error::internal_error(&e.to_string()))? .timeseries_schema_list(&pag_params.page, limit) .await .map_err(|e| match e { diff --git a/nexus/src/config.rs b/nexus/src/config.rs index 98cbf0169cf..5ca452e7388 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -45,10 +45,11 @@ pub struct UpdatesConfig { pub default_base_url: String, } -/// Configuration for the timeseries database. -#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +/// Optional configuration for the timeseries database. +#[derive(Clone, Debug, Default, Deserialize, PartialEq, Serialize)] pub struct TimeseriesDbConfig { - pub address: SocketAddr, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub address: Option, } // A deserializable type that does no validation on the tunable parameters. @@ -132,7 +133,7 @@ pub struct PackageConfig { /// Authentication-related configuration pub authn: AuthnConfig, /// Timeseries database configuration. - // TODO: Should this be removed? Nexus needs to initialize it. 
+ #[serde(default)] pub timeseries_db: TimeseriesDbConfig, /// Updates-related configuration. Updates APIs return 400 Bad Request when this is /// unconfigured. @@ -381,7 +382,7 @@ mod test { path: "/nonexistent/path".to_string() }, timeseries_db: TimeseriesDbConfig { - address: "[::1]:8123".parse().unwrap() + address: Some("[::1]:8123".parse().unwrap()) }, updates: Some(UpdatesConfig { trusted_root: PathBuf::from("/path/to/root.json"), diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 2677df0c3c9..31d0af0808a 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -145,7 +145,7 @@ impl ServerContext { Ipv6Subnet::::new(config.deployment.subnet.net().ip()); info!(log, "Setting up resolver on subnet: {:?}", az_subnet); let resolver = - internal_dns_client::multiclient::create_resolver(az_subnet) + internal_dns_client::multiclient::Resolver::new(az_subnet) .map_err(|e| format!("Failed to create DNS resolver: {}", e))?; // Set up DB pool @@ -153,15 +153,10 @@ impl ServerContext { nexus_config::Database::FromUrl { url } => url.clone(), nexus_config::Database::FromDns => { info!(log, "Accessing DB url from DNS"); - let response = resolver - .lookup_ip( - &SRV::Service(ServiceName::Cockroach).to_string(), - ) + let address = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) .await .map_err(|e| format!("Failed to lookup IP: {}", e))?; - let address = response.iter().next().ok_or_else(|| { - "no addresses returned from DNS resolver".to_string() - })?; info!(log, "DB address: {}", address); PostgresConfigWithUrl::from_str(&format!( "postgresql://root@[{}]:{}/omicron?sslmode=disable", @@ -174,10 +169,12 @@ impl ServerContext { let nexus = Nexus::new_with_id( rack_id, log.new(o!("component" => "nexus")), + resolver, pool, config, Arc::clone(&authz), - ); + ) + .await; Ok(Arc::new(ServerContext { nexus, diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index d4e234b9e8b..e1c3949c57f 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -101,7 +101,13 @@ pub async fn test_setup_with_config( // Store actual address/port information for the databases after they start. config.deployment.database = nexus_config::Database::FromUrl { url: database.pg_config().clone() }; - config.pkg.timeseries_db.address.set_port(clickhouse.port()); + config + .pkg + .timeseries_db + .address + .as_mut() + .expect("Tests expect to set a port of Clickhouse") + .set_port(clickhouse.port()); // Start the Nexus internal API. 
let internal_server = diff --git a/smf/nexus/config-partial.toml b/smf/nexus/config-partial.toml index b77ffc3137f..4b759f1761c 100644 --- a/smf/nexus/config-partial.toml +++ b/smf/nexus/config-partial.toml @@ -19,7 +19,3 @@ level = "info" mode = "file" path = "/dev/stdout" if_exists = "append" - -# Configuration for interacting with the timeseries database -[timeseries_db] -address = "[fd00:1122:3344:0101::5]:8123" From aed3ba6b7f90b6877c6036b380b2be3920b50c9b Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 07:40:16 -0400 Subject: [PATCH 21/35] Cleanup TODOs --- internal-dns-client/src/multiclient.rs | 5 ++--- nexus/src/app/mod.rs | 3 --- sled-agent/src/instance.rs | 7 ++++--- sled-agent/src/rack_setup/service.rs | 17 ++++++----------- sled-agent/src/sled_agent.rs | 10 ++++------ 5 files changed, 16 insertions(+), 26 deletions(-) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 4c6edba5498..8c4313a40a4 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -173,9 +173,8 @@ impl Updater { } } -// TODO: not pub? -/// Creates a resolver using all internal DNS name servers. -pub fn create_resolver( +// Creates a resolver using all internal DNS name servers. +fn create_resolver( subnet: Ipv6Subnet, ) -> Result { let mut rc = ResolverConfig::new(); diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 84408965e44..682bb406c77 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -261,9 +261,6 @@ impl Nexus { } /// Returns an [`OpContext`] used for background tasks. - // TODO: Probably should be making a *new* opctx here? - // - // I think there should be one-per-"op", to get better metrics on bg ops. pub fn opctx_for_background(&self) -> OpContext { OpContext::for_background( self.log.new(o!("component" => "BackgroundWork")), diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 16d0fffad30..08d866a1266 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -84,6 +84,9 @@ pub enum Error { #[error("Serial console buffer: {0}")] Serial(#[from] crate::serial::Error), + + #[error("Error resolving DNS name: {0}")] + ResolveError(#[from] internal_dns_client::multiclient::ResolveError), } // Issues read-only, idempotent HTTP requests at propolis until it responds with @@ -253,9 +256,7 @@ impl InstanceInner { // Notify Nexus of the state change. self.lazy_nexus_client .get() - .await - // TODO: Handle me - .unwrap() + .await? 
.cpapi_instances_put( self.id(), &nexus_client::types::InstanceRuntimeState::from( diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 29b579484bf..42e821a379e 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -313,20 +313,15 @@ impl ServiceInner { ) -> Result<(), SetupServiceError> { info!(self.log, "Handing off control to Nexus"); - let resolver = internal_dns_client::multiclient::create_resolver( - config.az_subnet(), - ) - .expect("Failed to create DNS resolver"); - let response = resolver - .lookup_ip(&SRV::Service(ServiceName::Nexus).to_string()) + let resolver = + internal_dns_client::multiclient::Resolver::new(config.az_subnet()) + .expect("Failed to create DNS resolver"); + let ip = resolver + .lookup_ip(SRV::Service(ServiceName::Nexus)) .await .expect("Failed to lookup IP"); + let nexus_address = SocketAddr::new(ip, NEXUS_INTERNAL_PORT); - let nexus_address = response - .iter() - .next() - .map(|addr| SocketAddr::new(addr, NEXUS_INTERNAL_PORT)) - .expect("no addresses returned from DNS resolver"); info!(self.log, "Nexus address: {}", nexus_address.to_string()); let nexus_client = NexusClient::new( diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 11212cf8e44..5eba7217349 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -78,6 +78,9 @@ pub enum Error { #[error("Error managing guest networking: {0}")] Opte(#[from] crate::opte::Error), + + #[error("Error resolving DNS name: {0}")] + ResolveError(#[from] internal_dns_client::multiclient::ResolveError), } impl From for omicron_common::api::external::Error { @@ -320,12 +323,7 @@ impl SledAgent { &self, artifact: UpdateArtifact, ) -> Result<(), Error> { - let nexus_client = self - .lazy_nexus_client - .get() - .await - // TODO: Handle error... or push out lazy nexus client. - .unwrap(); + let nexus_client = self.lazy_nexus_client.get().await?; crate::updates::download_artifact(artifact, &nexus_client).await?; Ok(()) } From 8fce9a14e50952b784bf28263ac8de36b4061486 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 08:00:32 -0400 Subject: [PATCH 22/35] Box resolver to make clippy happy --- internal-dns-client/src/multiclient.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 8c4313a40a4..0c549e9931c 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -210,14 +210,14 @@ pub enum ResolveError { /// A wrapper around a DNS resolver, providing a way to conveniently /// look up IP addresses of services based on their SRV keys. pub struct Resolver { - inner: TokioAsyncResolver, + inner: Box, } impl Resolver { /// Creates a DNS resolver, looking up DNS server addresses based on /// the provided subnet. pub fn new(subnet: Ipv6Subnet) -> Result { - Ok(Self { inner: create_resolver(subnet)? }) + Ok(Self { inner: Box::new(create_resolver(subnet)?) 
}) } /// Convenience wrapper for [`Resolver::new`] which determines the subnet From d26ee1442c63e2aa95c9a15dd853f0a0f3c0a7d3 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 14:45:36 -0400 Subject: [PATCH 23/35] Internal DNS tests --- Cargo.lock | 6 + internal-dns-client/Cargo.toml | 8 + internal-dns-client/src/multiclient.rs | 522 ++++++++++++++++++++++--- internal-dns-client/src/names.rs | 62 ++- internal-dns/src/bin/dns-server.rs | 12 +- internal-dns/src/dns_server.rs | 46 ++- internal-dns/tests/basic_test.rs | 19 +- nexus/src/db/datastore.rs | 3 - 8 files changed, 601 insertions(+), 77 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 25bd98d2e97..8a7b9f25cb0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2339,14 +2339,20 @@ dependencies = [ name = "internal-dns-client" version = "0.1.0" dependencies = [ + "dropshot", "futures", + "internal-dns", "omicron-common 0.1.0", + "omicron-test-utils", "progenitor", "reqwest", "serde", "serde_json", + "sled", "slog", + "tempfile", "thiserror", + "tokio", "trust-dns-proto", "trust-dns-resolver", "uuid", diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml index 9572b53b40f..4872699610a 100644 --- a/internal-dns-client/Cargo.toml +++ b/internal-dns-client/Cargo.toml @@ -16,3 +16,11 @@ thiserror = "1.0" trust-dns-proto = "0.21" trust-dns-resolver = "0.21" uuid = { version = "1.1.0", features = [ "v4", "serde" ] } + +[dev-dependencies] +dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } +internal-dns = { path = "../internal-dns" } +omicron-test-utils = { path = "../test-utils" } +sled = "0.34" +tempfile = "3.3" +tokio = { version = "1.18", features = [ "full" ] } diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 0c549e9931c..333f0283dd8 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -19,32 +19,115 @@ use trust_dns_resolver::TokioAsyncResolver; type DnsError = crate::Error; -/// A connection used to update multiple DNS servers. -pub struct Updater { - log: Logger, - clients: Vec, +// A structure which instructs the client APIs how to access +// DNS servers. +// +// These functions exist in a separate struct for comparison +// with the test-utility, [`LocalAddressGetter`]. +struct FromReservedRackSubnet {} + +const FROM_RESERVED_RACK_SUBNET: FromReservedRackSubnet = + FromReservedRackSubnet {}; + +impl FromReservedRackSubnet { + fn subnet_to_ips( + subnet: Ipv6Subnet, + ) -> impl Iterator { + ReservedRackSubnet::new(subnet) + .get_dns_subnets() + .into_iter() + .map(|dns_subnet| IpAddr::V6(dns_subnet.dns_address().ip())) + } + + fn subnet_to_dropshot_server_addrs( + &self, + subnet: Ipv6Subnet, + ) -> impl Iterator { + Self::subnet_to_ips(subnet) + .map(|address| SocketAddr::new(address, DNS_SERVER_PORT)) + } + + fn subnet_to_dns_server_addrs( + &self, + subnet: Ipv6Subnet, + ) -> impl Iterator { + Self::subnet_to_ips(subnet) + .map(|address| SocketAddr::new(address, DNS_PORT)) + } +} + +// A test-only alternative to [`FromReservedRackSubnet`]. +// +// Rather than inferring DNS server addresses from the rack subnet, +// they may be explicitly supplied. This results in easier-to-test code. 
+#[cfg(test)] +#[derive(Default)] +struct LocalAddressGetter { + addrs: Vec<(SocketAddr, SocketAddr)>, +} + +#[cfg(test)] +impl LocalAddressGetter { + fn add_dns_server( + &mut self, + dns_address: SocketAddr, + server_address: SocketAddr, + ) { + self.addrs.push((dns_address, server_address)); + } + + fn subnet_to_dropshot_server_addrs( + &self, + ) -> impl Iterator + '_ { + self.addrs + .iter() + .map(|(_dns_address, dropshot_address)| *dropshot_address) + } + + fn subnet_to_dns_server_addrs( + &self, + ) -> impl Iterator + '_ { + self.addrs.iter().map(|(dns_address, _dropshot_address)| *dns_address) + } } +/// Describes a service which may be inserted into DNS records. pub trait Service { fn aaaa(&self) -> crate::names::AAAA; fn srv(&self) -> crate::names::SRV; fn address(&self) -> SocketAddrV6; } +/// A connection used to update multiple DNS servers. +pub struct Updater { + log: Logger, + clients: Vec, +} + impl Updater { /// Creates a new "Updater", capable of communicating with all /// DNS servers within the AZ. pub fn new(subnet: Ipv6Subnet, log: Logger) -> Self { - let clients = ReservedRackSubnet::new(subnet) - .get_dns_subnets() - .into_iter() - .map(|dns_subnet| { - let addr = dns_subnet.dns_address().ip(); + let addrs = + FROM_RESERVED_RACK_SUBNET.subnet_to_dropshot_server_addrs(subnet); + Self::new_from_addrs(addrs, log) + } + + // Creates a new updater, using test-supplied DNS servers. + #[cfg(test)] + fn new_for_test(address_getter: &LocalAddressGetter, log: Logger) -> Self { + let dns_addrs = address_getter.subnet_to_dropshot_server_addrs(); + Self::new_from_addrs(dns_addrs, log) + } + + fn new_from_addrs( + addrs: impl Iterator, + log: Logger, + ) -> Self { + let clients = addrs + .map(|addr| { info!(log, "Adding DNS server: {}", addr); - crate::Client::new( - &format!("http://[{}]:{}", addr, DNS_SERVER_PORT), - log.clone(), - ) + crate::Client::new(&format!("http://{}", addr), log.clone()) }) .collect::>(); @@ -53,8 +136,7 @@ impl Updater { /// Inserts all service records into the DNS server. /// - /// This method is most efficient when records are sorted by - /// SRV key. + /// This method is most efficient when records are sorted by SRV key. pub async fn insert_dns_records( &self, records: &Vec, @@ -89,9 +171,9 @@ impl Updater { Ok(()) } - /// Utility function to insert: - /// - A set of uniquely-named AAAA records, each corresponding to an address - /// - An SRV record, pointing to each of the AAAA records. + // Utility function to insert: + // - A set of uniquely-named AAAA records, each corresponding to an address + // - An SRV record, pointing to each of the AAAA records. async fn insert_dns_records_internal( &self, aaaa: Vec<(crate::names::AAAA, SocketAddrV6)>, @@ -173,31 +255,6 @@ impl Updater { } } -// Creates a resolver using all internal DNS name servers. 
-fn create_resolver( - subnet: Ipv6Subnet, -) -> Result { - let mut rc = ResolverConfig::new(); - let dns_ips = ReservedRackSubnet::new(subnet) - .get_dns_subnets() - .into_iter() - .map(|subnet| subnet.dns_address().ip()) - .collect::>(); - - for dns_ip in dns_ips { - rc.add_name_server(NameServerConfig { - socket_addr: SocketAddr::V6(SocketAddrV6::new( - dns_ip, DNS_PORT, 0, 0, - )), - protocol: Protocol::Udp, - tls_dns_name: None, - trust_nx_responses: false, - bind_addr: None, - }); - } - TokioAsyncResolver::tokio(rc, ResolverOpts::default()) -} - #[derive(Debug, Clone, thiserror::Error)] pub enum ResolveError { #[error(transparent)] @@ -217,7 +274,37 @@ impl Resolver { /// Creates a DNS resolver, looking up DNS server addresses based on /// the provided subnet. pub fn new(subnet: Ipv6Subnet) -> Result { - Ok(Self { inner: Box::new(create_resolver(subnet)?) }) + let dns_addrs = + FROM_RESERVED_RACK_SUBNET.subnet_to_dns_server_addrs(subnet); + Self::new_from_addrs(dns_addrs) + } + + // Creates a new resolver, using test-supplied DNS servers. + #[cfg(test)] + fn new_for_test( + address_getter: &LocalAddressGetter, + ) -> Result { + let dns_addrs = address_getter.subnet_to_dns_server_addrs(); + Self::new_from_addrs(dns_addrs) + } + + fn new_from_addrs( + dns_addrs: impl Iterator, + ) -> Result { + let mut rc = ResolverConfig::new(); + for socket_addr in dns_addrs { + rc.add_name_server(NameServerConfig { + socket_addr, + protocol: Protocol::Udp, + tls_dns_name: None, + trust_nx_responses: false, + bind_addr: None, + }); + } + let inner = + Box::new(TokioAsyncResolver::tokio(rc, ResolverOpts::default())?); + + Ok(Self { inner }) } /// Convenience wrapper for [`Resolver::new`] which determines the subnet @@ -259,3 +346,350 @@ impl Resolver { Ok(address) } } + +#[cfg(test)] +mod test { + use super::*; + use crate::names::{BackendName, ServiceName, AAAA, SRV}; + use omicron_test_utils::dev::test_setup_log; + use std::str::FromStr; + use std::sync::Arc; + use tempfile::TempDir; + use uuid::Uuid; + + struct DnsServer { + _storage: TempDir, + dns_server: internal_dns::dns_server::Server, + dropshot_server: + dropshot::HttpServer>, + } + + impl DnsServer { + async fn create(log: &Logger) -> Self { + let storage = + TempDir::new().expect("Failed to create temporary directory"); + + let db = Arc::new(sled::open(&storage.path()).unwrap()); + + let dns_server = { + let db = db.clone(); + let log = log.clone(); + let dns_config = internal_dns::dns_server::Config { + bind_address: "[::1]:0".to_string(), + zone: crate::names::DNS_ZONE.into(), + }; + + internal_dns::dns_server::run(log, db, dns_config) + .await + .unwrap() + }; + + let config = internal_dns::Config { + log: dropshot::ConfigLogging::StderrTerminal { + level: dropshot::ConfigLoggingLevel::Info, + }, + dropshot: dropshot::ConfigDropshot { + bind_address: "[::1]:0".parse().unwrap(), + request_body_max_bytes: 1024, + ..Default::default() + }, + data: internal_dns::dns_data::Config { + nmax_messages: 16, + storage_path: storage.path().to_string_lossy().into(), + }, + }; + + let dropshot_server = + internal_dns::start_server(config, log.clone(), db) + .await + .unwrap(); + + Self { _storage: storage, dns_server, dropshot_server } + } + + fn dns_server_address(&self) -> SocketAddr { + self.dns_server.address + } + + fn dropshot_server_address(&self) -> SocketAddr { + self.dropshot_server.local_addr() + } + } + + // The resolver cannot look up IPs before records have been inserted. 
+ #[tokio::test] + async fn lookup_nonexistent_record_fails() { + let logctx = test_setup_log("lookup_nonexistent_record_fails"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new_for_test(&address_getter) + .expect("Error creating localhost resolver"); + + let err = resolver + .lookup_ip(SRV::Service(ServiceName::Cockroach)) + .await + .expect_err("Looking up non-existent service should fail"); + + let dns_error = match err { + ResolveError::Resolve(err) => err, + _ => panic!("Unexpected error: {err}"), + }; + assert!( + matches!( + dns_error.kind(), + trust_dns_resolver::error::ResolveErrorKind::NoRecordsFound { .. }, + ), + "Saw error: {dns_error}", + ); + logctx.cleanup_successful(); + } + + #[derive(Clone)] + struct TestServiceRecord { + aaaa: AAAA, + srv: SRV, + addr: SocketAddrV6, + } + + impl TestServiceRecord { + fn new(aaaa: AAAA, srv: SRV, addr: SocketAddrV6) -> Self { + Self { aaaa, srv, addr } + } + } + + impl Service for TestServiceRecord { + fn aaaa(&self) -> AAAA { + self.aaaa.clone() + } + + fn srv(&self) -> SRV { + self.srv.clone() + } + + fn address(&self) -> SocketAddrV6 { + self.addr + } + } + + // Insert and retreive a single DNS record. + #[tokio::test] + async fn insert_and_lookup_one_record() { + let logctx = test_setup_log("insert_and_lookup_one_record"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new_for_test(&address_getter) + .expect("Error creating localhost resolver"); + let updater = + Updater::new_for_test(&address_getter, logctx.log.clone()); + + let record = TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 12345, + 0, + 0, + ), + ); + updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, record.addr.ip()); + + logctx.cleanup_successful(); + } + + // Insert multiple DNS records of different types. 
+ #[tokio::test] + async fn insert_and_lookup_multiple_records() { + let logctx = test_setup_log("insert_and_lookup_multiple_records"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new_for_test(&address_getter) + .expect("Error creating localhost resolver"); + let updater = + Updater::new_for_test(&address_getter, logctx.log.clone()); + + let cockroach_addrs = [ + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 1111, + 0, + 0, + ), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::02").unwrap(), + 2222, + 0, + 0, + ), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::03").unwrap(), + 3333, + 0, + 0, + ), + ]; + let clickhouse_addr = SocketAddrV6::new( + Ipv6Addr::from_str("fe::01").unwrap(), + 4444, + 0, + 0, + ); + let crucible_addr = SocketAddrV6::new( + Ipv6Addr::from_str("fd::02").unwrap(), + 5555, + 0, + 0, + ); + + let records = vec![ + // Three Cockroach services + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + cockroach_addrs[0], + ), + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + cockroach_addrs[1], + ), + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + cockroach_addrs[2], + ), + // One Clickhouse service + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Clickhouse), + clickhouse_addr, + ), + // One Backend service + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Backend(BackendName::Crucible, Uuid::new_v4()), + crucible_addr, + ), + ]; + updater.insert_dns_records(&records).await.unwrap(); + + // Look up Cockroach + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert!(cockroach_addrs.iter().any(|addr| addr.ip() == &ip)); + + // Look up Clickhouse + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Clickhouse)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, clickhouse_addr.ip()); + + // Look up Backend Service + let ip = resolver + .lookup_ipv6(records[4].srv.clone()) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, crucible_addr.ip()); + + // If we remove the AAAA records for two of the CRDB services, + // only one will remain. 
+ updater + .dns_records_delete(&vec![DnsRecordKey { + name: records[0].aaaa.to_string(), + }]) + .await + .expect("Should have been able to delete record"); + updater + .dns_records_delete(&vec![DnsRecordKey { + name: records[1].aaaa.to_string(), + }]) + .await + .expect("Should have been able to delete record"); + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, cockroach_addrs[2].ip()); + + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn update_record() { + let logctx = test_setup_log("update_record"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new_for_test(&address_getter) + .expect("Error creating localhost resolver"); + let updater = + Updater::new_for_test(&address_getter, logctx.log.clone()); + + // Insert a record, observe that it exists. + let mut record = TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 12345, + 0, + 0, + ), + ); + updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, record.addr.ip()); + + // If we insert the same record with a new address, it should be + // updated. + record.addr = SocketAddrV6::new( + Ipv6Addr::from_str("ee::02").unwrap(), + 54321, + 0, + 0, + ); + updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, record.addr.ip()); + + logctx.cleanup_successful(); + } +} diff --git a/internal-dns-client/src/names.rs b/internal-dns-client/src/names.rs index d920ef77fbd..dbcc0d9f01c 100644 --- a/internal-dns-client/src/names.rs +++ b/internal-dns-client/src/names.rs @@ -2,11 +2,14 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +//! Naming scheme for Internal DNS names (RFD 248). + use std::fmt; use uuid::Uuid; -const DNS_ZONE: &str = "control-plane.oxide.internal"; +pub(crate) const DNS_ZONE: &str = "control-plane.oxide.internal"; +/// Names for services where backends are interchangeable. #[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum ServiceName { Clickhouse, @@ -28,6 +31,7 @@ impl fmt::Display for ServiceName { } } +/// Names for services where backends are not interchangeable. #[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum BackendName { Crucible, @@ -70,6 +74,7 @@ impl fmt::Display for SRV { } } +#[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum AAAA { /// Identifies an AAAA record for a sled. 
Sled(Uuid), @@ -90,3 +95,58 @@ impl fmt::Display for AAAA { } } } + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn display_srv_service() { + assert_eq!( + SRV::Service(ServiceName::Clickhouse).to_string(), + "_clickhouse._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::Cockroach).to_string(), + "_cockroach._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::InternalDNS).to_string(), + "_internalDNS._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::Nexus).to_string(), + "_nexus._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::Oximeter).to_string(), + "_oximeter._tcp.control-plane.oxide.internal", + ); + } + + #[test] + fn display_srv_backend() { + let uuid = Uuid::nil(); + assert_eq!( + SRV::Backend(BackendName::Crucible, uuid).to_string(), + "_crucible._tcp.00000000-0000-0000-0000-000000000000.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Backend(BackendName::SledAgent, uuid).to_string(), + "_sledagent._tcp.00000000-0000-0000-0000-000000000000.control-plane.oxide.internal", + ); + } + + #[test] + fn display_aaaa() { + let uuid = Uuid::nil(); + assert_eq!( + AAAA::Sled(uuid).to_string(), + "00000000-0000-0000-0000-000000000000.sled.control-plane.oxide.internal", + ); + assert_eq!( + AAAA::Zone(uuid).to_string(), + "00000000-0000-0000-0000-000000000000.host.control-plane.oxide.internal", + ); + } +} diff --git a/internal-dns/src/bin/dns-server.rs b/internal-dns/src/bin/dns-server.rs index 96e9da6feca..12eafcc3599 100644 --- a/internal-dns/src/bin/dns-server.rs +++ b/internal-dns/src/bin/dns-server.rs @@ -54,20 +54,18 @@ async fn main() -> Result<(), anyhow::Error> { let db = Arc::new(sled::open(&config.data.storage_path)?); - { + let _dns_server = { let db = db.clone(); let log = log.clone(); let dns_config = internal_dns::dns_server::Config { bind_address: dns_address.to_string(), zone: zone.to_string(), }; - tokio::spawn(async move { - internal_dns::dns_server::run(log, db, dns_config).await - }); - } + internal_dns::dns_server::run(log, db, dns_config).await? 
+ }; - let server = internal_dns::start_server(config, log, db).await?; - server + let dropshot_server = internal_dns::start_server(config, log, db).await?; + dropshot_server .await .map_err(|error_message| anyhow!("server exiting: {}", error_message)) } diff --git a/internal-dns/src/dns_server.rs b/internal-dns/src/dns_server.rs index bffda7cc73f..ccebda582f7 100644 --- a/internal-dns/src/dns_server.rs +++ b/internal-dns/src/dns_server.rs @@ -34,23 +34,43 @@ pub struct Config { pub zone: String, } -pub async fn run(log: Logger, db: Arc, config: Config) -> Result<()> { +pub struct Server { + pub address: SocketAddr, + pub handle: tokio::task::JoinHandle>, +} + +impl Server { + pub fn close(self) { + self.handle.abort() + } +} + +pub async fn run( + log: Logger, + db: Arc, + config: Config, +) -> Result { let socket = Arc::new(UdpSocket::bind(config.bind_address).await?); + let address = socket.local_addr()?; - loop { - let mut buf = vec![0u8; 16384]; - let (n, src) = socket.recv_from(&mut buf).await?; - buf.resize(n, 0); + let handle = tokio::task::spawn(async move { + loop { + let mut buf = vec![0u8; 16384]; + let (n, src) = socket.recv_from(&mut buf).await?; + buf.resize(n, 0); - let socket = socket.clone(); - let log = log.clone(); - let db = db.clone(); - let zone = config.zone.clone(); + let socket = socket.clone(); + let log = log.clone(); + let db = db.clone(); + let zone = config.zone.clone(); - tokio::spawn(async move { - handle_req(log, db, socket, src, buf, zone).await - }); - } + tokio::spawn(async move { + handle_req(log, db, socket, src, buf, zone).await + }); + } + }); + + Ok(Server { address, handle }) } async fn respond_nxdomain( diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs index 29d358970c7..b20d4176d8a 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -280,13 +280,16 @@ pub async fn servfail() -> Result<(), anyhow::Error> { struct TestContext { client: Client, resolver: TokioAsyncResolver, - server: dropshot::HttpServer>, + dns_server: internal_dns::dns_server::Server, + dropshot_server: + dropshot::HttpServer>, tmp: tempdir::TempDir, } impl TestContext { async fn cleanup(self) { - self.server.close().await.expect("Failed to clean up server"); + self.dns_server.close(); + self.dropshot_server.close().await.expect("Failed to clean up server"); self.tmp.close().expect("Failed to clean up tmp directory"); } } @@ -326,7 +329,7 @@ async fn init_client_server( TokioAsyncResolver::tokio(rc, ResolverOpts::default()).unwrap(); // launch a dns server - { + let dns_server = { let db = db.clone(); let log = log.clone(); let dns_config = internal_dns::dns_server::Config { @@ -334,18 +337,16 @@ async fn init_client_server( zone, }; - tokio::spawn(async move { - internal_dns::dns_server::run(log, db, dns_config).await - }); - } + internal_dns::dns_server::run(log, db, dns_config).await? 
+ }; // launch a dropshot server - let server = internal_dns::start_server(config, log, db).await?; + let dropshot_server = internal_dns::start_server(config, log, db).await?; // wait for server to start tokio::time::sleep(tokio::time::Duration::from_millis(250)).await; - Ok(TestContext { client, resolver, server, tmp }) + Ok(TestContext { client, resolver, dns_server, dropshot_server, tmp }) } fn test_config( diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index a21895938f3..395954959d5 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -520,7 +520,6 @@ impl DataStore { kind, ); - // TODO: Can we insert all the services at the same time? let svc = Self::service_upsert_sync(conn, service) .map_err(|e| { TxnError::CustomError(ServiceError::Other(e)) @@ -610,7 +609,6 @@ impl DataStore { ServiceKind::InternalDNS, ); - // TODO: Can we insert all the services at the same time? let svc = Self::service_upsert_sync(conn, service) .map_err(|e| { TxnError::CustomError(ServiceError::Other(e)) @@ -761,7 +759,6 @@ impl DataStore { kind, ); - // TODO: Can we insert all the datasets at the same time? let dataset = Self::dataset_upsert_sync(conn, dataset) .map_err(|e| { TxnError::CustomError(DatasetError::Other(e)) From 4b5dab7556167c5d3781655723bb27491376f7dc Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 15:03:52 -0400 Subject: [PATCH 24/35] Clean up test code --- internal-dns-client/src/multiclient.rs | 182 +++++++++++-------------- nexus/src/app/background/services.rs | 2 +- nexus/src/context.rs | 2 +- sled-agent/src/rack_setup/service.rs | 9 +- 4 files changed, 84 insertions(+), 111 deletions(-) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 333f0283dd8..c957f78a6ec 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -19,75 +19,37 @@ use trust_dns_resolver::TokioAsyncResolver; type DnsError = crate::Error; -// A structure which instructs the client APIs how to access -// DNS servers. -// -// These functions exist in a separate struct for comparison -// with the test-utility, [`LocalAddressGetter`]. -struct FromReservedRackSubnet {} - -const FROM_RESERVED_RACK_SUBNET: FromReservedRackSubnet = - FromReservedRackSubnet {}; - -impl FromReservedRackSubnet { - fn subnet_to_ips( - subnet: Ipv6Subnet, - ) -> impl Iterator { - ReservedRackSubnet::new(subnet) - .get_dns_subnets() - .into_iter() - .map(|dns_subnet| IpAddr::V6(dns_subnet.dns_address().ip())) - } - - fn subnet_to_dropshot_server_addrs( - &self, - subnet: Ipv6Subnet, - ) -> impl Iterator { - Self::subnet_to_ips(subnet) - .map(|address| SocketAddr::new(address, DNS_SERVER_PORT)) - } - - fn subnet_to_dns_server_addrs( - &self, - subnet: Ipv6Subnet, - ) -> impl Iterator { - Self::subnet_to_ips(subnet) - .map(|address| SocketAddr::new(address, DNS_PORT)) - } +/// Describes how to find the DNS servers. +/// +/// In production code, this is nearly always [`Ipv6Subnet`], +/// but it allows a point of dependency-injection for tests to supply their +/// own address lookups. +pub trait DnsAddressLookup { + fn dropshot_server_addrs(&self) -> Vec; + + fn dns_server_addrs(&self) -> Vec; } -// A test-only alternative to [`FromReservedRackSubnet`]. -// -// Rather than inferring DNS server addresses from the rack subnet, -// they may be explicitly supplied. This results in easier-to-test code. 
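The hunk above replaces the hard-coded FromReservedRackSubnet plumbing with a DnsAddressLookup trait, so tests can inject explicit addresses instead of deriving them from the rack subnet. A minimal, self-contained sketch of that dependency-injection pattern follows; the return types are assumed to be Vec<SocketAddr>, and FixedAddrs / updater_addrs are made-up stand-ins for the patch's LocalAddressGetter and Updater::new.

use std::net::SocketAddr;

/// The seam: production code hands in a rack subnet, tests hand in explicit
/// localhost addresses, and callers only ever see plain socket addresses.
trait DnsAddressLookup {
    fn dropshot_server_addrs(&self) -> Vec<SocketAddr>;
    fn dns_server_addrs(&self) -> Vec<SocketAddr>;
}

/// Test-side implementation: a fixed list of (DNS, dropshot) address pairs.
#[derive(Default)]
struct FixedAddrs {
    addrs: Vec<(SocketAddr, SocketAddr)>,
}

impl DnsAddressLookup for FixedAddrs {
    fn dropshot_server_addrs(&self) -> Vec<SocketAddr> {
        self.addrs.iter().map(|(_dns, dropshot)| *dropshot).collect()
    }

    fn dns_server_addrs(&self) -> Vec<SocketAddr> {
        self.addrs.iter().map(|(dns, _dropshot)| *dns).collect()
    }
}

/// A consumer can then accept `&impl DnsAddressLookup` and never needs to know
/// whether the addresses came from the reserved rack subnet or a test fixture.
fn updater_addrs(lookup: &impl DnsAddressLookup) -> Vec<SocketAddr> {
    lookup.dropshot_server_addrs()
}

The production side of the same trait appears in the lines that follow, where the rack-subnet implementation maps the reserved DNS subnets to the dropshot and DNS ports.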
-#[cfg(test)] -#[derive(Default)] -struct LocalAddressGetter { - addrs: Vec<(SocketAddr, SocketAddr)>, +fn subnet_to_ips( + subnet: Ipv6Subnet, +) -> impl Iterator { + ReservedRackSubnet::new(subnet) + .get_dns_subnets() + .into_iter() + .map(|dns_subnet| IpAddr::V6(dns_subnet.dns_address().ip())) } -#[cfg(test)] -impl LocalAddressGetter { - fn add_dns_server( - &mut self, - dns_address: SocketAddr, - server_address: SocketAddr, - ) { - self.addrs.push((dns_address, server_address)); - } - - fn subnet_to_dropshot_server_addrs( - &self, - ) -> impl Iterator + '_ { - self.addrs - .iter() - .map(|(_dns_address, dropshot_address)| *dropshot_address) +impl DnsAddressLookup for Ipv6Subnet { + fn dropshot_server_addrs(&self) -> Vec { + subnet_to_ips(*self) + .map(|address| SocketAddr::new(address, DNS_SERVER_PORT)) + .collect() } - fn subnet_to_dns_server_addrs( - &self, - ) -> impl Iterator + '_ { - self.addrs.iter().map(|(dns_address, _dropshot_address)| *dns_address) + fn dns_server_addrs(&self) -> Vec { + subnet_to_ips(*self) + .map(|address| SocketAddr::new(address, DNS_PORT)) + .collect() } } @@ -105,26 +67,14 @@ pub struct Updater { } impl Updater { - /// Creates a new "Updater", capable of communicating with all - /// DNS servers within the AZ. - pub fn new(subnet: Ipv6Subnet, log: Logger) -> Self { - let addrs = - FROM_RESERVED_RACK_SUBNET.subnet_to_dropshot_server_addrs(subnet); + pub fn new(address_getter: &impl DnsAddressLookup, log: Logger) -> Self { + let addrs = address_getter.dropshot_server_addrs(); Self::new_from_addrs(addrs, log) } - // Creates a new updater, using test-supplied DNS servers. - #[cfg(test)] - fn new_for_test(address_getter: &LocalAddressGetter, log: Logger) -> Self { - let dns_addrs = address_getter.subnet_to_dropshot_server_addrs(); - Self::new_from_addrs(dns_addrs, log) - } - - fn new_from_addrs( - addrs: impl Iterator, - log: Logger, - ) -> Self { + fn new_from_addrs(addrs: Vec, log: Logger) -> Self { let clients = addrs + .into_iter() .map(|addr| { info!(log, "Adding DNS server: {}", addr); crate::Client::new(&format!("http://{}", addr), log.clone()) @@ -271,28 +221,18 @@ pub struct Resolver { } impl Resolver { - /// Creates a DNS resolver, looking up DNS server addresses based on - /// the provided subnet. - pub fn new(subnet: Ipv6Subnet) -> Result { - let dns_addrs = - FROM_RESERVED_RACK_SUBNET.subnet_to_dns_server_addrs(subnet); - Self::new_from_addrs(dns_addrs) - } - - // Creates a new resolver, using test-supplied DNS servers. - #[cfg(test)] - fn new_for_test( - address_getter: &LocalAddressGetter, + pub fn new( + address_getter: &impl DnsAddressLookup, ) -> Result { - let dns_addrs = address_getter.subnet_to_dns_server_addrs(); + let dns_addrs = address_getter.dns_server_addrs(); Self::new_from_addrs(dns_addrs) } fn new_from_addrs( - dns_addrs: impl Iterator, + dns_addrs: Vec, ) -> Result { let mut rc = ResolverConfig::new(); - for socket_addr in dns_addrs { + for socket_addr in dns_addrs.into_iter() { rc.add_name_server(NameServerConfig { socket_addr, protocol: Protocol::Udp, @@ -312,7 +252,7 @@ impl Resolver { pub fn new_from_ip(address: Ipv6Addr) -> Result { let subnet = Ipv6Subnet::::new(address); - Resolver::new(subnet) + Resolver::new(&subnet) } /// Looks up a single [`Ipv6Addr`] based on the SRV name. @@ -416,6 +356,41 @@ mod test { } } + // A test-only way to infer DNS addresses. + // + // Rather than inferring DNS server addresses from the rack subnet, + // they may be explicitly supplied. This results in easier-to-test code. 
+ #[derive(Default)] + struct LocalAddressGetter { + addrs: Vec<(SocketAddr, SocketAddr)>, + } + + impl LocalAddressGetter { + fn add_dns_server( + &mut self, + dns_address: SocketAddr, + server_address: SocketAddr, + ) { + self.addrs.push((dns_address, server_address)); + } + } + + impl DnsAddressLookup for LocalAddressGetter { + fn dropshot_server_addrs(&self) -> Vec { + self.addrs + .iter() + .map(|(_dns_address, dropshot_address)| *dropshot_address) + .collect() + } + + fn dns_server_addrs(&self) -> Vec { + self.addrs + .iter() + .map(|(dns_address, _dropshot_address)| *dns_address) + .collect() + } + } + // The resolver cannot look up IPs before records have been inserted. #[tokio::test] async fn lookup_nonexistent_record_fails() { @@ -428,7 +403,7 @@ mod test { dns_server.dropshot_server_address(), ); - let resolver = Resolver::new_for_test(&address_getter) + let resolver = Resolver::new(&address_getter) .expect("Error creating localhost resolver"); let err = resolver @@ -489,10 +464,9 @@ mod test { dns_server.dropshot_server_address(), ); - let resolver = Resolver::new_for_test(&address_getter) + let resolver = Resolver::new(&address_getter) .expect("Error creating localhost resolver"); - let updater = - Updater::new_for_test(&address_getter, logctx.log.clone()); + let updater = Updater::new(&address_getter, logctx.log.clone()); let record = TestServiceRecord::new( AAAA::Zone(Uuid::new_v4()), @@ -527,10 +501,9 @@ mod test { dns_server.dropshot_server_address(), ); - let resolver = Resolver::new_for_test(&address_getter) + let resolver = Resolver::new(&address_getter) .expect("Error creating localhost resolver"); - let updater = - Updater::new_for_test(&address_getter, logctx.log.clone()); + let updater = Updater::new(&address_getter, logctx.log.clone()); let cockroach_addrs = [ SocketAddrV6::new( @@ -652,10 +625,9 @@ mod test { dns_server.dropshot_server_address(), ); - let resolver = Resolver::new_for_test(&address_getter) + let resolver = Resolver::new(&address_getter) .expect("Error creating localhost resolver"); - let updater = - Updater::new_for_test(&address_getter, logctx.log.clone()); + let updater = Updater::new(&address_getter, logctx.log.clone()); // Insert a record, observe that it exists. 
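        // Taken together, the two inserts in this test show that
        // insert_dns_records acts as an upsert: re-inserting the same
        // AAAA/SRV pair with a new address replaces the old record, and the
        // resolver then returns the new address.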
let mut record = TestServiceRecord::new( diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index 7178ae7930b..94b2059d7ca 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -92,7 +92,7 @@ pub struct ServiceBalancer { impl ServiceBalancer { pub fn new(log: Logger, nexus: Arc) -> Self { let dns_updater = DnsUpdater::new( - nexus.az_subnet(), + &nexus.az_subnet(), log.new(o!("component" => "DNS Updater")), ); diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 31d0af0808a..c4de9e5d044 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -145,7 +145,7 @@ impl ServerContext { Ipv6Subnet::::new(config.deployment.subnet.net().ip()); info!(log, "Setting up resolver on subnet: {:?}", az_subnet); let resolver = - internal_dns_client::multiclient::Resolver::new(az_subnet) + internal_dns_client::multiclient::Resolver::new(&az_subnet) .map_err(|e| format!("Failed to create DNS resolver: {}", e))?; // Set up DB pool diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 42e821a379e..4b11beb8c16 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -313,9 +313,10 @@ impl ServiceInner { ) -> Result<(), SetupServiceError> { info!(self.log, "Handing off control to Nexus"); - let resolver = - internal_dns_client::multiclient::Resolver::new(config.az_subnet()) - .expect("Failed to create DNS resolver"); + let resolver = internal_dns_client::multiclient::Resolver::new( + &config.az_subnet(), + ) + .expect("Failed to create DNS resolver"); let ip = resolver .lookup_ip(SRV::Service(ServiceName::Nexus)) .await @@ -546,7 +547,7 @@ impl ServiceInner { .collect::>()?; let dns_servers = internal_dns_client::multiclient::Updater::new( - config.az_subnet(), + &config.az_subnet(), self.log.new(o!("client" => "DNS")), ); self.dns_servers From db2b545309ceb695f4d92460e10581bc041e8cc6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 15:13:41 -0400 Subject: [PATCH 25/35] no retry in client library --- internal-dns-client/src/multiclient.rs | 22 +++------------- sled-agent/src/rack_setup/service.rs | 35 +++++++++++++++++++++----- 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index c957f78a6ec..ca8387fca45 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -7,17 +7,14 @@ use futures::stream::{self, StreamExt, TryStreamExt}; use omicron_common::address::{ Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, DNS_SERVER_PORT, }; -use omicron_common::backoff::{ - internal_service_policy, retry_notify, BackoffError, -}; -use slog::{info, warn, Logger}; +use slog::{info, Logger}; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use trust_dns_resolver::config::{ NameServerConfig, Protocol, ResolverConfig, ResolverOpts, }; use trust_dns_resolver::TokioAsyncResolver; -type DnsError = crate::Error; +pub type DnsError = crate::Error; /// Describes how to find the DNS servers. 
/// @@ -152,20 +149,7 @@ impl Updater { }) .collect::>(), }); - - let set_record = || async { - self.dns_records_set(&records) - .await - .map_err(BackoffError::transient)?; - Ok::<(), BackoffError>(()) - }; - let log_failure = |error, _| { - warn!(self.log, "Failed to set DNS records"; "error" => ?error); - }; - - retry_notify(internal_service_policy(), set_record, log_failure) - .await?; - Ok(()) + self.dns_records_set(&records).await } /// Sets a records on all DNS servers. diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 4b11beb8c16..55c837ceffa 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -207,11 +207,21 @@ impl ServiceInner { .await?; } - // Initialize DNS records for these datasets. - self.dns_servers - .get() - .expect("DNS servers must be initialized first") - .insert_dns_records(datasets) + let records_put = || async { + self.dns_servers + .get() + .expect("DNS servers must be initialized first") + .insert_dns_records(datasets) + .await + .map_err(BackoffError::transient)?; + Ok::<(), BackoffError>( + (), + ) + }; + let log_failure = |error, _| { + warn!(self.log, "failed to set DNS records"; "error" => ?error); + }; + retry_notify(internal_service_policy(), records_put, log_failure) .await?; Ok(()) @@ -256,7 +266,20 @@ impl ServiceInner { // Insert DNS records, if the DNS servers have been initialized if let Some(dns_servers) = self.dns_servers.get() { - dns_servers.insert_dns_records(services).await?; + let records_put = || async { + dns_servers + .insert_dns_records(services) + .await + .map_err(BackoffError::transient)?; + Ok::<(), BackoffError>( + (), + ) + }; + let log_failure = |error, _| { + warn!(self.log, "failed to set DNS records"; "error" => ?error); + }; + retry_notify(internal_service_policy(), records_put, log_failure) + .await?; } Ok(()) From 027fb3b11bf772e992a6d24e15ea532e03ba3618 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 15:19:46 -0400 Subject: [PATCH 26/35] Fix internal-dns --- Cargo.lock | 10 ------- internal-dns/Cargo.toml | 1 - internal-dns/tests/basic_test.rs | 50 ++++++++++++++------------------ 3 files changed, 22 insertions(+), 39 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8a7b9f25cb0..126aae1dbc9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2315,7 +2315,6 @@ dependencies = [ "omicron-test-utils", "openapi-lint", "openapiv3", - "portpicker", "pretty-hex 0.3.0", "schemars", "serde", @@ -3882,15 +3881,6 @@ dependencies = [ "universal-hash", ] -[[package]] -name = "portpicker" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be97d76faf1bfab666e1375477b23fde79eccf0276e9b63b92a39d676a889ba9" -dependencies = [ - "rand 0.8.5", -] - [[package]] name = "postcard" version = "0.7.3" diff --git a/internal-dns/Cargo.toml b/internal-dns/Cargo.toml index 886fa72cc18..d49859f18c1 100644 --- a/internal-dns/Cargo.toml +++ b/internal-dns/Cargo.toml @@ -30,7 +30,6 @@ expectorate = "1.0.5" omicron-test-utils = { path = "../test-utils" } openapiv3 = "1.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } -portpicker = "0.1" serde_json = "1.0" subprocess = "0.2.9" trust-dns-resolver = "0.21" diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs index b20d4176d8a..af72ded52cb 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -2,7 +2,7 @@ // License, v. 2.0. 
If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::net::Ipv6Addr; use std::sync::Arc; use anyhow::{Context, Result}; @@ -298,7 +298,7 @@ async fn init_client_server( zone: String, ) -> Result { // initialize dns server config - let (tmp, config, dropshot_port, dns_port) = test_config()?; + let (tmp, config) = test_config()?; let log = config .log .to_logger("internal-dns") @@ -308,17 +308,21 @@ async fn init_client_server( let db = Arc::new(sled::open(&config.data.storage_path)?); db.clear()?; - let client = - Client::new(&format!("http://[::1]:{}", dropshot_port), log.clone()); + // launch a dns server + let dns_server = { + let db = db.clone(); + let log = log.clone(); + let dns_config = internal_dns::dns_server::Config { + bind_address: "[::1]:0".into(), + zone, + }; + + internal_dns::dns_server::run(log, db, dns_config).await? + }; let mut rc = ResolverConfig::new(); rc.add_name_server(NameServerConfig { - socket_addr: SocketAddr::V6(SocketAddrV6::new( - Ipv6Addr::LOCALHOST, - dns_port, - 0, - 0, - )), + socket_addr: dns_server.address, protocol: Protocol::Udp, tls_dns_name: None, trust_nx_responses: false, @@ -328,31 +332,21 @@ async fn init_client_server( let resolver = TokioAsyncResolver::tokio(rc, ResolverOpts::default()).unwrap(); - // launch a dns server - let dns_server = { - let db = db.clone(); - let log = log.clone(); - let dns_config = internal_dns::dns_server::Config { - bind_address: format!("[::1]:{}", dns_port), - zone, - }; - - internal_dns::dns_server::run(log, db, dns_config).await? - }; - // launch a dropshot server - let dropshot_server = internal_dns::start_server(config, log, db).await?; + let dropshot_server = + internal_dns::start_server(config, log.clone(), db).await?; // wait for server to start tokio::time::sleep(tokio::time::Duration::from_millis(250)).await; + let client = + Client::new(&format!("http://{}", dropshot_server.local_addr()), log); + Ok(TestContext { client, resolver, dns_server, dropshot_server, tmp }) } fn test_config( -) -> Result<(tempdir::TempDir, internal_dns::Config, u16, u16), anyhow::Error> { - let dropshot_port = portpicker::pick_unused_port().expect("pick port"); - let dns_port = portpicker::pick_unused_port().expect("pick port"); +) -> Result<(tempdir::TempDir, internal_dns::Config), anyhow::Error> { let tmp_dir = tempdir::TempDir::new("internal-dns-test")?; let mut storage_path = tmp_dir.path().to_path_buf(); storage_path.push("test"); @@ -363,7 +357,7 @@ fn test_config( level: dropshot::ConfigLoggingLevel::Info, }, dropshot: dropshot::ConfigDropshot { - bind_address: format!("[::1]:{}", dropshot_port).parse().unwrap(), + bind_address: format!("[::1]:0").parse().unwrap(), request_body_max_bytes: 1024, ..Default::default() }, @@ -373,5 +367,5 @@ fn test_config( }, }; - Ok((tmp_dir, config, dropshot_port, dns_port)) + Ok((tmp_dir, config)) } From e33fb4bef1a537820d2e1dec4aa092c325c34916 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 15:59:25 -0400 Subject: [PATCH 27/35] fix typos, warnings --- common/src/nexus_config.rs | 2 +- nexus/src/app/background/services.rs | 2 +- nexus/src/db/datastore.rs | 2 -- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index d0e87d70ebb..a18454e02d0 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -102,7 +102,7 @@ pub enum Database { pub struct DeploymentConfig { /// 
Uuid of the Nexus instance pub id: Uuid, - /// Uuid of the Rack where Nexus is executing + /// Uuid of the Rack where Nexus is executing. pub rack_id: Uuid, /// Dropshot configuration for external API server pub dropshot_external: ConfigDropshot, diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index 94b2059d7ca..4a97d89c407 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -33,7 +33,7 @@ use std::sync::Arc; #[derive(Debug)] enum ServiceRedundancy { // This service must exist on at least this many sleds - // within the racki. + // within the rack. PerRack(u32), // This service must exist on at least this many sleds diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index bf1ac6a79d3..12435e28d3e 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -4615,7 +4615,6 @@ mod test { ); let rack_id = Uuid::new_v4(); let sled_id = Uuid::new_v4(); - let rack_id = Uuid::new_v4(); let sled = Sled::new(sled_id, bogus_addr.clone(), rack_id); datastore.sled_upsert(sled).await.unwrap(); sled_id @@ -4969,7 +4968,6 @@ mod test { OpContext::for_tests(logctx.log.new(o!()), datastore.clone()); let rack_id = Uuid::new_v4(); - let rack_id = Uuid::new_v4(); let addr1 = "[fd00:1de::1]:12345".parse().unwrap(); let sled1_id = "0de4b299-e0b4-46f0-d528-85de81a7095f".parse().unwrap(); let sled1 = db::model::Sled::new(sled1_id, addr1, rack_id); From 9fc49949d8506fed511a994edbd7604ef79fd1af Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 21:08:11 -0400 Subject: [PATCH 28/35] Cleanup imports --- sled-agent/src/rack_setup/service.rs | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 55c837ceffa..397e191cda7 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -15,6 +15,9 @@ use crate::rack_setup::plan::service::{ use crate::rack_setup::plan::sled::{ Plan as SledPlan, PlanError as SledPlanError, }; +use internal_dns_client::multiclient::{ + DnsError, Resolver as DnsResolver, Updater as DnsUpdater, +}; use internal_dns_client::names::{ServiceName, SRV}; use nexus_client::{ types as NexusTypes, Client as NexusClient, Error as NexusError, @@ -72,9 +75,8 @@ pub enum SetupServiceError { #[error("Failed to construct an HTTP client: {0}")] HttpClient(reqwest::Error), - // XXX CLEAN UP - #[error(transparent)] - Dns(#[from] internal_dns_client::Error), + #[error("Failed to access DNS server: {0}")] + Dns(#[from] DnsError), } /// The interface to the Rack Setup Service. 
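The error variant above now wraps the client's DnsError directly, and (per patch 25, "no retry in client library") the retry loop lives at the RSS call sites shown in these hunks rather than inside the client. A minimal sketch of that retry idiom, using the omicron_common::backoff helpers that appear in the patch; the DnsError struct, put_records, and put_records_with_retry are hypothetical stand-ins for the real client call:

use omicron_common::backoff::{
    internal_service_policy, retry_notify, BackoffError,
};
use slog::{warn, Logger};

/// A stand-in for the DNS client error type.
#[derive(Debug)]
struct DnsError(String);

/// A stand-in for insert_dns_records: any fallible async operation.
async fn put_records() -> Result<(), DnsError> {
    Ok(())
}

async fn put_records_with_retry(log: &Logger) -> Result<(), DnsError> {
    // Each failed attempt is marked transient so the backoff policy retries it.
    let attempt = || async {
        put_records().await.map_err(BackoffError::transient)?;
        Ok::<(), BackoffError<DnsError>>(())
    };
    // Invoked on every failure with the error and the delay before the retry.
    let log_failure = |error, _| {
        warn!(log, "failed to set DNS records"; "error" => ?error);
    };
    retry_notify(internal_service_policy(), attempt, log_failure).await
}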
@@ -157,7 +159,7 @@ enum PeerExpectation { struct ServiceInner { log: Logger, peer_monitor: Mutex, - dns_servers: OnceCell, + dns_servers: OnceCell, } impl ServiceInner { @@ -214,9 +216,7 @@ impl ServiceInner { .insert_dns_records(datasets) .await .map_err(BackoffError::transient)?; - Ok::<(), BackoffError>( - (), - ) + Ok::<(), BackoffError>(()) }; let log_failure = |error, _| { warn!(self.log, "failed to set DNS records"; "error" => ?error); @@ -271,9 +271,7 @@ impl ServiceInner { .insert_dns_records(services) .await .map_err(BackoffError::transient)?; - Ok::<(), BackoffError>( - (), - ) + Ok::<(), BackoffError>(()) }; let log_failure = |error, _| { warn!(self.log, "failed to set DNS records"; "error" => ?error); @@ -336,10 +334,8 @@ impl ServiceInner { ) -> Result<(), SetupServiceError> { info!(self.log, "Handing off control to Nexus"); - let resolver = internal_dns_client::multiclient::Resolver::new( - &config.az_subnet(), - ) - .expect("Failed to create DNS resolver"); + let resolver = DnsResolver::new(&config.az_subnet()) + .expect("Failed to create DNS resolver"); let ip = resolver .lookup_ip(SRV::Service(ServiceName::Nexus)) .await @@ -569,7 +565,7 @@ impl ServiceInner { .into_iter() .collect::>()?; - let dns_servers = internal_dns_client::multiclient::Updater::new( + let dns_servers = DnsUpdater::new( &config.az_subnet(), self.log.new(o!("client" => "DNS")), ); From 11ebb7bf85630b4441c444cf36888f3dd7ebe065 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 21:36:27 -0400 Subject: [PATCH 29/35] [nexus] Add tests for rack endpoints --- nexus/src/app/mod.rs | 4 +++ nexus/tests/integration_tests/mod.rs | 1 + nexus/tests/integration_tests/rack.rs | 41 +++++++++++++++++++++++++++ 3 files changed, 46 insertions(+) create mode 100644 nexus/tests/integration_tests/rack.rs diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 682bb406c77..a4be4beda52 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -229,6 +229,10 @@ impl Nexus { &self.tunables } + pub fn rack_id(&self) -> Uuid { + self.rack_id + } + pub async fn wait_for_populate(&self) -> Result<(), anyhow::Error> { let mut my_rx = self.populate_status.clone(); loop { diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index de5de9679bd..6c3e52bd785 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -14,6 +14,7 @@ mod instances; mod organizations; mod oximeter; mod projects; +mod rack; mod role_assignments; mod roles_builtin; mod router_routes; diff --git a/nexus/tests/integration_tests/rack.rs b/nexus/tests/integration_tests/rack.rs new file mode 100644 index 00000000000..dfcbde9740f --- /dev/null +++ b/nexus/tests/integration_tests/rack.rs @@ -0,0 +1,41 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
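// The two tests below check the external rack API from both directions:
// test_list_own_rack walks the paginated "/hardware/racks" collection and
// expects exactly one entry whose id matches nexus.rack_id(), while
// test_get_own_rack fetches "/hardware/racks/{id}" as the privileged user and
// verifies that the same id round-trips.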
+ +use nexus_test_utils::http_testing::AuthnMode; +use nexus_test_utils::http_testing::NexusRequest; +use nexus_test_utils::ControlPlaneTestContext; +use nexus_test_utils_macros::nexus_test; +use omicron_nexus::external_api::views::Rack; + +#[nexus_test] +async fn test_list_own_rack(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + let racks_url = "/hardware/racks"; + let racks: Vec = + NexusRequest::iter_collection_authn(client, racks_url, "", None) + .await + .expect("failed to list racks") + .all_items; + + assert_eq!(1, racks.len()); + assert_eq!(cptestctx.server.apictx.nexus.rack_id(), racks[0].identity.id); +} + +#[nexus_test] +async fn test_get_own_rack(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + let expected_id = cptestctx.server.apictx.nexus.rack_id(); + let rack_url = format!("/hardware/racks/{}", expected_id); + let rack = NexusRequest::object_get(client, &rack_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to get rack") + .parsed_body::() + .unwrap(); + + assert_eq!(expected_id, rack.identity.id); +} From 1822762765dfe4ca2292764c36f2c61f7ceb7d7e Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 21 Jun 2022 00:17:54 -0400 Subject: [PATCH 30/35] [nexus] Add tunable to disable background tasks --- nexus/src/config.rs | 28 +++++++++++++++++++++++++--- nexus/src/lib.rs | 7 ++++++- nexus/tests/config.test.toml | 2 ++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/nexus/src/config.rs b/nexus/src/config.rs index 5ca452e7388..e273cfbc1f7 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -56,18 +56,33 @@ pub struct TimeseriesDbConfig { #[derive(Clone, Debug, Deserialize, PartialEq)] struct UnvalidatedTunables { max_vpc_ipv4_subnet_prefix: u8, + enable_background_tasks: bool, +} + +fn deserialize_ipv4_subnet<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + let prefix = u8::deserialize(deserializer)?; + Tunables::validate_ipv4_prefix(prefix) + .map_err(|e| serde::de::Error::custom(e))?; + Ok(prefix) } /// Tunable configuration parameters, intended for use in test environments or /// other situations in which experimentation / tuning is valuable. #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] -#[serde(try_from = "UnvalidatedTunables")] pub struct Tunables { /// The maximum prefix size supported for VPC Subnet IPv4 subnetworks. /// /// Note that this is the maximum _prefix_ size, which sets the minimum size /// of the subnet. + #[serde(default, deserialize_with = "deserialize_ipv4_subnet")] pub max_vpc_ipv4_subnet_prefix: u8, + + /// Identifies whether or not background tasks will be enabled. + #[serde(default)] + pub enable_background_tasks: bool, } // Convert from the unvalidated tunables, verifying each parameter as needed. 
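The Tunables change above validates max_vpc_ipv4_subnet_prefix during deserialization via a custom deserialize_with function. A standalone illustration of the same idiom follows; deserialize_prefix, ExampleTunables, and the /30 limit are made up for the example and only mirror the shape of deserialize_ipv4_subnet:

use serde::{Deserialize, Deserializer};

// Field-level validator: reject IPv4 prefixes longer than /30.
fn deserialize_prefix<'de, D>(deserializer: D) -> Result<u8, D::Error>
where
    D: Deserializer<'de>,
{
    let prefix = u8::deserialize(deserializer)?;
    if prefix > 30 {
        return Err(serde::de::Error::custom(format!(
            "IPv4 subnet prefix /{} is not supported",
            prefix
        )));
    }
    Ok(prefix)
}

#[derive(Debug, Deserialize)]
struct ExampleTunables {
    // `default` covers a missing key; `deserialize_with` validates a present one.
    #[serde(default, deserialize_with = "deserialize_prefix")]
    max_vpc_ipv4_subnet_prefix: u8,
}

fn main() {
    let ok: ExampleTunables =
        toml::from_str("max_vpc_ipv4_subnet_prefix = 26").unwrap();
    assert_eq!(ok.max_vpc_ipv4_subnet_prefix, 26);
    assert!(toml::from_str::<ExampleTunables>("max_vpc_ipv4_subnet_prefix = 31")
        .is_err());
}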
@@ -78,6 +93,7 @@ impl TryFrom for Tunables { Tunables::validate_ipv4_prefix(unvalidated.max_vpc_ipv4_subnet_prefix)?; Ok(Tunables { max_vpc_ipv4_subnet_prefix: unvalidated.max_vpc_ipv4_subnet_prefix, + enable_background_tasks: unvalidated.enable_background_tasks, }) } } @@ -119,7 +135,10 @@ pub const MAX_VPC_IPV4_SUBNET_PREFIX: u8 = 26; impl Default for Tunables { fn default() -> Self { - Tunables { max_vpc_ipv4_subnet_prefix: MAX_VPC_IPV4_SUBNET_PREFIX } + Tunables { + max_vpc_ipv4_subnet_prefix: MAX_VPC_IPV4_SUBNET_PREFIX, + enable_background_tasks: true, + } } } @@ -388,7 +407,10 @@ mod test { trusted_root: PathBuf::from("/path/to/root.json"), default_base_url: "http://example.invalid/".into(), }), - tunables: Tunables { max_vpc_ipv4_subnet_prefix: 27 }, + tunables: Tunables { + max_vpc_ipv4_subnet_prefix: 27, + enable_background_tasks: false, + }, }, } ); diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 1a461a61559..e11e7745b55 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -130,7 +130,12 @@ impl Server { // Wait until RSS handoff completes. let opctx = apictx.nexus.opctx_for_background(); apictx.nexus.await_rack_initialization(&opctx).await; - apictx.nexus.start_background_tasks().map_err(|e| e.to_string())?; + + // With the exception of integration tests environments, + // we expect background tasks to be enabled. + if config.pkg.tunables.enable_background_tasks { + apictx.nexus.start_background_tasks().map_err(|e| e.to_string())?; + } let http_server_starter_external = dropshot::HttpServerStarter::new( &config.deployment.dropshot_external, diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index fdfeb5effb4..c451a341a5e 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -34,6 +34,8 @@ address = "[::1]:0" [tunables] # Allow small subnets, so we can test IP address exhaustion easily / quickly max_vpc_ipv4_subnet_prefix = 29 +# Disable background tests to help with test determinism +enable_background_tasks = false [deployment] # Identifier for this instance of Nexus. From d2536d7d45f386a0e6516c90683de8f524cdc7a6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 21 Jun 2022 10:02:12 -0400 Subject: [PATCH 31/35] Delete out-dated docs --- docs/how-to-run.adoc | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index 61d70d17b1a..8cef8af83c4 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -109,28 +109,9 @@ When we deploy, we're effectively creating a number of different zones for all the components that make up Omicron (Nexus, Clickhouse, Crucible, etc). Since all these services run in different zones they cannot communicate with each other (and Sled Agent in the global zone) via `localhost`. In practice, -we'll assign addresses as per RFD 63 as well as incorporating DNS based +we assign addresses as per RFD 63 as well as incorporating DNS based service discovery. -For the purposes of local development today, we specify some hardcoded IPv6 -unique local addresses in the subnet of the first Sled Agent: `fd00:1122:3344:1::/64`: - -[options="header"] -|=================================================================================================== -| Service | Endpoint -| Sled Agent: Bootstrap | Derived from MAC address of physical data link. 
-| Sled Agent: Dropshot API | `[fd00:1122:3344:0101::1]:12345` -| Cockroach DB | `[fd00:1122:3344:0101::2]:32221` -| Nexus: External API | `[fd00:1122:3344:0101::3]:12220` -| Nexus: Internal API | `[fd00:1122:3344:0101::3]:12221` -| Oximeter | `[fd00:1122:3344:0101::4]:12223` -| Clickhouse | `[fd00:1122:3344:0101::5]:8123` -| Crucible Downstairs 1 | `[fd00:1122:3344:0101::6]:32345` -| Crucible Downstairs 2 | `[fd00:1122:3344:0101::7]:32345` -| Crucible Downstairs 3 | `[fd00:1122:3344:0101::8]:32345` -| Internal DNS Service | `[fd00:1122:3344:0001::1]:5353` -|=================================================================================================== - Note that Sled Agent runs in the global zone and is the one responsible for bringing up all the other other services and allocating them with vNICs and IPv6 addresses. From 0ff033a7978ae223f316bb7425bae42807904f9d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 22 Jun 2022 15:32:02 -0400 Subject: [PATCH 32/35] renamed opctx --- nexus/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index e11e7745b55..61186c144ac 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -128,7 +128,7 @@ impl Server { let config = internal.config; // Wait until RSS handoff completes. - let opctx = apictx.nexus.opctx_for_background(); + let opctx = apictx.nexus.opctx_for_service_balancer(); apictx.nexus.await_rack_initialization(&opctx).await; // With the exception of integration tests environments, From 4d7a46cf481782db886e78bca990bfc4d2e32fd4 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 22 Jun 2022 15:46:52 -0400 Subject: [PATCH 33/35] in tests too --- nexus/test-utils/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 3fcfcba3bdd..63779b05e09 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -124,7 +124,7 @@ pub async fn test_setup_with_config( // Perform the "handoff from RSS". // // However, RSS isn't running, so we'll do the handoff ourselves. 
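    // Patches 32 and 33 form a paired rename: the nexus startup path above and
    // the test-utils hunk just below both switch from opctx_for_background()
    // to opctx_for_service_balancer(), presumably to match the ServiceBalancer
    // used for background services, keeping the two call sites consistent.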
- let opctx = internal_server.apictx.nexus.opctx_for_background(); + let opctx = internal_server.apictx.nexus.opctx_for_service_balancer(); internal_server .apictx .nexus From 6047b93b4ed98950653c819576cce63881838689 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 11 Jul 2022 09:08:38 -0400 Subject: [PATCH 34/35] remove unused --- sled-agent/src/config.rs | 2 +- sled-agent/src/sled_agent.rs | 1 - sled-agent/src/sp/simulated.rs | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index c3bf3e26a52..3c454fdb637 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -13,7 +13,7 @@ use serde::Deserialize; use serde_with::serde_as; use serde_with::DisplayFromStr; use serde_with::PickFirst; -use std::net::{Ipv4Addr, SocketAddr}; +use std::net::Ipv4Addr; use std::path::{Path, PathBuf}; /// Configuration for a sled agent diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 8d0af9aed73..2d1662b6ed1 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -28,7 +28,6 @@ use omicron_common::api::{ use slog::Logger; use std::net::SocketAddrV6; use std::process::Command; -use std::sync::Arc; use uuid::Uuid; #[cfg(not(test))] diff --git a/sled-agent/src/sp/simulated.rs b/sled-agent/src/sp/simulated.rs index f642b6189df..fe3b3cbac2a 100644 --- a/sled-agent/src/sp/simulated.rs +++ b/sled-agent/src/sp/simulated.rs @@ -5,7 +5,6 @@ //! Implementation of a simulated SP / RoT. use super::SpError; -use crate::config::Config as SledConfig; use crate::illumos::dladm::Dladm; use crate::zone::Zones; use slog::Logger; From eba4486a72cbc87896e48b90aa0e8145aa9fb4d2 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 11 Jul 2022 09:46:20 -0400 Subject: [PATCH 35/35] Finish merge --- sled-agent/src/rack_setup/config.rs | 7 +- sled-agent/src/rack_setup/plan/service.rs | 8 +- sled-agent/src/rack_setup/service.rs | 3 + smf/sled-agent/config-rss.toml | 127 +--------------------- 4 files changed, 13 insertions(+), 132 deletions(-) diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 3d389ed7dd1..60687a16764 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -10,7 +10,7 @@ use omicron_common::address::{ }; use serde::Deserialize; use serde::Serialize; -use std::net::Ipv6Addr; +use std::net::{IpAddr, Ipv6Addr}; use std::path::Path; /// Configuration for the "rack setup service", which is controlled during @@ -32,6 +32,10 @@ pub struct SetupServiceConfig { /// If this value is less than 2, no rack secret will be created on startup; /// this is the typical case for single-server test/development. pub rack_secret_threshold: usize, + + /// The address on which Nexus should serve an external interface. + // TODO: Eventually, this should be pulled from a pool of addresses. 
+ pub nexus_external_address: IpAddr, } impl SetupServiceConfig { @@ -68,6 +72,7 @@ mod test { let cfg = SetupServiceConfig { rack_subnet: "fd00:1122:3344:0100::".parse().unwrap(), rack_secret_threshold: 0, + nexus_external_address: "192.168.1.20".parse().unwrap(), }; assert_eq!( diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 0bdf332d748..429d612d62e 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -19,7 +19,7 @@ use sled_agent_client::{ }; use slog::Logger; use std::collections::HashMap; -use std::net::{Ipv6Addr, SocketAddrV6}; +use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::path::{Path, PathBuf}; use thiserror::Error; use uuid::Uuid; @@ -193,11 +193,9 @@ impl Plan { 0, 0, ), - external_address: SocketAddrV6::new( - address, + external_address: SocketAddr::new( + config.nexus_external_address, NEXUS_EXTERNAL_PORT, - 0, - 0, ), }, }) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index cbf424b14af..4bc314c33e4 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -416,6 +416,9 @@ impl ServiceInner { NexusTypes::ServiceKind::InternalDNS } ServiceType::Oximeter => NexusTypes::ServiceKind::Oximeter, + ServiceType::Dendrite { .. } => { + NexusTypes::ServiceKind::Dendrite + } }; services.push(NexusTypes::ServicePutRequest { diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index 33aa0dc7f6b..0adcc276f67 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -10,131 +10,6 @@ rack_subnet = "fd00:1122:3344:0100::" # # For values less than 2, no rack secret will be generated. rack_secret_threshold = 1 -<<<<<<< HEAD -||||||| c56a6fd0 -[[request]] - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate crucible datasets. -[[request.dataset]] -id = "09a9a25f-2602-4e2f-9630-31af9c492c3e" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::6]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "2713b37a-3043-4ed5-aaff-f38200e45cfb" -zpool_id = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::7]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "ffd16cad-e5d5-495e-9c59-4312a3857d91" -zpool_id = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::8]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "4d08fc19-3d5f-4f6b-9c48-925f8eac7255" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::2]:32221" -dataset_kind.type = "cockroach_db" -dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate clickhouse datasets. -[[request.dataset]] -id = "a3505b41-a592-420b-84f2-3d76bf0e0a81" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::5]:8123" -dataset_kind.type = "clickhouse" - -[[request.service]] -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" -name = "nexus" -addresses = [ "fd00:1122:3344:0101::3" ] -gz_addresses = [] -[request.service.service_type] -type = "nexus" -internal_address = "[fd00:1122:3344:0101::3]:12221" -external_address = "[fd00:1122:3344:0101::3]:12220" - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate Oximeter services. 
-[[request.service]] -id = "1da65e5b-210c-4859-a7d7-200c1e659972" -name = "oximeter" -addresses = [ "fd00:1122:3344:0101::4" ] -gz_addresses = [] -[request.service.service_type] -type = "oximeter" -======= - -[[request]] - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate crucible datasets. -[[request.dataset]] -id = "09a9a25f-2602-4e2f-9630-31af9c492c3e" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::6]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "2713b37a-3043-4ed5-aaff-f38200e45cfb" -zpool_id = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::7]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "ffd16cad-e5d5-495e-9c59-4312a3857d91" -zpool_id = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::8]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "4d08fc19-3d5f-4f6b-9c48-925f8eac7255" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::2]:32221" -dataset_kind.type = "cockroach_db" -dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate clickhouse datasets. -[[request.dataset]] -id = "a3505b41-a592-420b-84f2-3d76bf0e0a81" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::5]:8123" -dataset_kind.type = "clickhouse" - -[[request.service]] -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" -name = "nexus" -addresses = [ "fd00:1122:3344:0101::3" ] -gz_addresses = [] -[request.service.service_type] -type = "nexus" -internal_address = "[fd00:1122:3344:0101::3]:12221" # NOTE: In the lab, use "172.20.15.226" -external_address = "192.168.1.20:80" - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate Oximeter services. -[[request.service]] -id = "1da65e5b-210c-4859-a7d7-200c1e659972" -name = "oximeter" -addresses = [ "fd00:1122:3344:0101::4" ] -gz_addresses = [] -[request.service.service_type] -type = "oximeter" - -[[request.service]] -id = "a0fe5ebc-9261-6f77-acc1-972481755789" -name = "dendrite" -addresses = [ "fd00:1122:3344:0101::9" ] -gz_addresses = [] -[request.service.service_type] -type = "dendrite" -asic = "tofino_stub" ->>>>>>> f11200e2534cc252c0ab8db5ab85112ae6d79e3c +nexus_external_address = "192.168.1.20"
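With the merge leftovers removed, config-rss.toml is reduced to the three keys that SetupServiceConfig actually consumes. A quick way to sanity-check that shape is to round-trip the file through a mirror struct; RssConfig below is a hypothetical stand-in for the real SetupServiceConfig, and the toml crate is assumed to be available:

use serde::Deserialize;
use std::net::{IpAddr, Ipv6Addr};

// Mirror of the trimmed-down RSS config: only these three top-level keys
// remain after the cleanup above.
#[derive(Debug, Deserialize)]
struct RssConfig {
    rack_subnet: Ipv6Addr,
    rack_secret_threshold: usize,
    nexus_external_address: IpAddr,
}

fn main() {
    let toml_text = r#"
        rack_subnet = "fd00:1122:3344:0100::"
        rack_secret_threshold = 1
        nexus_external_address = "192.168.1.20"
    "#;
    let cfg: RssConfig = toml::from_str(toml_text).expect("valid RSS config");
    assert_eq!(cfg.rack_secret_threshold, 1);
    println!("{:?}", cfg);
}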