From b78ff981f2d98ce572925381d38c5e5813f4191f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 15:34:24 -0400 Subject: [PATCH 01/17] [nexus] Split Nexus configuration (package vs runtime) --- Cargo.lock | 1 + common/Cargo.toml | 1 + common/src/lib.rs | 3 +- common/src/nexus_config.rs | 128 ++++++++ common/src/postgres_config.rs | 95 ++++++ nexus/benches/setup_benchmark.rs | 2 +- nexus/examples/config.toml | 41 +-- nexus/src/app/mod.rs | 10 +- nexus/src/config.rs | 295 ++++++++---------- nexus/src/context.rs | 31 +- nexus/src/db/config.rs | 2 +- nexus/src/lib.rs | 24 +- nexus/test-utils/src/lib.rs | 10 +- nexus/tests/config.test.toml | 55 ++-- nexus/tests/integration_tests/authn_http.rs | 4 +- nexus/tests/integration_tests/commands.rs | 3 +- nexus/tests/integration_tests/console_api.rs | 2 +- nexus/tests/integration_tests/updates.rs | 4 +- openapi/sled-agent.json | 80 ++++- sled-agent/src/params.rs | 50 ++- sled-agent/src/rack_setup/config.rs | 4 +- sled-agent/src/rack_setup/service.rs | 46 ++- sled-agent/src/services.rs | 272 ++++++++++++---- sled-agent/src/sled_agent.rs | 4 +- .../{config.toml => config-partial.toml} | 17 +- smf/nexus/manifest.xml | 8 + smf/sled-agent/config-rss.toml | 13 + smf/sled-agent/manifest.xml | 4 + test-utils/src/dev/db.rs | 2 +- 29 files changed, 853 insertions(+), 358 deletions(-) create mode 100644 common/src/nexus_config.rs create mode 100644 common/src/postgres_config.rs rename smf/nexus/{config.toml => config-partial.toml} (53%) diff --git a/Cargo.lock b/Cargo.lock index 268e855769e..480645db5a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2798,6 +2798,7 @@ dependencies = [ "thiserror", "tokio", "tokio-postgres", + "toml", "uuid", ] diff --git a/common/Cargo.toml b/common/Cargo.toml index aa3b8943800..cd47bef1169 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -30,6 +30,7 @@ structopt = "0.3" thiserror = "1.0" tokio = { version = "1.18", features = [ "full" ] } tokio-postgres = { version = "0.7", features = 
[ "with-chrono-0_4", "with-uuid-1" ] } +toml = "0.5.9" uuid = { version = "1.1.0", features = [ "serde", "v4" ] } parse-display = "0.5.4" progenitor = { git = "https://github.com/oxidecomputer/progenitor" } diff --git a/common/src/lib.rs b/common/src/lib.rs index 2a933283425..d90ecdb7333 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -24,7 +24,8 @@ pub mod address; pub mod api; pub mod backoff; pub mod cmd; -pub mod config; +pub mod nexus_config; +pub mod postgres_config; #[macro_export] macro_rules! generate_logging_api { diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs new file mode 100644 index 00000000000..f1325ae336d --- /dev/null +++ b/common/src/nexus_config.rs @@ -0,0 +1,128 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Configuration parameters to Nexus that are usually only known +//! at runtime. 
+ +use super::address::{Ipv6Subnet, RACK_PREFIX}; +use super::postgres_config::PostgresConfigWithUrl; +use dropshot::ConfigDropshot; +use serde::{Deserialize, Serialize}; +use serde_with::serde_as; +use serde_with::DisplayFromStr; +use std::fmt; +use std::path::{Path, PathBuf}; +use uuid::Uuid; + +#[derive(Debug)] +pub struct LoadError { + pub path: PathBuf, + pub kind: LoadErrorKind, +} + +#[derive(Debug)] +pub struct InvalidTunable { + pub tunable: String, + pub message: String, +} + +impl std::fmt::Display for InvalidTunable { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "invalid \"{}\": \"{}\"", self.tunable, self.message) + } +} +impl std::error::Error for InvalidTunable {} + +#[derive(Debug)] +pub enum LoadErrorKind { + Io(std::io::Error), + Parse(toml::de::Error), + InvalidTunable(InvalidTunable), +} + +impl From<(PathBuf, std::io::Error)> for LoadError { + fn from((path, err): (PathBuf, std::io::Error)) -> Self { + LoadError { path, kind: LoadErrorKind::Io(err) } + } +} + +impl From<(PathBuf, toml::de::Error)> for LoadError { + fn from((path, err): (PathBuf, toml::de::Error)) -> Self { + LoadError { path, kind: LoadErrorKind::Parse(err) } + } +} + +impl std::error::Error for LoadError {} + +impl fmt::Display for LoadError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match &self.kind { + LoadErrorKind::Io(e) => { + write!(f, "read \"{}\": {}", self.path.display(), e) + } + LoadErrorKind::Parse(e) => { + write!(f, "parse \"{}\": {}", self.path.display(), e) + } + LoadErrorKind::InvalidTunable(inner) => { + write!( + f, + "invalid tunable \"{}\": {}", + self.path.display(), + inner, + ) + } + } + } +} + +impl std::cmp::PartialEq for LoadError { + fn eq(&self, other: &std::io::Error) -> bool { + if let LoadErrorKind::Io(e) = &self.kind { + e.kind() == other.kind() + } else { + false + } + } +} + +#[serde_as] +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +#[serde(tag = "type", rename_all = "snake_case")] 
+#[allow(clippy::large_enum_variant)] +pub enum Database { + FromDns, + FromUrl { + #[serde_as(as = "DisplayFromStr")] + url: PostgresConfigWithUrl, + }, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct RuntimeConfig { + /// Uuid of the Nexus instance + pub id: Uuid, + /// Dropshot configuration for external API server + pub dropshot_external: ConfigDropshot, + /// Dropshot configuration for internal API server + pub dropshot_internal: ConfigDropshot, + /// Portion of the IP space to be managed by the Rack. + pub subnet: Ipv6Subnet, + /// DB configuration. + pub database: Database, +} + +impl RuntimeConfig { + /// Load a `RuntimeConfig` from the given TOML file + /// + /// This config object can then be used to create a new `Nexus`. + /// The format is described in the README. + pub fn from_file>(path: P) -> Result { + let path = path.as_ref(); + let file_contents = std::fs::read_to_string(path) + .map_err(|e| (path.to_path_buf(), e))?; + let config_parsed: Self = toml::from_str(&file_contents) + .map_err(|e| (path.to_path_buf(), e))?; + Ok(config_parsed) + } +} diff --git a/common/src/postgres_config.rs b/common/src/postgres_config.rs new file mode 100644 index 00000000000..2509ae4fca2 --- /dev/null +++ b/common/src/postgres_config.rs @@ -0,0 +1,95 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Common objects used for configuration + +use std::fmt; +use std::ops::Deref; +use std::str::FromStr; + +/// Describes a URL for connecting to a PostgreSQL server +// The config pattern that we're using requires that types in the config impl +// Serialize. If tokio_postgres::config::Config impl'd Serialize or even +// Display, we'd just use that directly instead of this type. But it doesn't. 
+// We could implement a serialize function ourselves, but URLs support many +// different properties, and this could be brittle and easy to get wrong. +// Instead, this type just wraps tokio_postgres::config::Config and keeps the +// original String around. (The downside is that a consumer _generating_ a +// nexus::db::Config needs to generate a URL that matches the +// tokio_postgres::config::Config that they construct here, but this is not +// currently an important use case.) +// +// To ensure that the URL and config are kept in sync, we currently only support +// constructing one of these via `FromStr` and the fields are not public. +#[derive(Clone, Debug, PartialEq)] +pub struct PostgresConfigWithUrl { + url_raw: String, + config: tokio_postgres::config::Config, +} + +impl PostgresConfigWithUrl { + pub fn url(&self) -> String { + self.url_raw.clone() + } +} + +impl FromStr for PostgresConfigWithUrl { + type Err = tokio_postgres::Error; + + fn from_str(s: &str) -> Result { + Ok(PostgresConfigWithUrl { url_raw: s.to_owned(), config: s.parse()? }) + } +} + +impl fmt::Display for PostgresConfigWithUrl { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(&self.url_raw) + } +} + +impl Deref for PostgresConfigWithUrl { + type Target = tokio_postgres::config::Config; + + fn deref(&self) -> &Self::Target { + &self.config + } +} + +#[cfg(test)] +mod test { + use super::PostgresConfigWithUrl; + + #[test] + fn test_bad_url() { + // There is surprisingly little that we can rely on the + // tokio_postgres::config::Config parser to include in the error + // message. 
+ let error = "foo".parse::().unwrap_err(); + assert!(error.to_string().contains("unexpected EOF")); + "http://127.0.0.1:1234".parse::().unwrap_err(); + let error = "postgresql://example.com?sslmode=not-a-real-ssl-mode" + .parse::() + .unwrap_err(); + assert!(error + .to_string() + .contains("invalid value for option `sslmode`")); + } + + #[test] + fn test_example_url() { + let config = "postgresql://notauser@10.2.3.4:1789?sslmode=disable" + .parse::() + .unwrap(); + assert_eq!(config.get_user(), Some("notauser")); + assert_eq!( + config.get_ssl_mode(), + tokio_postgres::config::SslMode::Disable + ); + assert_eq!( + config.get_hosts(), + &[tokio_postgres::config::Host::Tcp("10.2.3.4".to_string())] + ); + assert_eq!(config.get_ports(), &[1789]); + } +} diff --git a/nexus/benches/setup_benchmark.rs b/nexus/benches/setup_benchmark.rs index c4c27bd2a97..24584670ce5 100644 --- a/nexus/benches/setup_benchmark.rs +++ b/nexus/benches/setup_benchmark.rs @@ -19,7 +19,7 @@ async fn do_full_setup() { // Wraps exclusively the CockroachDB portion of setup/teardown. async fn do_crdb_setup() { let cfg = nexus_test_utils::load_test_config(); - let logctx = LogContext::new("crdb_setup", &cfg.log); + let logctx = LogContext::new("crdb_setup", &cfg.pkg.log); let mut db = test_setup_database(&logctx.log).await; db.cleanup().await.unwrap(); } diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 7900813cae0..22889ab1be9 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -2,9 +2,6 @@ # Oxide API: example configuration file # -# Identifier for this instance of Nexus -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" - [console] # Directory for static assets. Absolute path or relative to CWD. static_dir = "nexus/static" # TODO: figure out value @@ -20,21 +17,6 @@ session_absolute_timeout_minutes = 480 # TODO(https://github.com/oxidecomputer/omicron/issues/372): Remove "spoof". 
schemes_external = ["spoof", "session_cookie"] -[database] -# URL for connecting to the database -url = "postgresql://root@127.0.0.1:32221/omicron?sslmode=disable" - -[dropshot_external] -# IP address and TCP port on which to listen for the external API -bind_address = "127.0.0.1:12220" -# Allow larger request bodies (1MiB) to accomodate firewall endpoints (one -# rule is ~500 bytes) -request_body_max_bytes = 1048576 - -[dropshot_internal] -# IP address and TCP port on which to listen for the internal API -bind_address = "127.0.0.1:12221" - [log] # Show log messages of this level and more severe level = "info" @@ -51,6 +33,29 @@ mode = "stderr-terminal" [timeseries_db] address = "[::1]:8123" +[runtime] +# Identifier for this instance of Nexus +id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" + +[runtime.dropshot_external] +# IP address and TCP port on which to listen for the external API +bind_address = "127.0.0.1:12220" +# Allow larger request bodies (1MiB) to accomodate firewall endpoints (one +# rule is ~500 bytes) +request_body_max_bytes = 1048576 + +[runtime.dropshot_internal] +# IP address and TCP port on which to listen for the internal API +bind_address = "127.0.0.1:12221" + +[runtime.subnet] +net = "fd00:1122:3344:0100::/56" + +[runtime.database] +# URL for connecting to the database +type = "from_url" +url = "postgresql://root@127.0.0.1:32221/omicron?sslmode=disable" + # Tunable configuration parameters, for testing or experimentation [tunables] diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index ce20065fa1f..1c3620de7e7 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -112,7 +112,7 @@ impl Nexus { authz: Arc, ) -> Arc { let pool = Arc::new(pool); - let my_sec_id = db::SecId::from(config.id); + let my_sec_id = db::SecId::from(config.runtime.id); let db_datastore = Arc::new(db::DataStore::new(Arc::clone(&pool))); let sec_store = Arc::new(db::CockroachDbSecStore::new( my_sec_id, @@ -127,7 +127,7 @@ impl Nexus { sec_store, )); let 
timeseries_client = - oximeter_db::Client::new(config.timeseries_db.address, &log); + oximeter_db::Client::new(config.pkg.timeseries_db.address, &log); // TODO-cleanup We may want a first-class subsystem for managing startup // background tasks. It could use a Future for each one, a status enum @@ -143,7 +143,7 @@ impl Nexus { populate_start(populate_ctx, Arc::clone(&db_datastore)); let nexus = Nexus { - id: config.id, + id: config.runtime.id, rack_id, log: log.new(o!()), api_rack_identity: db::model::RackIdentity::new(rack_id), @@ -153,8 +153,8 @@ impl Nexus { recovery_task: std::sync::Mutex::new(None), populate_status, timeseries_client, - updates_config: config.updates.clone(), - tunables: config.tunables.clone(), + updates_config: config.pkg.updates.clone(), + tunables: config.pkg.tunables.clone(), opctx_alloc: OpContext::for_background( log.new(o!("component" => "InstanceAllocator")), Arc::clone(&authz), diff --git a/nexus/src/config.rs b/nexus/src/config.rs index 11b2c8d861e..d5bf6a2a2f9 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -5,15 +5,13 @@ //! Interfaces for parsing configuration files and working with a nexus server //! 
configuration -use crate::db; use anyhow::anyhow; -use dropshot::ConfigDropshot; use dropshot::ConfigLogging; +use omicron_common::nexus_config::{InvalidTunable, LoadError, RuntimeConfig}; use serde::Deserialize; use serde::Serialize; use serde_with::DeserializeFromStr; use serde_with::SerializeDisplay; -use std::fmt; use std::net::SocketAddr; use std::path::{Path, PathBuf}; @@ -124,22 +122,15 @@ impl Default for Tunables { /// Configuration for a nexus server #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] -pub struct Config { - /// Dropshot configuration for external API server - pub dropshot_external: ConfigDropshot, - /// Dropshot configuration for internal API server - pub dropshot_internal: ConfigDropshot, - /// Identifier for this instance of Nexus - pub id: uuid::Uuid, +pub struct PackageConfig { /// Console-related tunables pub console: ConsoleConfig, /// Server-wide logging configuration. pub log: ConfigLogging, - /// Database parameters - pub database: db::Config, /// Authentication-related configuration pub authn: AuthnConfig, /// Timeseries database configuration. + // TODO: Should this be removed? Nexus needs to initialize it. pub timeseries_db: TimeseriesDbConfig, /// Updates-related configuration. Updates APIs return 400 Bad Request when this is /// unconfigured. 
@@ -150,74 +141,28 @@ pub struct Config { pub tunables: Tunables, } -#[derive(Debug)] -pub struct InvalidTunable { - tunable: String, - message: String, -} - -impl std::fmt::Display for InvalidTunable { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "invalid \"{}\": \"{}\"", self.tunable, self.message) - } -} - -impl std::error::Error for InvalidTunable {} - -#[derive(Debug)] -pub struct LoadError { - path: PathBuf, - kind: LoadErrorKind, -} -#[derive(Debug)] -pub enum LoadErrorKind { - Io(std::io::Error), - Parse(toml::de::Error), - InvalidTunable(InvalidTunable), -} - -impl From<(PathBuf, std::io::Error)> for LoadError { - fn from((path, err): (PathBuf, std::io::Error)) -> Self { - LoadError { path, kind: LoadErrorKind::Io(err) } - } -} - -impl From<(PathBuf, toml::de::Error)> for LoadError { - fn from((path, err): (PathBuf, toml::de::Error)) -> Self { - LoadError { path, kind: LoadErrorKind::Parse(err) } - } -} - -impl std::error::Error for LoadError {} +#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] +pub struct Config { + /// Configuration parameters known at compile-time. + #[serde(flatten)] + pub pkg: PackageConfig, -impl fmt::Display for LoadError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match &self.kind { - LoadErrorKind::Io(e) => { - write!(f, "read \"{}\": {}", self.path.display(), e) - } - LoadErrorKind::Parse(e) => { - write!(f, "parse \"{}\": {}", self.path.display(), e) - } - LoadErrorKind::InvalidTunable(inner) => { - write!( - f, - "invalid tunable \"{}\": {}", - self.path.display(), - inner, - ) - } - } - } + /// A variety of configuration parameters only known at runtime. 
+ pub runtime: RuntimeConfig, } -impl std::cmp::PartialEq for LoadError { - fn eq(&self, other: &std::io::Error) -> bool { - if let LoadErrorKind::Io(e) = &self.kind { - e.kind() == other.kind() - } else { - false - } +impl Config { + /// Load a `PackageConfig` from the given TOML file + /// + /// This config object can then be used to create a new `Nexus`. + /// The format is described in the README. + pub fn from_file>(path: P) -> Result { + let path = path.as_ref(); + let file_contents = std::fs::read_to_string(path) + .map_err(|e| (path.to_path_buf(), e))?; + let config_parsed: Self = toml::from_str(&file_contents) + .map_err(|e| (path.to_path_buf(), e))?; + Ok(config_parsed) } } @@ -255,36 +200,24 @@ impl std::fmt::Display for SchemeName { } } -impl Config { - /// Load a `Config` from the given TOML file - /// - /// This config object can then be used to create a new `Nexus`. - /// The format is described in the README. - pub fn from_file>(path: P) -> Result { - let path = path.as_ref(); - let file_contents = std::fs::read_to_string(path) - .map_err(|e| (path.to_path_buf(), e))?; - let config_parsed: Config = toml::from_str(&file_contents) - .map_err(|e| (path.to_path_buf(), e))?; - Ok(config_parsed) - } -} - #[cfg(test)] mod test { use super::Tunables; use super::{ - AuthnConfig, Config, ConsoleConfig, LoadError, LoadErrorKind, + AuthnConfig, Config, ConsoleConfig, LoadError, PackageConfig, SchemeName, TimeseriesDbConfig, UpdatesConfig, }; - use crate::db; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; use dropshot::ConfigLoggingIfExists; use dropshot::ConfigLoggingLevel; use libc; + use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; + use omicron_common::nexus_config::{ + Database, LoadErrorKind, RuntimeConfig, + }; use std::fs; - use std::net::SocketAddr; + use std::net::{Ipv6Addr, SocketAddr}; use std::path::Path; use std::path::PathBuf; @@ -355,7 +288,7 @@ mod test { let error = read_config("empty", "").expect_err("expected failure"); if 
let LoadErrorKind::Parse(error) = &error.kind { assert_eq!(error.line_col(), None); - assert_eq!(error.to_string(), "missing field `dropshot_external`"); + assert_eq!(error.to_string(), "missing field `runtime`"); } else { panic!( "Got an unexpected error, expected Parse but got {:?}", @@ -373,7 +306,6 @@ mod test { let config = read_config( "valid", r##" - id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" [console] static_dir = "tests/static" cache_control_max_age_minutes = 10 @@ -381,14 +313,6 @@ mod test { session_absolute_timeout_minutes = 480 [authn] schemes_external = [] - [dropshot_external] - bind_address = "10.1.2.3:4567" - request_body_max_bytes = 1024 - [dropshot_internal] - bind_address = "10.1.2.3:4568" - request_body_max_bytes = 1024 - [database] - url = "postgresql://127.0.0.1?sslmode=disable" [log] mode = "file" level = "debug" @@ -401,6 +325,18 @@ mod test { default_base_url = "http://example.invalid/" [tunables] max_vpc_ipv4_subnet_prefix = 27 + [runtime] + id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + [runtime.dropshot_external] + bind_address = "10.1.2.3:4567" + request_body_max_bytes = 1024 + [runtime.dropshot_internal] + bind_address = "10.1.2.3:4568" + request_body_max_bytes = 1024 + [runtime.subnet] + net = "::/56" + [runtime.database] + type = "from_dns" "##, ) .unwrap(); @@ -408,51 +344,51 @@ mod test { assert_eq!( config, Config { - id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), - console: ConsoleConfig { - static_dir: "tests/static".parse().unwrap(), - cache_control_max_age_minutes: 10, - session_idle_timeout_minutes: 60, - session_absolute_timeout_minutes: 480 - }, - authn: AuthnConfig { schemes_external: Vec::new() }, - dropshot_external: ConfigDropshot { - bind_address: "10.1.2.3:4567" - .parse::() - .unwrap(), - ..Default::default() - }, - dropshot_internal: ConfigDropshot { - bind_address: "10.1.2.3:4568" - .parse::() - .unwrap(), - ..Default::default() - }, - log: ConfigLogging::File { - level: ConfigLoggingLevel::Debug, - 
if_exists: ConfigLoggingIfExists::Fail, - path: "/nonexistent/path".to_string() + runtime: RuntimeConfig { + id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), + dropshot_external: ConfigDropshot { + bind_address: "10.1.2.3:4567" + .parse::() + .unwrap(), + ..Default::default() + }, + dropshot_internal: ConfigDropshot { + bind_address: "10.1.2.3:4568" + .parse::() + .unwrap(), + ..Default::default() + }, + subnet: Ipv6Subnet::::new(Ipv6Addr::LOCALHOST), + database: Database::FromDns, }, - database: db::Config { - url: "postgresql://127.0.0.1?sslmode=disable" - .parse() - .unwrap() + pkg: PackageConfig { + console: ConsoleConfig { + static_dir: "tests/static".parse().unwrap(), + cache_control_max_age_minutes: 10, + session_idle_timeout_minutes: 60, + session_absolute_timeout_minutes: 480 + }, + authn: AuthnConfig { schemes_external: Vec::new() }, + log: ConfigLogging::File { + level: ConfigLoggingLevel::Debug, + if_exists: ConfigLoggingIfExists::Fail, + path: "/nonexistent/path".to_string() + }, + timeseries_db: TimeseriesDbConfig { + address: "[::1]:8123".parse().unwrap() + }, + updates: Some(UpdatesConfig { + trusted_root: PathBuf::from("/path/to/root.json"), + default_base_url: "http://example.invalid/".into(), + }), + tunables: Tunables { max_vpc_ipv4_subnet_prefix: 27 }, }, - timeseries_db: TimeseriesDbConfig { - address: "[::1]:8123".parse().unwrap() - }, - updates: Some(UpdatesConfig { - trusted_root: PathBuf::from("/path/to/root.json"), - default_base_url: "http://example.invalid/".into(), - }), - tunables: Tunables { max_vpc_ipv4_subnet_prefix: 27 }, } ); let config = read_config( "valid", r##" - id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" [console] static_dir = "tests/static" cache_control_max_age_minutes = 10 @@ -460,14 +396,6 @@ mod test { session_absolute_timeout_minutes = 480 [authn] schemes_external = [ "spoof", "session_cookie" ] - [dropshot_external] - bind_address = "10.1.2.3:4567" - request_body_max_bytes = 1024 - [dropshot_internal] - 
bind_address = "10.1.2.3:4568" - request_body_max_bytes = 1024 - [database] - url = "postgresql://127.0.0.1?sslmode=disable" [log] mode = "file" level = "debug" @@ -475,12 +403,24 @@ mod test { if_exists = "fail" [timeseries_db] address = "[::1]:8123" + [runtime] + id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + [runtime.dropshot_external] + bind_address = "10.1.2.3:4567" + request_body_max_bytes = 1024 + [runtime.dropshot_internal] + bind_address = "10.1.2.3:4568" + request_body_max_bytes = 1024 + [runtime.subnet] + net = "::/56" + [runtime.database] + type = "from_dns" "##, ) .unwrap(); assert_eq!( - config.authn.schemes_external, + config.pkg.authn.schemes_external, vec![SchemeName::Spoof, SchemeName::SessionCookie], ); } @@ -490,7 +430,6 @@ mod test { let error = read_config( "bad authn.schemes_external", r##" - id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" [console] static_dir = "tests/static" cache_control_max_age_minutes = 10 @@ -498,14 +437,6 @@ mod test { session_absolute_timeout_minutes = 480 [authn] schemes_external = ["trust-me"] - [dropshot_external] - bind_address = "10.1.2.3:4567" - request_body_max_bytes = 1024 - [dropshot_internal] - bind_address = "10.1.2.3:4568" - request_body_max_bytes = 1024 - [database] - url = "postgresql://127.0.0.1?sslmode=disable" [log] mode = "file" level = "debug" @@ -513,14 +444,29 @@ mod test { if_exists = "fail" [timeseries_db] address = "[::1]:8123" + [runtime] + id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + [runtime.dropshot_external] + bind_address = "10.1.2.3:4567" + request_body_max_bytes = 1024 + [runtime.dropshot_internal] + bind_address = "10.1.2.3:4568" + request_body_max_bytes = 1024 + [runtime.subnet] + net = "::/56" + [runtime.database] + type = "from_dns" "##, ) .expect_err("expected failure"); if let LoadErrorKind::Parse(error) = &error.kind { - assert!(error.to_string().starts_with( - "unsupported authn scheme: \"trust-me\" \ - for key `authn.schemes_external`" - )); + assert!( + error + .to_string() + 
.starts_with("unsupported authn scheme: \"trust-me\""), + "error = {}", + error.to_string() + ); } else { panic!( "Got an unexpected error, expected Parse but got {:?}", @@ -534,7 +480,6 @@ mod test { let error = read_config( "invalid_ipv4_prefix_tunable", r##" - id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" [console] static_dir = "tests/static" cache_control_max_age_minutes = 10 @@ -542,14 +487,6 @@ mod test { session_absolute_timeout_minutes = 480 [authn] schemes_external = [] - [dropshot_external] - bind_address = "10.1.2.3:4567" - request_body_max_bytes = 1024 - [dropshot_internal] - bind_address = "10.1.2.3:4568" - request_body_max_bytes = 1024 - [database] - url = "postgresql://127.0.0.1?sslmode=disable" [log] mode = "file" level = "debug" @@ -562,6 +499,18 @@ mod test { default_base_url = "http://example.invalid/" [tunables] max_vpc_ipv4_subnet_prefix = 100 + [runtime] + id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + [runtime.dropshot_external] + bind_address = "10.1.2.3:4567" + request_body_max_bytes = 1024 + [runtime.dropshot_internal] + bind_address = "10.1.2.3:4568" + request_body_max_bytes = 1024 + [runtime.subnet] + net = "::/56" + [runtime.database] + type = "from_dns" "##, ) .expect_err("Expected failure"); diff --git a/nexus/src/context.rs b/nexus/src/context.rs index f0d9e6b13a0..a08f22304df 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -19,6 +19,7 @@ use authn::external::spoof::HttpAuthnSpoof; use authn::external::HttpAuthnScheme; use chrono::{DateTime, Duration, Utc}; use omicron_common::api::external::Error; +use omicron_common::nexus_config; use oximeter::types::ProducerRegistry; use oximeter_instruments::http::{HttpService, LatencyTracker}; use slog::Logger; @@ -67,13 +68,13 @@ pub struct ConsoleConfig { impl ServerContext { /// Create a new context with the given rack id and log. This creates the /// underlying nexus as well. 
- pub fn new( + pub async fn new( rack_id: Uuid, log: Logger, - pool: db::Pool, config: &config::Config, ) -> Result, String> { let nexus_schemes = config + .pkg .authn .schemes_external .iter() @@ -90,7 +91,8 @@ impl ServerContext { let internal_authn = Arc::new(authn::Context::internal_api()); let authz = Arc::new(authz::Authz::new(&log)); let create_tracker = |name: &str| { - let target = HttpService { name: name.to_string(), id: config.id }; + let target = + HttpService { name: name.to_string(), id: config.runtime.id }; const START_LATENCY_DECADE: i8 = -6; const END_LATENCY_DECADE: i8 = 3; LatencyTracker::with_latency_decades( @@ -102,7 +104,7 @@ impl ServerContext { }; let internal_latencies = create_tracker("nexus-internal"); let external_latencies = create_tracker("nexus-external"); - let producer_registry = ProducerRegistry::with_id(config.id); + let producer_registry = ProducerRegistry::with_id(config.runtime.id); producer_registry .register_producer(internal_latencies.clone()) .unwrap(); @@ -113,11 +115,11 @@ impl ServerContext { // Support both absolute and relative paths. If configured dir is // absolute, use it directly. If not, assume it's relative to the // current working directory. - let static_dir = if config.console.static_dir.is_absolute() { - Some(config.console.static_dir.to_owned()) + let static_dir = if config.pkg.console.static_dir.is_absolute() { + Some(config.pkg.console.static_dir.to_owned()) } else { env::current_dir() - .map(|root| root.join(&config.console.static_dir)) + .map(|root| root.join(&config.pkg.console.static_dir)) .ok() }; @@ -132,6 +134,15 @@ impl ServerContext { // like console index.html. 
leaving that out for now so we don't break // nexus in dev for everyone + // Set up DB pool + let url = match &config.runtime.database { + nexus_config::Database::FromUrl { url } => url.clone(), + nexus_config::Database::FromDns => { + todo!("Not yet implemented"); + } + }; + let pool = db::Pool::new(&db::Config { url }); + Ok(Arc::new(ServerContext { nexus: Nexus::new_with_id( rack_id, @@ -149,14 +160,14 @@ impl ServerContext { producer_registry, console_config: ConsoleConfig { session_idle_timeout: Duration::minutes( - config.console.session_idle_timeout_minutes.into(), + config.pkg.console.session_idle_timeout_minutes.into(), ), session_absolute_timeout: Duration::minutes( - config.console.session_absolute_timeout_minutes.into(), + config.pkg.console.session_absolute_timeout_minutes.into(), ), static_dir, cache_control_max_age: Duration::minutes( - config.console.cache_control_max_age_minutes.into(), + config.pkg.console.cache_control_max_age_minutes.into(), ), }, })) diff --git a/nexus/src/db/config.rs b/nexus/src/db/config.rs index b4066ce3cbe..afe51bca66d 100644 --- a/nexus/src/db/config.rs +++ b/nexus/src/db/config.rs @@ -4,7 +4,7 @@ //! 
Nexus database configuration -use omicron_common::config::PostgresConfigWithUrl; +use omicron_common::postgres_config::PostgresConfigWithUrl; use serde::Deserialize; use serde::Serialize; use serde_with::serde_as; diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index e56503c3c09..61abe04b1ba 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -29,7 +29,7 @@ pub mod updates; // public for testing pub use app::test_interfaces::TestInterfaces; pub use app::Nexus; -pub use config::Config; +pub use config::{Config, PackageConfig}; pub use context::ServerContext; pub use crucible_agent_client; use external_api::http_entrypoints::external_api; @@ -85,15 +85,15 @@ impl Server { rack_id: Uuid, log: &Logger, ) -> Result { - let log = log.new(o!("name" => config.id.to_string())); + let log = log.new(o!("name" => config.runtime.id.to_string())); info!(log, "setting up nexus server"); let ctxlog = log.new(o!("component" => "ServerContext")); - let pool = db::Pool::new(&config.database); - let apictx = ServerContext::new(rack_id, ctxlog, pool, &config)?; + + let apictx = ServerContext::new(rack_id, ctxlog, &config).await?; let http_server_starter_external = dropshot::HttpServerStarter::new( - &config.dropshot_external, + &config.runtime.dropshot_external, external_api(), Arc::clone(&apictx), &log.new(o!("component" => "dropshot_external")), @@ -101,7 +101,7 @@ impl Server { .map_err(|error| format!("initializing external server: {}", error))?; let http_server_starter_internal = dropshot::HttpServerStarter::new( - &config.dropshot_internal, + &config.runtime.dropshot_internal, internal_api(), Arc::clone(&apictx), &log.new(o!("component" => "dropshot_internal")), @@ -153,12 +153,12 @@ impl Server { /// Run an instance of the [Server]. 
pub async fn run_server(config: &Config) -> Result<(), String> { use slog::Drain; - let (drain, registration) = slog_dtrace::with_drain( - config - .log - .to_logger("nexus") - .map_err(|message| format!("initializing logger: {}", message))?, - ); + let (drain, registration) = + slog_dtrace::with_drain( + config.pkg.log.to_logger("nexus").map_err(|message| { + format!("initializing logger: {}", message) + })?, + ); let log = slog::Logger::root(drain.fuse(), slog::o!()); if let slog_dtrace::ProbeRegistration::Failed(e) = registration { let msg = format!("failed to register DTrace probes: {}", e); diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index a53ad85d585..e4eb744e2fa 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -11,6 +11,7 @@ use dropshot::ConfigLogging; use dropshot::ConfigLoggingLevel; use omicron_common::api::external::IdentityMetadata; use omicron_common::api::internal::nexus::ProducerEndpoint; +use omicron_common::nexus_config; use omicron_sled_agent::sim; use omicron_test_utils::dev; use oximeter_collector::Oximeter; @@ -75,7 +76,7 @@ pub fn load_test_config() -> omicron_nexus::Config { let config_file_path = Path::new("tests/config.test.toml"); let mut config = omicron_nexus::Config::from_file(config_file_path) .expect("failed to load config.test.toml"); - config.id = Uuid::new_v4(); + config.runtime.id = Uuid::new_v4(); config } @@ -88,7 +89,7 @@ pub async fn test_setup_with_config( test_name: &str, config: &mut omicron_nexus::Config, ) -> ControlPlaneTestContext { - let logctx = LogContext::new(test_name, &config.log); + let logctx = LogContext::new(test_name, &config.pkg.log); let rack_id = Uuid::parse_str(RACK_UUID).unwrap(); let log = &logctx.log; @@ -99,8 +100,9 @@ pub async fn test_setup_with_config( let clickhouse = dev::clickhouse::ClickHouseInstance::new(0).await.unwrap(); // Store actual address/port information for the databases after they start. 
- config.database.url = database.pg_config().clone(); - config.timeseries_db.address.set_port(clickhouse.port()); + config.runtime.database = + nexus_config::Database::FromUrl { url: database.pg_config().clone() }; + config.pkg.timeseries_db.address.set_port(clickhouse.port()); let server = omicron_nexus::Server::start(&config, rack_id, &logctx.log) .await diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 9b8f1f42731..2fc4ddba192 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -2,10 +2,6 @@ # Oxide API: configuration file for test suite # -# Identifier for this instance of Nexus. -# NOTE: The test suite always overrides this. -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" - [console] # Directory for static assets. Absolute path or relative to CWD. static_dir = "tests/static" @@ -17,27 +13,6 @@ session_absolute_timeout_minutes = 480 [authn] schemes_external = [ "spoof", "session_cookie" ] -# -# NOTE: for the test suite, the database URL will be replaced with one -# appropriate for the database that's started by the test runner. -# -[database] -url = "postgresql://root@127.0.0.1:0/omicron?sslmode=disable" - -# -# NOTE: for the test suite, the port MUST be 0 (in order to bind to any -# available port) because the test suite will be running many servers -# concurrently. -# -[dropshot_external] -bind_address = "127.0.0.1:0" -request_body_max_bytes = 1048576 - -# port must be 0. see above -[dropshot_internal] -bind_address = "127.0.0.1:0" -request_body_max_bytes = 1048576 - # # NOTE: for the test suite, if mode = "file", the file path MUST be the sentinel # string "UNUSED". The actual path will be generated by the test suite for each @@ -59,3 +34,33 @@ address = "[::1]:0" [tunables] # Allow small subnets, so we can test IP address exhaustion easily / quickly max_vpc_ipv4_subnet_prefix = 29 + +[runtime] +# Identifier for this instance of Nexus. +# NOTE: The test suite always overrides this. 
+id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" + +# +# NOTE: for the test suite, the port MUST be 0 (in order to bind to any +# available port) because the test suite will be running many servers +# concurrently. +# +[runtime.dropshot_external] +bind_address = "127.0.0.1:0" +request_body_max_bytes = 1048576 + +# port must be 0. see above +[runtime.dropshot_internal] +bind_address = "127.0.0.1:0" +request_body_max_bytes = 1048576 + +[runtime.subnet] +net = "fd00:1122:3344:0100::/56" + +# +# NOTE: for the test suite, the database URL will be replaced with one +# appropriate for the database that's started by the test runner. +# +[runtime.database] +type = "from_url" +url = "postgresql://root@127.0.0.1:0/omicron?sslmode=disable" diff --git a/nexus/tests/integration_tests/authn_http.rs b/nexus/tests/integration_tests/authn_http.rs index 7125a52ea90..e0234da1b97 100644 --- a/nexus/tests/integration_tests/authn_http.rs +++ b/nexus/tests/integration_tests/authn_http.rs @@ -277,7 +277,7 @@ async fn start_whoami_server( sessions: HashMap, ) -> TestContext { let config = nexus_test_utils::load_test_config(); - let logctx = LogContext::new(test_name, &config.log); + let logctx = LogContext::new(test_name, &config.pkg.log); let whoami_api = { let mut whoami_api = ApiDescription::new(); @@ -299,7 +299,7 @@ async fn start_whoami_server( TestContext::new( whoami_api, server_state, - &config.dropshot_external, + &config.runtime.dropshot_external, Some(logctx), log, ) diff --git a/nexus/tests/integration_tests/commands.rs b/nexus/tests/integration_tests/commands.rs index 7d3855d5a6c..ac770c137e3 100644 --- a/nexus/tests/integration_tests/commands.rs +++ b/nexus/tests/integration_tests/commands.rs @@ -76,8 +76,7 @@ fn test_nexus_invalid_config() { assert_eq!( stderr_text, format!( - "nexus: parse \"{}\": missing field \ - `dropshot_external`\n", + "nexus: parse \"{}\": missing field `runtime`\n", config_path.display() ), ); diff --git a/nexus/tests/integration_tests/console_api.rs 
b/nexus/tests/integration_tests/console_api.rs index e84c65c0fe1..779e94470eb 100644 --- a/nexus/tests/integration_tests/console_api.rs +++ b/nexus/tests/integration_tests/console_api.rs @@ -196,7 +196,7 @@ async fn test_assets(cptestctx: &ControlPlaneTestContext) { #[tokio::test] async fn test_absolute_static_dir() { let mut config = load_test_config(); - config.console.static_dir = current_dir().unwrap().join("tests/static"); + config.pkg.console.static_dir = current_dir().unwrap().join("tests/static"); let cptestctx = test_setup_with_config("test_absolute_static_dir", &mut config).await; let testctx = &cptestctx.external_client; diff --git a/nexus/tests/integration_tests/updates.rs b/nexus/tests/integration_tests/updates.rs index 1bfa25d0a2c..c09ca0b7fea 100644 --- a/nexus/tests/integration_tests/updates.rs +++ b/nexus/tests/integration_tests/updates.rs @@ -62,7 +62,7 @@ async fn test_update_end_to_end() { let mut api = ApiDescription::new(); api.register(static_content).unwrap(); let context = FileServerContext { base: tuf_repo.path().to_owned() }; - let logctx = LogContext::new("test_update_end_to_end", &config.log); + let logctx = LogContext::new("test_update_end_to_end", &config.pkg.log); let server = HttpServerStarter::new(&dropshot_config, api, context, &logctx.log) .unwrap() @@ -70,7 +70,7 @@ async fn test_update_end_to_end() { let local_addr = server.local_addr(); // stand up the test environment - config.updates = Some(UpdatesConfig { + config.pkg.updates = Some(UpdatesConfig { trusted_root: tuf_repo.path().join("metadata").join("1.root.json"), default_base_url: format!("http://{}/", local_addr), }); diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 273082a7500..839e8ba9a76 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -247,6 +247,10 @@ "dataset_kind": { "$ref": "#/components/schemas/DatasetKind" }, + "id": { + "type": "string", + "format": "uuid" + }, "zpool_id": { "type": "string", "format": "uuid" @@ -255,6 
+259,7 @@ "required": [ "address", "dataset_kind", + "id", "zpool_id" ] }, @@ -959,6 +964,7 @@ ] }, "ServiceRequest": { + "description": "Describes a request to create a service. This information should be sufficient for a Sled Agent to start a zone containing the requested service.", "type": "object", "properties": { "addresses": { @@ -976,13 +982,85 @@ "format": "ipv6" } }, + "id": { + "type": "string", + "format": "uuid" + }, "name": { "type": "string" + }, + "service_type": { + "$ref": "#/components/schemas/ServiceType" } }, "required": [ "addresses", - "name" + "id", + "name", + "service_type" + ] + }, + "ServiceType": { + "description": "Describes service-specific parameters.", + "oneOf": [ + { + "type": "object", + "properties": { + "external_address": { + "type": "string" + }, + "internal_address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "nexus" + ] + } + }, + "required": [ + "external_address", + "internal_address", + "type" + ] + }, + { + "type": "object", + "properties": { + "dns_address": { + "type": "string" + }, + "server_address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "internal_dns" + ] + } + }, + "required": [ + "dns_address", + "server_address", + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "oximeter" + ] + } + }, + "required": [ + "type" + ] + } ] }, "Slot": { diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 1c713a69067..d003bbe785e 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -9,9 +9,7 @@ use omicron_common::api::internal::nexus::{ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::fmt::{Debug, Display, Formatter, Result as FormatResult}; -use std::net::IpAddr; -use std::net::Ipv6Addr; -use std::net::{SocketAddr, SocketAddrV6}; +use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use uuid::Uuid; /// Information required to construct a virtual 
network interface for a guest @@ -165,7 +163,7 @@ pub struct InstanceRuntimeStateRequested { pub enum DatasetKind { CockroachDb { /// The addresses of all nodes within the cluster. - all_addresses: Vec, + all_addresses: Vec, }, Crucible, Clickhouse, @@ -213,6 +211,8 @@ impl std::fmt::Display for DatasetKind { /// instantiated when the dataset is detected. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] pub struct DatasetEnsureBody { + // The UUID of the dataset, as well as the service using it directly. + pub id: Uuid, // The name (and UUID) of the Zpool which we are inserting into. pub zpool_id: Uuid, // The type of the filesystem. @@ -235,14 +235,52 @@ impl From for sled_agent_client::types::DatasetEnsureBody { zpool_id: p.zpool_id, dataset_kind: p.dataset_kind.into(), address: p.address.to_string(), + id: p.id, } } } +/// Describes service-specific parameters. +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ServiceType { + Nexus { internal_address: SocketAddrV6, external_address: SocketAddrV6 }, + InternalDns { server_address: SocketAddrV6, dns_address: SocketAddrV6 }, + Oximeter, +} + +impl From for sled_agent_client::types::ServiceType { + fn from(s: ServiceType) -> Self { + use sled_agent_client::types::ServiceType as AutoSt; + use ServiceType as St; + + match s { + St::Nexus { internal_address, external_address } => AutoSt::Nexus { + internal_address: internal_address.to_string(), + external_address: external_address.to_string(), + }, + St::InternalDns { server_address, dns_address } => { + AutoSt::InternalDns { + server_address: server_address.to_string(), + dns_address: dns_address.to_string(), + } + } + St::Oximeter => AutoSt::Oximeter, + } + } +} + +/// Describes a request to create a service. This information +/// should be sufficient for a Sled Agent to start a zone +/// containing the requested service. 
#[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, )] pub struct ServiceRequest { + // The UUID of the service to be initialized. + pub id: Uuid, // The name of the service to be created. pub name: String, // The addresses on which the service should listen for requests. @@ -256,14 +294,18 @@ pub struct ServiceRequest { // is necessary to allow inter-zone traffic routing. #[serde(default)] pub gz_addresses: Vec, + // Any other service-specific parameters. + pub service_type: ServiceType, } impl From for sled_agent_client::types::ServiceRequest { fn from(s: ServiceRequest) -> Self { Self { + id: s.id, name: s.name, addresses: s.addresses, gz_addresses: s.gz_addresses, + service_type: s.service_type.into(), } } } diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 26f3ce8a321..d9f8324535d 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -29,12 +29,12 @@ pub struct SetupServiceConfig { pub rack_subnet: Ipv6Addr, #[serde(default, rename = "request")] - pub requests: Vec, + pub requests: Vec, } /// A request to initialize a sled. #[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] -pub struct SledRequest { +pub struct HardcodedSledRequest { /// Datasets to be created. #[serde(default, rename = "dataset")] pub datasets: Vec, diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 0fef7054d26..6c65383d5e8 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -4,13 +4,15 @@ //! 
Rack Setup Service implementation -use super::config::{SetupServiceConfig as Config, SledRequest}; -use crate::bootstrap::config::BOOTSTRAP_AGENT_PORT; -use crate::bootstrap::discovery::PeerMonitorObserver; -use crate::bootstrap::params::SledAgentRequest; -use crate::bootstrap::rss_handle::BootstrapAgentHandle; -use crate::params::ServiceRequest; -use omicron_common::address::{get_sled_address, ReservedRackSubnet}; +use super::config::{HardcodedSledRequest, SetupServiceConfig as Config}; +use crate::bootstrap::{ + config::BOOTSTRAP_AGENT_PORT, discovery::PeerMonitorObserver, + params::SledAgentRequest, rss_handle::BootstrapAgentHandle, +}; +use crate::params::{ServiceRequest, ServiceType}; +use omicron_common::address::{ + get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, +}; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, }; @@ -21,6 +23,7 @@ use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::path::PathBuf; use thiserror::Error; use tokio::sync::Mutex; +use uuid::Uuid; /// Describes errors which may occur while operating the setup service. #[derive(Error, Debug)] @@ -55,7 +58,7 @@ pub enum SetupServiceError { #[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] struct SledAllocation { initialization_request: SledAgentRequest, - services_request: SledRequest, + services_request: HardcodedSledRequest, } /// The interface to the Rack Setup Service. 
@@ -192,7 +195,7 @@ impl ServiceInner { async fn initialize_services( &self, sled_address: SocketAddr, - services: &Vec, + services: &Vec, ) -> Result<(), SetupServiceError> { let dur = std::time::Duration::from_secs(60); let client = reqwest::ClientBuilder::new() @@ -284,18 +287,31 @@ impl ServiceInner { if idx < config.requests.len() { config.requests[idx].clone() } else { - SledRequest::default() + HardcodedSledRequest::default() } }; - // The first enumerated addresses get assigned the additional + // The first enumerated sleds get assigned the additional // responsibility of being internal DNS servers. if idx < dns_subnets.len() { let dns_subnet = &dns_subnets[idx]; + let dns_addr = dns_subnet.dns_address().ip(); request.dns_services.push(ServiceRequest { + id: Uuid::new_v4(), name: "internal-dns".to_string(), - addresses: vec![dns_subnet.dns_address().ip()], + addresses: vec![dns_addr], gz_addresses: vec![dns_subnet.gz_address().ip()], + service_type: ServiceType::InternalDns { + server_address: SocketAddrV6::new( + dns_addr, + DNS_SERVER_PORT, + 0, + 0, + ), + dns_address: SocketAddrV6::new( + dns_addr, DNS_PORT, 0, 0, + ), + }, }); } @@ -331,8 +347,10 @@ impl ServiceInner { } // Once we've constructed a plan, write it down to durable storage. 
- let serialized_plan = toml::Value::try_from(&plan) - .expect("Cannot serialize configuration"); + let serialized_plan = + toml::Value::try_from(&plan).unwrap_or_else(|e| { + panic!("Cannot serialize configuration: {:#?}: {}", plan, e) + }); let plan_str = toml::to_string(&serialized_plan) .expect("Cannot turn config to string"); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 72444a79b17..aaa1960fb3e 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -7,17 +7,32 @@ use crate::illumos::dladm::{Etherstub, EtherstubVnic}; use crate::illumos::running_zone::{InstalledZone, RunningZone}; use crate::illumos::vnic::VnicAllocator; +use crate::illumos::zfs::ZONE_ZFS_DATASET_MOUNTPOINT; use crate::illumos::zone::AddressRequest; -use crate::params::{ServiceEnsureBody, ServiceRequest}; +use crate::params::{ServiceEnsureBody, ServiceRequest, ServiceType}; use crate::zone::Zones; -use omicron_common::address::{DNS_PORT, DNS_SERVER_PORT}; +use dropshot::ConfigDropshot; +use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; +use omicron_common::nexus_config::{self, RuntimeConfig as NexusRuntimeConfig}; +use omicron_common::postgres_config::PostgresConfigWithUrl; use slog::Logger; use std::collections::HashSet; use std::iter::FromIterator; -use std::net::{IpAddr, Ipv6Addr}; +use std::net::{IpAddr, Ipv6Addr, SocketAddr}; use std::path::{Path, PathBuf}; +use std::str::FromStr; +use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; +// The filename of ServiceManager's internal storage. +const SERVICE_CONFIG_FILENAME: &str = "service.toml"; +// The filename of a half-completed config, in need of parameters supplied at +// runtime. +const PARTIAL_CONFIG_FILENAME: &str = "config-partial.toml"; +// The filename of a completed config, merging the partial config with +// additional appended parameters known at runtime. 
+const COMPLETE_CONFIG_FILENAME: &str = "config.toml"; + #[derive(thiserror::Error, Debug)] pub enum Error { #[error("Cannot serialize TOML to file {path}: {err}")] @@ -69,13 +84,40 @@ impl From for omicron_common::api::external::Error { /// The default path to service configuration, if one is not /// explicitly provided. pub fn default_services_config_path() -> PathBuf { - Path::new(omicron_common::OMICRON_CONFIG_PATH).join("services.toml") + Path::new(omicron_common::OMICRON_CONFIG_PATH).join(SERVICE_CONFIG_FILENAME) +} + +/// Configuration parameters which modify the [`ServiceManager`]'s behavior. +/// +/// These are typically used to make testing easier; production usage +/// should generally prefer to use the defaults. +pub struct Config { + /// The path for the ServiceManager to store information about + /// all running services. + pub all_svcs_config_path: PathBuf, + /// A function which returns the path to the directory holding the + /// service's configuration file. + pub get_svc_config_dir: Box PathBuf + Send + Sync>, +} + +impl Default for Config { + fn default() -> Self { + Self { + all_svcs_config_path: default_services_config_path(), + get_svc_config_dir: Box::new(|zone_name: &str, svc_name: &str| { + PathBuf::from(ZONE_ZFS_DATASET_MOUNTPOINT) + .join(PathBuf::from(zone_name)) + .join("root") + .join(format!("var/svc/manifest/site/{}", svc_name)) + }), + } + } } /// Manages miscellaneous Sled-local services. 
pub struct ServiceManager { log: Logger, - config_path: Option, + config: Config, zones: Mutex>, vnic_allocator: VnicAllocator, underlay_vnic: EtherstubVnic, @@ -98,12 +140,12 @@ impl ServiceManager { etherstub: Etherstub, underlay_vnic: EtherstubVnic, underlay_address: Ipv6Addr, - config_path: Option, + config: Config, ) -> Result { debug!(log, "Creating new ServiceManager"); let mgr = Self { log: log.new(o!("component" => "ServiceManager")), - config_path, + config, zones: Mutex::new(vec![]), vnic_allocator: VnicAllocator::new("Service", etherstub), underlay_vnic, @@ -143,11 +185,7 @@ impl ServiceManager { // Returns either the path to the explicitly provided config path, or // chooses the default one. fn services_config_path(&self) -> PathBuf { - if let Some(path) = &self.config_path { - path.clone() - } else { - default_services_config_path() - } + self.config.all_svcs_config_path.clone() } // Populates `existing_zones` according to the requests in `services`. @@ -268,16 +306,70 @@ impl ServiceManager { let smf_name = format!("svc:/system/illumos/{}", service.name); let default_smf_name = format!("{}:default", smf_name); - match service.name.as_str() { - "internal-dns" => { - info!(self.log, "Setting up internal-dns service"); - let address = - service.addresses.get(0).ok_or_else(|| { - Error::BadServiceRequest { - service: service.name.clone(), - message: "Not enough addresses".to_string(), - } + match service.service_type { + ServiceType::Nexus { internal_address, external_address } => { + info!(self.log, "Setting up Nexus service"); + + // Nexus takes a separate config file for parameters which + // cannot be known at packaging time. 
+ let runtime_config = NexusRuntimeConfig { + id: service.id, + dropshot_external: ConfigDropshot { + bind_address: SocketAddr::V6(external_address), + ..Default::default() + }, + dropshot_internal: ConfigDropshot { + bind_address: SocketAddr::V6(internal_address), + ..Default::default() + }, + subnet: Ipv6Subnet::::new( + self.underlay_address, + ), + // TODO: Switch to inferring this URL by DNS. + database: nexus_config::Database::FromUrl { + url: PostgresConfigWithUrl::from_str( + "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" + ).unwrap() + } + }; + + // Copy the partial config file to the expected location. + let config_dir = (self.config.get_svc_config_dir)( + running_zone.name(), + &service.name, + ); + let partial_config_path = + config_dir.join(PARTIAL_CONFIG_FILENAME); + let config_path = config_dir.join(COMPLETE_CONFIG_FILENAME); + tokio::fs::copy(partial_config_path, &config_path) + .await + .map_err(|err| Error::Io { + path: config_path.clone(), + err, })?; + + // Serialize the configuration and append it into the file. 
+ let serialized_cfg = toml::Value::try_from(&runtime_config) + .expect("Cannot serialize config"); + let mut map = toml::map::Map::new(); + map.insert("runtime".to_string(), serialized_cfg); + let config_str = toml::to_string(&map).map_err(|err| { + Error::TomlSerialize { path: config_path.clone(), err } + })?; + let mut file = tokio::fs::OpenOptions::new() + .append(true) + .open(&config_path) + .await + .map_err(|err| Error::Io { + path: config_path.clone(), + err, + })?; + file.write_all(config_str.as_bytes()).await.map_err( + |err| Error::Io { path: config_path.clone(), err }, + )?; + } + ServiceType::InternalDns { server_address, dns_address } => { + info!(self.log, "Setting up internal-dns service"); running_zone .run_cmd(&[ crate::illumos::zone::SVCCFG, @@ -286,14 +378,12 @@ impl ServiceManager { "setprop", &format!( "config/server_address=[{}]:{}", - address, DNS_SERVER_PORT + server_address.ip(), + server_address.port(), ), ]) .map_err(|err| Error::ZoneCommand { - intent: format!( - "Setting DNS server address [{}]:{}", - address, DNS_SERVER_PORT - ), + intent: "set server address".to_string(), err, })?; @@ -305,14 +395,12 @@ impl ServiceManager { "setprop", &format!( "config/dns_address=[{}]:{}", - address, DNS_PORT + dns_address.ip(), + dns_address.port(), ), ]) .map_err(|err| Error::ZoneCommand { - intent: format!( - "Setting DNS address [{}]:{}", - address, DNS_SERVER_PORT - ), + intent: "Set DNS address".to_string(), err, })?; @@ -327,17 +415,17 @@ impl ServiceManager { ]) .map_err(|err| Error::ZoneCommand { intent: format!( - "Refreshing DNS service config for {}", + "Refresh SMF manifest {}", default_smf_name ), err, })?; } - _ => { - info!( - self.log, - "Service name {} did not match", service.name - ); + ServiceType::Oximeter => { + info!(self.log, "Setting up oximeter service"); + + // TODO: Implement with dynamic parameters, when address is + // dynamically assigned. 
} } @@ -438,7 +526,9 @@ mod test { svc, zone::MockZones, }; + use std::net::{Ipv6Addr, SocketAddrV6}; use std::os::unix::process::ExitStatusExt; + use uuid::Uuid; const SVC_NAME: &str = "my_svc"; const EXPECTED_ZONE_NAME: &str = "oxz_my_svc"; @@ -488,14 +578,29 @@ mod test { } // Prepare to call "ensure" for a new service, then actually call "ensure". - async fn ensure_new_service(mgr: &ServiceManager) { + async fn ensure_new_service(mgr: &ServiceManager, id: Uuid) { let _expectations = expect_new_service(); mgr.ensure(ServiceEnsureBody { services: vec![ServiceRequest { + id, name: SVC_NAME.to_string(), addresses: vec![], gz_addresses: vec![], + service_type: ServiceType::Nexus { + internal_address: SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + ), + external_address: SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + ), + }, }], }) .await @@ -504,12 +609,27 @@ mod test { // Prepare to call "ensure" for a service which already exists. We should // return the service without actually installing a new zone. 
- async fn ensure_existing_service(mgr: &ServiceManager) { + async fn ensure_existing_service(mgr: &ServiceManager, id: Uuid) { mgr.ensure(ServiceEnsureBody { services: vec![ServiceRequest { + id, name: SVC_NAME.to_string(), addresses: vec![], gz_addresses: vec![], + service_type: ServiceType::Nexus { + internal_address: SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + ), + external_address: SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + ), + }, }], }) .await @@ -533,26 +653,56 @@ mod test { drop(mgr); } + struct TestConfig { + config_dir: tempfile::TempDir, + } + + impl TestConfig { + async fn new() -> Self { + let config_dir = tempfile::TempDir::new().unwrap(); + tokio::fs::File::create( + config_dir.path().join(PARTIAL_CONFIG_FILENAME), + ) + .await + .unwrap(); + Self { config_dir } + } + + fn make_config(&self) -> Config { + let all_svcs_config_path = + self.config_dir.path().join(SERVICE_CONFIG_FILENAME); + let svc_config_dir = self.config_dir.path().to_path_buf(); + Config { + all_svcs_config_path, + get_svc_config_dir: Box::new( + move |_zone_name: &str, _svc_name: &str| { + svc_config_dir.clone() + }, + ), + } + } + } + #[tokio::test] #[serial_test::serial] async fn test_ensure_service() { let logctx = omicron_test_utils::dev::test_setup_log("test_ensure_service"); let log = logctx.log.clone(); + let test_config = TestConfig::new().await; - let config_dir = tempfile::TempDir::new().unwrap(); - let config = config_dir.path().join("services.toml"); let mgr = ServiceManager::new( log, Etherstub(ETHERSTUB_NAME.to_string()), EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, - Some(config), + test_config.make_config(), ) .await .unwrap(); - ensure_new_service(&mgr).await; + let id = Uuid::new_v4(); + ensure_new_service(&mgr, id).await; drop_service_manager(mgr); logctx.cleanup_successful(); @@ -565,21 +715,21 @@ mod test { "test_ensure_service_which_already_exists", ); let log = logctx.log.clone(); + let test_config = 
TestConfig::new().await; - let config_dir = tempfile::TempDir::new().unwrap(); - let config = config_dir.path().join("services.toml"); let mgr = ServiceManager::new( log, Etherstub(ETHERSTUB_NAME.to_string()), EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, - Some(config), + test_config.make_config(), ) .await .unwrap(); - ensure_new_service(&mgr).await; - ensure_existing_service(&mgr).await; + let id = Uuid::new_v4(); + ensure_new_service(&mgr, id).await; + ensure_existing_service(&mgr, id).await; drop_service_manager(mgr); logctx.cleanup_successful(); @@ -591,9 +741,7 @@ mod test { let logctx = omicron_test_utils::dev::test_setup_log( "test_services_are_recreated_on_reboot", ); - - let config_dir = tempfile::TempDir::new().unwrap(); - let config = config_dir.path().join("services.toml"); + let test_config = TestConfig::new().await; // First, spin up a ServiceManager, create a new service, and tear it // down. @@ -602,11 +750,13 @@ mod test { Etherstub(ETHERSTUB_NAME.to_string()), EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, - Some(config.clone()), + test_config.make_config(), ) .await .unwrap(); - ensure_new_service(&mgr).await; + + let id = Uuid::new_v4(); + ensure_new_service(&mgr, id).await; drop_service_manager(mgr); // Before we re-create the service manager - notably, using the same @@ -617,7 +767,7 @@ mod test { Etherstub(ETHERSTUB_NAME.to_string()), EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, - Some(config.clone()), + test_config.make_config(), ) .await .unwrap(); @@ -632,9 +782,7 @@ mod test { let logctx = omicron_test_utils::dev::test_setup_log( "test_services_do_not_persist_without_config", ); - - let config_dir = tempfile::TempDir::new().unwrap(); - let config = config_dir.path().join("services.toml"); + let test_config = TestConfig::new().await; // First, spin up a ServiceManager, create a new service, and tear it // down. 
@@ -643,16 +791,18 @@ mod test { Etherstub(ETHERSTUB_NAME.to_string()), EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, - Some(config.clone()), + test_config.make_config(), ) .await .unwrap(); - ensure_new_service(&mgr).await; + let id = Uuid::new_v4(); + ensure_new_service(&mgr, id).await; drop_service_manager(mgr); // Next, delete the config. This means the service we just created will // not be remembered on the next initialization. - std::fs::remove_file(&config).unwrap(); + let config = test_config.make_config(); + std::fs::remove_file(&config.all_svcs_config_path).unwrap(); // Observe that the old service is not re-initialized. let mgr = ServiceManager::new( @@ -660,7 +810,7 @@ mod test { Etherstub(ETHERSTUB_NAME.to_string()), EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, - Some(config.clone()), + config, ) .await .unwrap(); diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index c0c2ff649c8..5f8f1e500ab 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -16,7 +16,7 @@ use crate::params::{ DatasetKind, DiskStateRequested, InstanceHardware, InstanceMigrateParams, InstanceRuntimeStateRequested, ServiceEnsureBody, }; -use crate::services::ServiceManager; +use crate::services::{self, ServiceManager}; use crate::storage_manager::StorageManager; use omicron_common::api::{ internal::nexus::DiskRuntimeState, internal::nexus::InstanceRuntimeState, @@ -245,7 +245,7 @@ impl SledAgent { etherstub.clone(), etherstub_vnic.clone(), *sled_address.ip(), - None, + services::Config::default(), ) .await?; diff --git a/smf/nexus/config.toml b/smf/nexus/config-partial.toml similarity index 53% rename from smf/nexus/config.toml rename to smf/nexus/config-partial.toml index d73d7a90cfc..b77ffc3137f 100644 --- a/smf/nexus/config.toml +++ b/smf/nexus/config-partial.toml @@ -1,10 +1,7 @@ # -# Oxide API: example configuration file +# Oxide API: partial configuration file # -# Identifier for 
this instance of Nexus -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" - [console] # Directory for static assets. Absolute path or relative to CWD. static_dir = "/var/nexus/static" @@ -16,18 +13,6 @@ session_absolute_timeout_minutes = 480 # TODO(https://github.com/oxidecomputer/omicron/issues/372): Remove "spoof". schemes_external = ["spoof", "session_cookie"] -[database] -# URL for connecting to the database -url = "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" - -[dropshot_external] -# IP address and TCP port on which to listen for the external API -bind_address = "[fd00:1122:3344:0101::3]:12220" - -[dropshot_internal] -# IP address and TCP port on which to listen for the internal API -bind_address = "[fd00:1122:3344:0101::3]:12221" - [log] # Show log messages of this level and more severe level = "info" diff --git a/smf/nexus/manifest.xml b/smf/nexus/manifest.xml index 0b8da2ff62f..3ff92b2fbac 100644 --- a/smf/nexus/manifest.xml +++ b/smf/nexus/manifest.xml @@ -11,6 +11,14 @@ type='service'> + + + + + + diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index d8113cf4d1b..698d5b112fc 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -11,21 +11,25 @@ rack_subnet = "fd00:1122:3344:0100::" # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate crucible datasets. 
[[request.dataset]] +id = "09a9a25f-2602-4e2f-9630-31af9c492c3e" zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" address = "[fd00:1122:3344:0101::6]:32345" dataset_kind.type = "crucible" [[request.dataset]] +id = "2713b37a-3043-4ed5-aaff-f38200e45cfb" zpool_id = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" address = "[fd00:1122:3344:0101::7]:32345" dataset_kind.type = "crucible" [[request.dataset]] +id = "ffd16cad-e5d5-495e-9c59-4312a3857d91" zpool_id = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" address = "[fd00:1122:3344:0101::8]:32345" dataset_kind.type = "crucible" [[request.dataset]] +id = "4d08fc19-3d5f-4f6b-9c48-925f8eac7255" zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" address = "[fd00:1122:3344:0101::2]:32221" dataset_kind.type = "cockroach_db" @@ -34,18 +38,27 @@ dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate clickhouse datasets. [[request.dataset]] +id = "a3505b41-a592-420b-84f2-3d76bf0e0a81" zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" address = "[fd00:1122:3344:0101::5]:8123" dataset_kind.type = "clickhouse" [[request.service]] +id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" name = "nexus" addresses = [ "fd00:1122:3344:0101::3" ] gz_addresses = [] +[request.service.service_type] +type = "nexus" +internal_address = "[fd00:1122:3344:0101::3]:12221" +external_address = "[fd00:1122:3344:0101::3]:12220" # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate Oximeter services. 
[[request.service]] +id = "1da65e5b-210c-4859-a7d7-200c1e659972" name = "oximeter" addresses = [ "fd00:1122:3344:0101::4" ] gz_addresses = [] +[request.service.service_type] +type = "oximeter" diff --git a/smf/sled-agent/manifest.xml b/smf/sled-agent/manifest.xml index 378b77776c8..96f029d96e0 100644 --- a/smf/sled-agent/manifest.xml +++ b/smf/sled-agent/manifest.xml @@ -28,6 +28,10 @@ type='service'> + + + diff --git a/test-utils/src/dev/db.rs b/test-utils/src/dev/db.rs index 5449bfc4139..b7112ae1a37 100644 --- a/test-utils/src/dev/db.rs +++ b/test-utils/src/dev/db.rs @@ -8,7 +8,7 @@ use crate::dev::poll; use anyhow::anyhow; use anyhow::bail; use anyhow::Context; -use omicron_common::config::PostgresConfigWithUrl; +use omicron_common::postgres_config::PostgresConfigWithUrl; use std::ffi::{OsStr, OsString}; use std::fmt; use std::ops::Deref; From fccc15cc8df05acffdafa791ed5c1d4d965e13e3 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 16:45:34 -0400 Subject: [PATCH 02/17] Ensure postgres config was just a rename --- common/src/config.rs | 95 -------------------------------------------- 1 file changed, 95 deletions(-) delete mode 100644 common/src/config.rs diff --git a/common/src/config.rs b/common/src/config.rs deleted file mode 100644 index 2509ae4fca2..00000000000 --- a/common/src/config.rs +++ /dev/null @@ -1,95 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Common objects used for configuration - -use std::fmt; -use std::ops::Deref; -use std::str::FromStr; - -/// Describes a URL for connecting to a PostgreSQL server -// The config pattern that we're using requires that types in the config impl -// Serialize. If tokio_postgres::config::Config impl'd Serialize or even -// Display, we'd just use that directly instead of this type. But it doesn't. 
-// We could implement a serialize function ourselves, but URLs support many -// different properties, and this could be brittle and easy to get wrong. -// Instead, this type just wraps tokio_postgres::config::Config and keeps the -// original String around. (The downside is that a consumer _generating_ a -// nexus::db::Config needs to generate a URL that matches the -// tokio_postgres::config::Config that they construct here, but this is not -// currently an important use case.) -// -// To ensure that the URL and config are kept in sync, we currently only support -// constructing one of these via `FromStr` and the fields are not public. -#[derive(Clone, Debug, PartialEq)] -pub struct PostgresConfigWithUrl { - url_raw: String, - config: tokio_postgres::config::Config, -} - -impl PostgresConfigWithUrl { - pub fn url(&self) -> String { - self.url_raw.clone() - } -} - -impl FromStr for PostgresConfigWithUrl { - type Err = tokio_postgres::Error; - - fn from_str(s: &str) -> Result { - Ok(PostgresConfigWithUrl { url_raw: s.to_owned(), config: s.parse()? }) - } -} - -impl fmt::Display for PostgresConfigWithUrl { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(&self.url_raw) - } -} - -impl Deref for PostgresConfigWithUrl { - type Target = tokio_postgres::config::Config; - - fn deref(&self) -> &Self::Target { - &self.config - } -} - -#[cfg(test)] -mod test { - use super::PostgresConfigWithUrl; - - #[test] - fn test_bad_url() { - // There is surprisingly little that we can rely on the - // tokio_postgres::config::Config parser to include in the error - // message. 
- let error = "foo".parse::().unwrap_err(); - assert!(error.to_string().contains("unexpected EOF")); - "http://127.0.0.1:1234".parse::().unwrap_err(); - let error = "postgresql://example.com?sslmode=not-a-real-ssl-mode" - .parse::() - .unwrap_err(); - assert!(error - .to_string() - .contains("invalid value for option `sslmode`")); - } - - #[test] - fn test_example_url() { - let config = "postgresql://notauser@10.2.3.4:1789?sslmode=disable" - .parse::() - .unwrap(); - assert_eq!(config.get_user(), Some("notauser")); - assert_eq!( - config.get_ssl_mode(), - tokio_postgres::config::SslMode::Disable - ); - assert_eq!( - config.get_hosts(), - &[tokio_postgres::config::Host::Tcp("10.2.3.4".to_string())] - ); - assert_eq!(config.get_ports(), &[1789]); - } -} From a077bd41879b3551ce25d2f59377262ec1cd1ef6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 17:38:38 -0400 Subject: [PATCH 03/17] review feedback --- nexus/src/config.rs | 2 +- nexus/src/context.rs | 2 +- nexus/src/lib.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nexus/src/config.rs b/nexus/src/config.rs index d5bf6a2a2f9..a6034a7eea3 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -152,7 +152,7 @@ pub struct Config { } impl Config { - /// Load a `PackageConfig` from the given TOML file + /// Load a `Config` from the given TOML file /// /// This config object can then be used to create a new `Nexus`. /// The format is described in the README. diff --git a/nexus/src/context.rs b/nexus/src/context.rs index a08f22304df..2ad6a93553a 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -68,7 +68,7 @@ pub struct ConsoleConfig { impl ServerContext { /// Create a new context with the given rack id and log. This creates the /// underlying nexus as well. 
- pub async fn new( + pub fn new( rack_id: Uuid, log: Logger, config: &config::Config, diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 61abe04b1ba..c13fc3de3c8 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -90,7 +90,7 @@ impl Server { let ctxlog = log.new(o!("component" => "ServerContext")); - let apictx = ServerContext::new(rack_id, ctxlog, &config).await?; + let apictx = ServerContext::new(rack_id, ctxlog, &config)?; let http_server_starter_external = dropshot::HttpServerStarter::new( &config.runtime.dropshot_external, From d16eda2832fba9c5e46c68431c3e400a6039ea17 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 16:44:41 -0400 Subject: [PATCH 04/17] DNS client --- Cargo.lock | 4 + internal-dns-client/Cargo.toml | 6 +- internal-dns-client/src/lib.rs | 3 + internal-dns-client/src/multiclient.rs | 145 +++++++++++++++++++++++++ internal-dns-client/src/names.rs | 55 ++++++++++ 5 files changed, 212 insertions(+), 1 deletion(-) create mode 100644 internal-dns-client/src/multiclient.rs create mode 100644 internal-dns-client/src/names.rs diff --git a/Cargo.lock b/Cargo.lock index fed2770e843..850c0ec0adc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2342,12 +2342,16 @@ dependencies = [ name = "internal-dns-client" version = "0.1.0" dependencies = [ + "omicron-common", "progenitor", "reqwest", "serde", "serde_json", "slog", "structopt", + "trust-dns-proto", + "trust-dns-resolver", + "uuid", ] [[package]] diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml index 22e28c91bc9..0ac6ecba610 100644 --- a/internal-dns-client/Cargo.toml +++ b/internal-dns-client/Cargo.toml @@ -5,9 +5,13 @@ edition = "2021" license = "MPL-2.0" [dependencies] +omicron-common = { path = "../common" } progenitor = { git = "https://github.com/oxidecomputer/progenitor" } +reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } serde = { version = "1.0", features = [ "derive" ] } serde_json = "1.0" slog = { version = "2.5.0", 
features = [ "max_level_trace", "release_max_level_debug" ] } structopt = "0.3" -reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } +trust-dns-proto = "0.21" +trust-dns-resolver = "0.21" +uuid = { version = "1.1.0", features = [ "v4", "serde" ] } diff --git a/internal-dns-client/src/lib.rs b/internal-dns-client/src/lib.rs index 49daa3d58ae..f7ce56f8521 100644 --- a/internal-dns-client/src/lib.rs +++ b/internal-dns-client/src/lib.rs @@ -16,3 +16,6 @@ progenitor::generate_api!( slog::debug!(log, "client response"; "result" => ?result); }), ); + +pub mod multiclient; +pub mod names; diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs new file mode 100644 index 00000000000..e01fb5a2139 --- /dev/null +++ b/internal-dns-client/src/multiclient.rs @@ -0,0 +1,145 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::types::{DnsKv, DnsRecord, DnsRecordKey, Srv}; +use omicron_common::address::{ + Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, DNS_SERVER_PORT, +}; +use omicron_common::backoff::{ + internal_service_policy, retry_notify, BackoffError, +}; +use slog::{info, warn, Logger}; +use std::net::{SocketAddr, SocketAddrV6}; +use trust_dns_resolver::config::{ + NameServerConfig, Protocol, ResolverConfig, ResolverOpts, +}; +use trust_dns_resolver::TokioAsyncResolver; + +type DnsError = crate::Error; + +/// A connection used to update multiple DNS servers. 
+pub struct Updater { + clients: Vec, +} + +impl Updater { + pub fn new(subnet: Ipv6Subnet, log: Logger) -> Self { + let clients = ReservedRackSubnet::new(subnet) + .get_dns_subnets() + .into_iter() + .map(|dns_subnet| { + let addr = dns_subnet.dns_address().ip(); + info!(log, "Adding DNS server: {}", addr); + crate::Client::new( + &format!("http://[{}]:{}", addr, DNS_SERVER_PORT), + log.clone(), + ) + }) + .collect::>(); + + Self { clients } + } + + /// Utility function to insert: + /// - A set of uniquely-named AAAA records, each corresponding to an address + /// - An SRV record, pointing to each of the AAAA records. + pub async fn insert_dns_records( + &self, + log: &Logger, + aaaa: Vec<(crate::names::AAAA, SocketAddrV6)>, + srv_key: crate::names::SRV, + ) -> Result<(), DnsError> { + let mut records = Vec::with_capacity(aaaa.len() + 1); + + // Add one DnsKv per AAAA, each with a single record. + records.extend(aaaa.iter().map(|(name, addr)| DnsKv { + key: DnsRecordKey { name: name.to_string() }, + records: vec![DnsRecord::Aaaa(*addr.ip())], + })); + + // Add the DnsKv for the SRV, with a record for each AAAA. + records.push(DnsKv { + key: DnsRecordKey { name: srv_key.to_string() }, + records: aaaa + .iter() + .map(|(name, addr)| { + DnsRecord::Srv(Srv { + prio: 0, + weight: 0, + port: addr.port(), + target: name.to_string(), + }) + }) + .collect::>(), + }); + + let set_record = || async { + self.dns_records_set(&records) + .await + .map_err(BackoffError::transient)?; + Ok::<(), BackoffError>(()) + }; + let log_failure = |error, _| { + warn!(log, "Failed to set DNS records"; "error" => ?error); + }; + + retry_notify(internal_service_policy(), set_record, log_failure) + .await?; + Ok(()) + } + + /// Sets a records on all DNS servers. + /// + /// Returns an error if setting the record fails on any server. + pub async fn dns_records_set<'a>( + &'a self, + body: &'a Vec, + ) -> Result<(), DnsError> { + // TODO: Could be sent concurrently. 
+ for client in &self.clients { + client.dns_records_set(body).await?; + } + + Ok(()) + } + + /// Deletes records in all DNS servers. + /// + /// Returns an error if deleting the record fails on any server. + pub async fn dns_records_delete<'a>( + &'a self, + body: &'a Vec, + ) -> Result<(), DnsError> { + // TODO: Could be sent concurrently + for client in &self.clients { + client.dns_records_delete(body).await?; + } + Ok(()) + } +} + +/// Creates a resolver using all internal DNS name servers. +pub fn create_resolver( + subnet: Ipv6Subnet, +) -> Result { + let mut rc = ResolverConfig::new(); + let dns_ips = ReservedRackSubnet::new(subnet) + .get_dns_subnets() + .into_iter() + .map(|subnet| subnet.dns_address().ip()) + .collect::>(); + + for dns_ip in dns_ips { + rc.add_name_server(NameServerConfig { + socket_addr: SocketAddr::V6(SocketAddrV6::new( + dns_ip, DNS_PORT, 0, 0, + )), + protocol: Protocol::Udp, + tls_dns_name: None, + trust_nx_responses: false, + bind_addr: None, + }); + } + TokioAsyncResolver::tokio(rc, ResolverOpts::default()) +} diff --git a/internal-dns-client/src/names.rs b/internal-dns-client/src/names.rs new file mode 100644 index 00000000000..6384ec9e503 --- /dev/null +++ b/internal-dns-client/src/names.rs @@ -0,0 +1,55 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::fmt; +use uuid::Uuid; + +const DNS_ZONE: &str = "control-plane.oxide.internal"; + +pub enum SRV { + /// A service identified and accessed by name, such as "nexus", "CRDB", etc. + /// + /// This is used in cases where services are interchangeable. + Service(String), + + /// A service identified by name and a unique identifier. + /// + /// This is used in cases where services are not interchangeable, such as + /// for the Sled agent. 
+ Backend(String, Uuid), +} + +impl fmt::Display for SRV { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + SRV::Service(name) => { + write!(f, "_{}._tcp.{}", name, DNS_ZONE) + } + SRV::Backend(name, id) => { + write!(f, "_{}._tcp.{}.{}", name, id, DNS_ZONE) + } + } + } +} + +pub enum AAAA { + /// Identifies an AAAA record for a sled. + Sled(Uuid), + + /// Identifies an AAAA record for a zone within a sled. + Zone(Uuid), +} + +impl fmt::Display for AAAA { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + AAAA::Sled(id) => { + write!(f, "{}.sled.{}", id, DNS_ZONE) + } + AAAA::Zone(id) => { + write!(f, "{}.host.{}", id, DNS_ZONE) + } + } + } +} From 8db30b70b965e6eb3de54d2ae8172109225aff37 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 17:00:25 -0400 Subject: [PATCH 05/17] Add concurrency --- Cargo.lock | 1 + internal-dns-client/Cargo.toml | 1 + internal-dns-client/src/multiclient.rs | 28 ++++++++++++++++++-------- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 850c0ec0adc..ea7797ccd16 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2342,6 +2342,7 @@ dependencies = [ name = "internal-dns-client" version = "0.1.0" dependencies = [ + "futures", "omicron-common", "progenitor", "reqwest", diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml index 0ac6ecba610..f2611721ae7 100644 --- a/internal-dns-client/Cargo.toml +++ b/internal-dns-client/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" license = "MPL-2.0" [dependencies] +futures = "0.3.21" omicron-common = { path = "../common" } progenitor = { git = "https://github.com/oxidecomputer/progenitor" } reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index e01fb5a2139..3d8d912bf7f 100644 --- a/internal-dns-client/src/multiclient.rs +++ 
b/internal-dns-client/src/multiclient.rs @@ -3,6 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use crate::types::{DnsKv, DnsRecord, DnsRecordKey, Srv}; +use futures::stream::{self, StreamExt, TryStreamExt}; use omicron_common::address::{ Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, DNS_SERVER_PORT, }; @@ -96,10 +97,15 @@ impl Updater { &'a self, body: &'a Vec, ) -> Result<(), DnsError> { - // TODO: Could be sent concurrently. - for client in &self.clients { - client.dns_records_set(body).await?; - } + stream::iter(&self.clients) + .map(Ok::<_, DnsError>) + .try_for_each_concurrent( + None, + |client| async move { + client.dns_records_set(body).await?; + Ok(()) + } + ).await?; Ok(()) } @@ -111,10 +117,16 @@ impl Updater { &'a self, body: &'a Vec, ) -> Result<(), DnsError> { - // TODO: Could be sent concurrently - for client in &self.clients { - client.dns_records_delete(body).await?; - } + stream::iter(&self.clients) + .map(Ok::<_, DnsError>) + .try_for_each_concurrent( + None, + |client| async move { + client.dns_records_delete(body).await?; + Ok(()) + } + ).await?; + Ok(()) } } From 3a0c6ba8102541463416aedf345207a2baa34854 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 17:19:22 -0400 Subject: [PATCH 06/17] comment --- internal-dns-client/src/multiclient.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 3d8d912bf7f..47ac76e7710 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -25,6 +25,8 @@ pub struct Updater { } impl Updater { + /// Creates a new "Updater", capable of communicating with all + /// DNS servers within the AZ. 
pub fn new(subnet: Ipv6Subnet, log: Logger) -> Self { let clients = ReservedRackSubnet::new(subnet) .get_dns_subnets() From 33b3e02b7a926eec67674b6d896d144675da8f2d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 17:50:13 -0400 Subject: [PATCH 07/17] fmt --- Cargo.lock | 2 +- internal-dns-client/src/multiclient.rs | 24 ++++++++++-------------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ea7797ccd16..68e58d9b219 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2352,7 +2352,7 @@ dependencies = [ "structopt", "trust-dns-proto", "trust-dns-resolver", - "uuid", + "uuid 1.1.0", ] [[package]] diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 47ac76e7710..24c8817c274 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -101,13 +101,11 @@ impl Updater { ) -> Result<(), DnsError> { stream::iter(&self.clients) .map(Ok::<_, DnsError>) - .try_for_each_concurrent( - None, - |client| async move { - client.dns_records_set(body).await?; - Ok(()) - } - ).await?; + .try_for_each_concurrent(None, |client| async move { + client.dns_records_set(body).await?; + Ok(()) + }) + .await?; Ok(()) } @@ -121,13 +119,11 @@ impl Updater { ) -> Result<(), DnsError> { stream::iter(&self.clients) .map(Ok::<_, DnsError>) - .try_for_each_concurrent( - None, - |client| async move { - client.dns_records_delete(body).await?; - Ok(()) - } - ).await?; + .try_for_each_concurrent(None, |client| async move { + client.dns_records_delete(body).await?; + Ok(()) + }) + .await?; Ok(()) } From 3eb57dcdec6d8585ce4c40cf2048f5ec2d45a9fd Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 18:39:53 -0400 Subject: [PATCH 08/17] lockfile --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 68e58d9b219..ca290e03a05 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2352,7 +2352,7 @@ dependencies = [ 
"structopt", "trust-dns-proto", "trust-dns-resolver", - "uuid 1.1.0", + "uuid 1.1.1", ] [[package]] From dd04a67f04b29af80b5e182a9c99ac62889c7778 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 16:39:09 -0400 Subject: [PATCH 09/17] s/runtime/deployment --- common/src/nexus_config.rs | 8 +-- nexus/examples/config.toml | 10 ++-- nexus/src/app/mod.rs | 4 +- nexus/src/config.rs | 54 +++++++++++---------- nexus/src/context.rs | 10 ++-- nexus/src/lib.rs | 6 +-- nexus/test-utils/src/lib.rs | 4 +- nexus/tests/config.test.toml | 10 ++-- nexus/tests/integration_tests/authn_http.rs | 2 +- nexus/tests/integration_tests/commands.rs | 2 +- sled-agent/src/services.rs | 13 +++-- 11 files changed, 65 insertions(+), 58 deletions(-) diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index f1325ae336d..2b34108643d 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -3,7 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. //! Configuration parameters to Nexus that are usually only known -//! at runtime. +//! at deployment time. use super::address::{Ipv6Subnet, RACK_PREFIX}; use super::postgres_config::PostgresConfigWithUrl; @@ -99,7 +99,7 @@ pub enum Database { } #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] -pub struct RuntimeConfig { +pub struct DeploymentConfig { /// Uuid of the Nexus instance pub id: Uuid, /// Dropshot configuration for external API server @@ -112,8 +112,8 @@ pub struct RuntimeConfig { pub database: Database, } -impl RuntimeConfig { - /// Load a `RuntimeConfig` from the given TOML file +impl DeploymentConfig { + /// Load a `DeploymentConfig` from the given TOML file /// /// This config object can then be used to create a new `Nexus`. /// The format is described in the README. 
diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 22889ab1be9..c841a12ac1c 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -33,25 +33,25 @@ mode = "stderr-terminal" [timeseries_db] address = "[::1]:8123" -[runtime] +[deployment] # Identifier for this instance of Nexus id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" -[runtime.dropshot_external] +[deployment.dropshot_external] # IP address and TCP port on which to listen for the external API bind_address = "127.0.0.1:12220" # Allow larger request bodies (1MiB) to accomodate firewall endpoints (one # rule is ~500 bytes) request_body_max_bytes = 1048576 -[runtime.dropshot_internal] +[deployment.dropshot_internal] # IP address and TCP port on which to listen for the internal API bind_address = "127.0.0.1:12221" -[runtime.subnet] +[deployment.subnet] net = "fd00:1122:3344:0100::/56" -[runtime.database] +[deployment.database] # URL for connecting to the database type = "from_url" url = "postgresql://root@127.0.0.1:32221/omicron?sslmode=disable" diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 1c3620de7e7..1cb1f6b6ff7 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -112,7 +112,7 @@ impl Nexus { authz: Arc, ) -> Arc { let pool = Arc::new(pool); - let my_sec_id = db::SecId::from(config.runtime.id); + let my_sec_id = db::SecId::from(config.deployment.id); let db_datastore = Arc::new(db::DataStore::new(Arc::clone(&pool))); let sec_store = Arc::new(db::CockroachDbSecStore::new( my_sec_id, @@ -143,7 +143,7 @@ impl Nexus { populate_start(populate_ctx, Arc::clone(&db_datastore)); let nexus = Nexus { - id: config.runtime.id, + id: config.deployment.id, rack_id, log: log.new(o!()), api_rack_identity: db::model::RackIdentity::new(rack_id), diff --git a/nexus/src/config.rs b/nexus/src/config.rs index a6034a7eea3..83be56fd335 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -7,7 +7,9 @@ use anyhow::anyhow; use dropshot::ConfigLogging; -use 
omicron_common::nexus_config::{InvalidTunable, LoadError, RuntimeConfig}; +use omicron_common::nexus_config::{ + DeploymentConfig, InvalidTunable, LoadError, +}; use serde::Deserialize; use serde::Serialize; use serde_with::DeserializeFromStr; @@ -147,8 +149,8 @@ pub struct Config { #[serde(flatten)] pub pkg: PackageConfig, - /// A variety of configuration parameters only known at runtime. - pub runtime: RuntimeConfig, + /// A variety of configuration parameters only known at deployment time. + pub deployment: DeploymentConfig, } impl Config { @@ -214,7 +216,7 @@ mod test { use libc; use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; use omicron_common::nexus_config::{ - Database, LoadErrorKind, RuntimeConfig, + Database, DeploymentConfig, LoadErrorKind, }; use std::fs; use std::net::{Ipv6Addr, SocketAddr}; @@ -288,7 +290,7 @@ mod test { let error = read_config("empty", "").expect_err("expected failure"); if let LoadErrorKind::Parse(error) = &error.kind { assert_eq!(error.line_col(), None); - assert_eq!(error.to_string(), "missing field `runtime`"); + assert_eq!(error.to_string(), "missing field `deployment`"); } else { panic!( "Got an unexpected error, expected Parse but got {:?}", @@ -325,17 +327,17 @@ mod test { default_base_url = "http://example.invalid/" [tunables] max_vpc_ipv4_subnet_prefix = 27 - [runtime] + [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" - [runtime.dropshot_external] + [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 - [runtime.dropshot_internal] + [deployment.dropshot_internal] bind_address = "10.1.2.3:4568" request_body_max_bytes = 1024 - [runtime.subnet] + [deployment.subnet] net = "::/56" - [runtime.database] + [deployment.database] type = "from_dns" "##, ) @@ -344,7 +346,7 @@ mod test { assert_eq!( config, Config { - runtime: RuntimeConfig { + deployment: DeploymentConfig { id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), dropshot_external: ConfigDropshot { 
bind_address: "10.1.2.3:4567" @@ -403,17 +405,17 @@ mod test { if_exists = "fail" [timeseries_db] address = "[::1]:8123" - [runtime] + [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" - [runtime.dropshot_external] + [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 - [runtime.dropshot_internal] + [deployment.dropshot_internal] bind_address = "10.1.2.3:4568" request_body_max_bytes = 1024 - [runtime.subnet] + [deployment.subnet] net = "::/56" - [runtime.database] + [deployment.database] type = "from_dns" "##, ) @@ -444,17 +446,17 @@ mod test { if_exists = "fail" [timeseries_db] address = "[::1]:8123" - [runtime] + [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" - [runtime.dropshot_external] + [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 - [runtime.dropshot_internal] + [deployment.dropshot_internal] bind_address = "10.1.2.3:4568" request_body_max_bytes = 1024 - [runtime.subnet] + [deployment.subnet] net = "::/56" - [runtime.database] + [deployment.database] type = "from_dns" "##, ) @@ -499,17 +501,17 @@ mod test { default_base_url = "http://example.invalid/" [tunables] max_vpc_ipv4_subnet_prefix = 100 - [runtime] + [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" - [runtime.dropshot_external] + [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 - [runtime.dropshot_internal] + [deployment.dropshot_internal] bind_address = "10.1.2.3:4568" request_body_max_bytes = 1024 - [runtime.subnet] + [deployment.subnet] net = "::/56" - [runtime.database] + [deployment.database] type = "from_dns" "##, ) diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 2ad6a93553a..e940bef6d10 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -91,8 +91,10 @@ impl ServerContext { let internal_authn = Arc::new(authn::Context::internal_api()); let authz = Arc::new(authz::Authz::new(&log)); let create_tracker = |name: &str| 
{ - let target = - HttpService { name: name.to_string(), id: config.runtime.id }; + let target = HttpService { + name: name.to_string(), + id: config.deployment.id, + }; const START_LATENCY_DECADE: i8 = -6; const END_LATENCY_DECADE: i8 = 3; LatencyTracker::with_latency_decades( @@ -104,7 +106,7 @@ impl ServerContext { }; let internal_latencies = create_tracker("nexus-internal"); let external_latencies = create_tracker("nexus-external"); - let producer_registry = ProducerRegistry::with_id(config.runtime.id); + let producer_registry = ProducerRegistry::with_id(config.deployment.id); producer_registry .register_producer(internal_latencies.clone()) .unwrap(); @@ -135,7 +137,7 @@ impl ServerContext { // nexus in dev for everyone // Set up DB pool - let url = match &config.runtime.database { + let url = match &config.deployment.database { nexus_config::Database::FromUrl { url } => url.clone(), nexus_config::Database::FromDns => { todo!("Not yet implemented"); diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index c13fc3de3c8..79f8a2cd838 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -85,7 +85,7 @@ impl Server { rack_id: Uuid, log: &Logger, ) -> Result { - let log = log.new(o!("name" => config.runtime.id.to_string())); + let log = log.new(o!("name" => config.deployment.id.to_string())); info!(log, "setting up nexus server"); let ctxlog = log.new(o!("component" => "ServerContext")); @@ -93,7 +93,7 @@ impl Server { let apictx = ServerContext::new(rack_id, ctxlog, &config)?; let http_server_starter_external = dropshot::HttpServerStarter::new( - &config.runtime.dropshot_external, + &config.deployment.dropshot_external, external_api(), Arc::clone(&apictx), &log.new(o!("component" => "dropshot_external")), @@ -101,7 +101,7 @@ impl Server { .map_err(|error| format!("initializing external server: {}", error))?; let http_server_starter_internal = dropshot::HttpServerStarter::new( - &config.runtime.dropshot_internal, + &config.deployment.dropshot_internal, internal_api(), 
Arc::clone(&apictx), &log.new(o!("component" => "dropshot_internal")), diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index e4eb744e2fa..02b9a0d7b7d 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -76,7 +76,7 @@ pub fn load_test_config() -> omicron_nexus::Config { let config_file_path = Path::new("tests/config.test.toml"); let mut config = omicron_nexus::Config::from_file(config_file_path) .expect("failed to load config.test.toml"); - config.runtime.id = Uuid::new_v4(); + config.deployment.id = Uuid::new_v4(); config } @@ -100,7 +100,7 @@ pub async fn test_setup_with_config( let clickhouse = dev::clickhouse::ClickHouseInstance::new(0).await.unwrap(); // Store actual address/port information for the databases after they start. - config.runtime.database = + config.deployment.database = nexus_config::Database::FromUrl { url: database.pg_config().clone() }; config.pkg.timeseries_db.address.set_port(clickhouse.port()); diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 2fc4ddba192..0a8789893a1 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -35,7 +35,7 @@ address = "[::1]:0" # Allow small subnets, so we can test IP address exhaustion easily / quickly max_vpc_ipv4_subnet_prefix = 29 -[runtime] +[deployment] # Identifier for this instance of Nexus. # NOTE: The test suite always overrides this. id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" @@ -45,22 +45,22 @@ id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" # available port) because the test suite will be running many servers # concurrently. # -[runtime.dropshot_external] +[deployment.dropshot_external] bind_address = "127.0.0.1:0" request_body_max_bytes = 1048576 # port must be 0. 
see above -[runtime.dropshot_internal] +[deployment.dropshot_internal] bind_address = "127.0.0.1:0" request_body_max_bytes = 1048576 -[runtime.subnet] +[deployment.subnet] net = "fd00:1122:3344:0100::/56" # # NOTE: for the test suite, the database URL will be replaced with one # appropriate for the database that's started by the test runner. # -[runtime.database] +[deployment.database] type = "from_url" url = "postgresql://root@127.0.0.1:0/omicron?sslmode=disable" diff --git a/nexus/tests/integration_tests/authn_http.rs b/nexus/tests/integration_tests/authn_http.rs index e0234da1b97..99f25f91539 100644 --- a/nexus/tests/integration_tests/authn_http.rs +++ b/nexus/tests/integration_tests/authn_http.rs @@ -299,7 +299,7 @@ async fn start_whoami_server( TestContext::new( whoami_api, server_state, - &config.runtime.dropshot_external, + &config.deployment.dropshot_external, Some(logctx), log, ) diff --git a/nexus/tests/integration_tests/commands.rs b/nexus/tests/integration_tests/commands.rs index 561e5fc478c..e28e313ff31 100644 --- a/nexus/tests/integration_tests/commands.rs +++ b/nexus/tests/integration_tests/commands.rs @@ -76,7 +76,7 @@ fn test_nexus_invalid_config() { assert_eq!( stderr_text, format!( - "nexus: parse \"{}\": missing field `runtime`\n", + "nexus: parse \"{}\": missing field `deployment`\n", config_path.display() ), ); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 4edd18a3fa7..3f617aaf399 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -13,7 +13,9 @@ use crate::params::{ServiceEnsureBody, ServiceRequest, ServiceType}; use crate::zone::Zones; use dropshot::ConfigDropshot; use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; -use omicron_common::nexus_config::{self, RuntimeConfig as NexusRuntimeConfig}; +use omicron_common::nexus_config::{ + self, DeploymentConfig as NexusDeploymentConfig, +}; use omicron_common::postgres_config::PostgresConfigWithUrl; use slog::Logger; use 
std::collections::HashSet; @@ -312,7 +314,7 @@ impl ServiceManager { // Nexus takes a separate config file for parameters which // cannot be known at packaging time. - let runtime_config = NexusRuntimeConfig { + let deployment_config = NexusDeploymentConfig { id: service.id, dropshot_external: ConfigDropshot { bind_address: SocketAddr::V6(external_address), @@ -351,10 +353,11 @@ impl ServiceManager { })?; // Serialize the configuration and append it into the file. - let serialized_cfg = toml::Value::try_from(&runtime_config) - .expect("Cannot serialize config"); + let serialized_cfg = + toml::Value::try_from(&deployment_config) + .expect("Cannot serialize config"); let mut map = toml::map::Map::new(); - map.insert("runtime".to_string(), serialized_cfg); + map.insert("deployment".to_string(), serialized_cfg); let config_str = toml::to_string(&map).map_err(|err| { Error::TomlSerialize { path: config_path.clone(), err } })?; From e1dc94188da4827e6ef1a11c671d85ff234af148 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 18:36:05 -0400 Subject: [PATCH 10/17] [nexus][sled-agent] Generate rack ID in RSS, plumb it through Nexus --- common/src/nexus_config.rs | 2 ++ common/src/sql/dbinit.sql | 8 ++++++++ nexus/examples/config.toml | 1 + nexus/src/app/sled.rs | 2 +- nexus/src/config.rs | 7 +++++++ nexus/src/db/datastore.rs | 8 +++++--- nexus/src/db/model/sled.rs | 5 ++++- nexus/src/db/schema.rs | 1 + nexus/src/lib.rs | 8 +++----- nexus/test-utils/src/lib.rs | 6 ++---- sled-agent/src/bootstrap/agent.rs | 1 + sled-agent/src/bootstrap/params.rs | 7 +++++++ sled-agent/src/rack_setup/service.rs | 3 +++ sled-agent/src/server.rs | 14 ++++++++++---- sled-agent/src/services.rs | 11 +++++++++++ sled-agent/src/sled_agent.rs | 2 ++ 16 files changed, 68 insertions(+), 18 deletions(-) diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index 2b34108643d..a18454e02d0 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -102,6 +102,8 @@ 
pub enum Database { pub struct DeploymentConfig { /// Uuid of the Nexus instance pub id: Uuid, + /// Uuid of the Rack where Nexus is executing. + pub rack_id: Uuid, /// Dropshot configuration for external API server pub dropshot_external: ConfigDropshot, /// Dropshot configuration for internal API server diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index 3944b3fd46f..e358c9a227e 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -75,6 +75,9 @@ CREATE TABLE omicron.public.sled ( time_deleted TIMESTAMPTZ, rcgen INT NOT NULL, + /* FK into the Rack table */ + rack_id UUID NOT NULL, + /* The IP address and bound port of the sled agent server. */ ip INET NOT NULL, port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, @@ -83,6 +86,11 @@ CREATE TABLE omicron.public.sled ( last_used_address INET NOT NULL ); +/* Add an index which lets us look up sleds on a rack */ +CREATE INDEX ON omicron.public.sled ( + rack_id +) WHERE time_deleted IS NULL; + /* * Services */ diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index c841a12ac1c..727055490e8 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -36,6 +36,7 @@ address = "[::1]:8123" [deployment] # Identifier for this instance of Nexus id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" +rack_id = "c19a698f-c6f9-4a17-ae30-20d711b8f7dc" [deployment.dropshot_external] # IP address and TCP port on which to listen for the external API diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 0150cbec148..e4fc616f095 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -31,7 +31,7 @@ impl super::Nexus { address: SocketAddrV6, ) -> Result<(), Error> { info!(self.log, "registered sled agent"; "sled_uuid" => id.to_string()); - let sled = db::model::Sled::new(id, address); + let sled = db::model::Sled::new(id, address, self.rack_id); self.db_datastore.sled_upsert(sled).await?; Ok(()) } diff --git a/nexus/src/config.rs b/nexus/src/config.rs 
index 83be56fd335..98cbf0169cf 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -329,6 +329,7 @@ mod test { max_vpc_ipv4_subnet_prefix = 27 [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 @@ -348,6 +349,9 @@ mod test { Config { deployment: DeploymentConfig { id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), + rack_id: "38b90dc4-c22a-65ba-f49a-f051fe01208f" + .parse() + .unwrap(), dropshot_external: ConfigDropshot { bind_address: "10.1.2.3:4567" .parse::() @@ -407,6 +411,7 @@ mod test { address = "[::1]:8123" [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 @@ -448,6 +453,7 @@ mod test { address = "[::1]:8123" [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 @@ -503,6 +509,7 @@ mod test { max_vpc_ipv4_subnet_prefix = 100 [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 499eee458bc..6c28185ce7d 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -4034,8 +4034,9 @@ mod test { 0, 0, ); + let rack_id = Uuid::new_v4(); let sled_id = Uuid::new_v4(); - let sled = Sled::new(sled_id, bogus_addr.clone()); + let sled = Sled::new(sled_id, bogus_addr.clone(), rack_id); datastore.sled_upsert(sled).await.unwrap(); sled_id } @@ -4391,14 +4392,15 @@ mod test { let opctx = OpContext::for_tests(logctx.log.new(o!()), datastore.clone()); + let rack_id = Uuid::new_v4(); let 
addr1 = "[fd00:1de::1]:12345".parse().unwrap(); let sled1_id = "0de4b299-e0b4-46f0-d528-85de81a7095f".parse().unwrap(); - let sled1 = db::model::Sled::new(sled1_id, addr1); + let sled1 = db::model::Sled::new(sled1_id, addr1, rack_id); datastore.sled_upsert(sled1).await.unwrap(); let addr2 = "[fd00:1df::1]:12345".parse().unwrap(); let sled2_id = "66285c18-0c79-43e0-e54f-95271f271314".parse().unwrap(); - let sled2 = db::model::Sled::new(sled2_id, addr2); + let sled2 = db::model::Sled::new(sled2_id, addr2, rack_id); datastore.sled_upsert(sled2).await.unwrap(); let ip = datastore.next_ipv6_address(&opctx, sled1_id).await.unwrap(); diff --git a/nexus/src/db/model/sled.rs b/nexus/src/db/model/sled.rs index ad756c3473f..ebe492c7459 100644 --- a/nexus/src/db/model/sled.rs +++ b/nexus/src/db/model/sled.rs @@ -21,6 +21,8 @@ pub struct Sled { time_deleted: Option>, rcgen: Generation, + pub rack_id: Uuid, + // ServiceAddress (Sled Agent). pub ip: ipv6::Ipv6Addr, pub port: SqlU16, @@ -30,7 +32,7 @@ pub struct Sled { } impl Sled { - pub fn new(id: Uuid, addr: SocketAddrV6) -> Self { + pub fn new(id: Uuid, addr: SocketAddrV6, rack_id: Uuid) -> Self { let last_used_address = { let mut segments = addr.ip().segments(); segments[7] += omicron_common::address::RSS_RESERVED_ADDRESSES; @@ -40,6 +42,7 @@ impl Sled { identity: SledIdentity::new(id), time_deleted: None, rcgen: Generation::new(), + rack_id, ip: ipv6::Ipv6Addr::from(addr.ip()), port: addr.port().into(), last_used_address, diff --git a/nexus/src/db/schema.rs b/nexus/src/db/schema.rs index a6d281d987e..41c8c3527b9 100644 --- a/nexus/src/db/schema.rs +++ b/nexus/src/db/schema.rs @@ -297,6 +297,7 @@ table! 
{ time_deleted -> Nullable, rcgen -> Int8, + rack_id -> Uuid, ip -> Inet, port -> Int4, last_used_address -> Inet, diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 79f8a2cd838..f0d5210930b 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -36,7 +36,6 @@ use external_api::http_entrypoints::external_api; use internal_api::http_entrypoints::internal_api; use slog::Logger; use std::sync::Arc; -use uuid::Uuid; #[macro_use] extern crate slog; @@ -82,7 +81,6 @@ impl Server { /// Start a nexus server. pub async fn start( config: &Config, - rack_id: Uuid, log: &Logger, ) -> Result { let log = log.new(o!("name" => config.deployment.id.to_string())); @@ -90,7 +88,8 @@ impl Server { let ctxlog = log.new(o!("component" => "ServerContext")); - let apictx = ServerContext::new(rack_id, ctxlog, &config)?; + let apictx = + ServerContext::new(config.deployment.rack_id, ctxlog, &config)?; let http_server_starter_external = dropshot::HttpServerStarter::new( &config.deployment.dropshot_external, @@ -167,8 +166,7 @@ pub async fn run_server(config: &Config) -> Result<(), String> { } else { debug!(log, "registered DTrace probes"); } - let rack_id = Uuid::new_v4(); - let server = Server::start(config, rack_id, &log).await?; + let server = Server::start(config, &log).await?; server.register_as_producer().await; server.wait_for_finish().await } diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 02b9a0d7b7d..ed056f48d8d 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -90,7 +90,6 @@ pub async fn test_setup_with_config( config: &mut omicron_nexus::Config, ) -> ControlPlaneTestContext { let logctx = LogContext::new(test_name, &config.pkg.log); - let rack_id = Uuid::parse_str(RACK_UUID).unwrap(); let log = &logctx.log; // Start up CockroachDB. 
@@ -104,9 +103,8 @@ pub async fn test_setup_with_config( nexus_config::Database::FromUrl { url: database.pg_config().clone() }; config.pkg.timeseries_db.address.set_port(clickhouse.port()); - let server = omicron_nexus::Server::start(&config, rack_id, &logctx.log) - .await - .unwrap(); + let server = + omicron_nexus::Server::start(&config, &logctx.log).await.unwrap(); server .apictx .nexus diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index fc432554bfa..507d92baf91 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -245,6 +245,7 @@ impl Agent { &self.sled_config, self.parent_log.clone(), sled_address, + request.rack_id, ) .await .map_err(|e| { diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index def1f55c068..fdbbf2c4295 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -8,13 +8,20 @@ use super::trust_quorum::ShareDistribution; use omicron_common::address::{Ipv6Subnet, SLED_PREFIX}; use serde::{Deserialize, Serialize}; use std::borrow::Cow; +use uuid::Uuid; /// Configuration information for launching a Sled Agent. #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] pub struct SledAgentRequest { + /// Uuid of the Sled Agent to be created. + pub id: Uuid, + /// Portion of the IP space to be managed by the Sled Agent. pub subnet: Ipv6Subnet, + /// Uuid of the rack to which this sled agent belongs. + pub rack_id: Uuid, + /// Share of the rack secret for this Sled Agent. 
// TODO-cleanup This is currently optional because we don't do trust quorum // shares for single-node deployments (i.e., most dev/test environments), diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 0f8775ed932..c48a20cc4bc 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -357,6 +357,7 @@ impl ServiceInner { (request, (idx, bootstrap_addr)) }); + let rack_id = Uuid::new_v4(); let allocations = requests_and_sleds.map(|(request, sled)| { let (idx, bootstrap_addr) = sled; info!( @@ -373,7 +374,9 @@ impl ServiceInner { bootstrap_addr, SledAllocation { initialization_request: SledAgentRequest { + id: Uuid::new_v4(), subnet, + rack_id, trust_quorum_share: maybe_rack_secret_shares .as_mut() .map(|shares_iter| { diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 3b31854628e..df596db8d01 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -38,6 +38,7 @@ impl Server { config: &Config, log: Logger, addr: SocketAddrV6, + rack_id: Uuid, ) -> Result { info!(log, "setting up sled agent server"); @@ -47,10 +48,15 @@ impl Server { client_log, )); - let sled_agent = - SledAgent::new(&config, log.clone(), nexus_client.clone(), addr) - .await - .map_err(|e| e.to_string())?; + let sled_agent = SledAgent::new( + &config, + log.clone(), + nexus_client.clone(), + addr, + rack_id, + ) + .await + .map_err(|e| e.to_string())?; let mut dropshot_config = dropshot::ConfigDropshot::default(); dropshot_config.request_body_max_bytes = 1024 * 1024; diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 3f617aaf399..dde2ef47937 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -25,6 +25,7 @@ use std::path::{Path, PathBuf}; use std::str::FromStr; use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; +use uuid::Uuid; // The filename of ServiceManager's internal storage. 
const SERVICE_CONFIG_FILENAME: &str = "service.toml"; @@ -124,6 +125,7 @@ pub struct ServiceManager { vnic_allocator: VnicAllocator, underlay_vnic: EtherstubVnic, underlay_address: Ipv6Addr, + rack_id: Uuid, } impl ServiceManager { @@ -143,6 +145,7 @@ impl ServiceManager { underlay_vnic: EtherstubVnic, underlay_address: Ipv6Addr, config: Config, + rack_id: Uuid, ) -> Result { debug!(log, "Creating new ServiceManager"); let mgr = Self { @@ -152,6 +155,7 @@ impl ServiceManager { vnic_allocator: VnicAllocator::new("Service", etherstub), underlay_vnic, underlay_address, + rack_id, }; let config_path = mgr.services_config_path(); @@ -316,6 +320,7 @@ impl ServiceManager { // cannot be known at packaging time. let deployment_config = NexusDeploymentConfig { id: service.id, + rack_id: self.rack_id, dropshot_external: ConfigDropshot { bind_address: SocketAddr::V6(external_address), request_body_max_bytes: 1048576, @@ -702,6 +707,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -728,6 +734,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -756,6 +763,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -773,6 +781,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -797,6 +806,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -816,6 +826,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, config, + Uuid::new_v4(), ) .await .unwrap(); diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index e674663bac6..14e34f0d8d3 
100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -115,6 +115,7 @@ impl SledAgent { log: Logger, nexus_client: Arc, sled_address: SocketAddrV6, + rack_id: Uuid, ) -> Result { let id = &config.id; @@ -248,6 +249,7 @@ impl SledAgent { etherstub_vnic.clone(), *sled_address.ip(), services::Config::default(), + rack_id, ) .await?; From a4309ac5bf0b986b347be1d1583ff370a626c2ec Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 18:40:57 -0400 Subject: [PATCH 11/17] need rack_id in the test config too --- nexus/tests/config.test.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 0a8789893a1..fdfeb5effb4 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -39,6 +39,7 @@ max_vpc_ipv4_subnet_prefix = 29 # Identifier for this instance of Nexus. # NOTE: The test suite always overrides this. id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" +rack_id = "c19a698f-c6f9-4a17-ae30-20d711b8f7dc" # # NOTE: for the test suite, the port MUST be 0 (in order to bind to any From ff2d7b91b45522a56fa906eb34161fd98858ffc8 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 15:22:29 -0400 Subject: [PATCH 12/17] [internal-dns] Avoid 'picking ports' --- Cargo.lock | 10 ----- internal-dns/Cargo.toml | 1 - internal-dns/src/bin/dns-server.rs | 12 +++--- internal-dns/src/dns_server.rs | 46 +++++++++++++++------- internal-dns/tests/basic_test.rs | 61 ++++++++++++++---------------- 5 files changed, 66 insertions(+), 64 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8d997dd9762..14488da8837 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2315,7 +2315,6 @@ dependencies = [ "omicron-test-utils", "openapi-lint", "openapiv3", - "portpicker", "pretty-hex 0.3.0", "schemars", "serde", @@ -3867,15 +3866,6 @@ dependencies = [ "universal-hash", ] -[[package]] -name = "portpicker" -version = "0.1.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "be97d76faf1bfab666e1375477b23fde79eccf0276e9b63b92a39d676a889ba9" -dependencies = [ - "rand 0.8.5", -] - [[package]] name = "postcard" version = "0.7.3" diff --git a/internal-dns/Cargo.toml b/internal-dns/Cargo.toml index 886fa72cc18..d49859f18c1 100644 --- a/internal-dns/Cargo.toml +++ b/internal-dns/Cargo.toml @@ -30,7 +30,6 @@ expectorate = "1.0.5" omicron-test-utils = { path = "../test-utils" } openapiv3 = "1.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } -portpicker = "0.1" serde_json = "1.0" subprocess = "0.2.9" trust-dns-resolver = "0.21" diff --git a/internal-dns/src/bin/dns-server.rs b/internal-dns/src/bin/dns-server.rs index 96e9da6feca..12eafcc3599 100644 --- a/internal-dns/src/bin/dns-server.rs +++ b/internal-dns/src/bin/dns-server.rs @@ -54,20 +54,18 @@ async fn main() -> Result<(), anyhow::Error> { let db = Arc::new(sled::open(&config.data.storage_path)?); - { + let _dns_server = { let db = db.clone(); let log = log.clone(); let dns_config = internal_dns::dns_server::Config { bind_address: dns_address.to_string(), zone: zone.to_string(), }; - tokio::spawn(async move { - internal_dns::dns_server::run(log, db, dns_config).await - }); - } + internal_dns::dns_server::run(log, db, dns_config).await? 
+ }; - let server = internal_dns::start_server(config, log, db).await?; - server + let dropshot_server = internal_dns::start_server(config, log, db).await?; + dropshot_server .await .map_err(|error_message| anyhow!("server exiting: {}", error_message)) } diff --git a/internal-dns/src/dns_server.rs b/internal-dns/src/dns_server.rs index bffda7cc73f..ccebda582f7 100644 --- a/internal-dns/src/dns_server.rs +++ b/internal-dns/src/dns_server.rs @@ -34,23 +34,43 @@ pub struct Config { pub zone: String, } -pub async fn run(log: Logger, db: Arc, config: Config) -> Result<()> { +pub struct Server { + pub address: SocketAddr, + pub handle: tokio::task::JoinHandle>, +} + +impl Server { + pub fn close(self) { + self.handle.abort() + } +} + +pub async fn run( + log: Logger, + db: Arc, + config: Config, +) -> Result { let socket = Arc::new(UdpSocket::bind(config.bind_address).await?); + let address = socket.local_addr()?; - loop { - let mut buf = vec![0u8; 16384]; - let (n, src) = socket.recv_from(&mut buf).await?; - buf.resize(n, 0); + let handle = tokio::task::spawn(async move { + loop { + let mut buf = vec![0u8; 16384]; + let (n, src) = socket.recv_from(&mut buf).await?; + buf.resize(n, 0); - let socket = socket.clone(); - let log = log.clone(); - let db = db.clone(); - let zone = config.zone.clone(); + let socket = socket.clone(); + let log = log.clone(); + let db = db.clone(); + let zone = config.zone.clone(); - tokio::spawn(async move { - handle_req(log, db, socket, src, buf, zone).await - }); - } + tokio::spawn(async move { + handle_req(log, db, socket, src, buf, zone).await + }); + } + }); + + Ok(Server { address, handle }) } async fn respond_nxdomain( diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs index 29d358970c7..af72ded52cb 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -2,7 +2,7 @@ // License, v. 2.0. 
If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::net::Ipv6Addr; use std::sync::Arc; use anyhow::{Context, Result}; @@ -280,13 +280,16 @@ pub async fn servfail() -> Result<(), anyhow::Error> { struct TestContext { client: Client, resolver: TokioAsyncResolver, - server: dropshot::HttpServer>, + dns_server: internal_dns::dns_server::Server, + dropshot_server: + dropshot::HttpServer>, tmp: tempdir::TempDir, } impl TestContext { async fn cleanup(self) { - self.server.close().await.expect("Failed to clean up server"); + self.dns_server.close(); + self.dropshot_server.close().await.expect("Failed to clean up server"); self.tmp.close().expect("Failed to clean up tmp directory"); } } @@ -295,7 +298,7 @@ async fn init_client_server( zone: String, ) -> Result { // initialize dns server config - let (tmp, config, dropshot_port, dns_port) = test_config()?; + let (tmp, config) = test_config()?; let log = config .log .to_logger("internal-dns") @@ -305,17 +308,21 @@ async fn init_client_server( let db = Arc::new(sled::open(&config.data.storage_path)?); db.clear()?; - let client = - Client::new(&format!("http://[::1]:{}", dropshot_port), log.clone()); + // launch a dns server + let dns_server = { + let db = db.clone(); + let log = log.clone(); + let dns_config = internal_dns::dns_server::Config { + bind_address: "[::1]:0".into(), + zone, + }; + + internal_dns::dns_server::run(log, db, dns_config).await? 
+ }; let mut rc = ResolverConfig::new(); rc.add_name_server(NameServerConfig { - socket_addr: SocketAddr::V6(SocketAddrV6::new( - Ipv6Addr::LOCALHOST, - dns_port, - 0, - 0, - )), + socket_addr: dns_server.address, protocol: Protocol::Udp, tls_dns_name: None, trust_nx_responses: false, @@ -325,33 +332,21 @@ async fn init_client_server( let resolver = TokioAsyncResolver::tokio(rc, ResolverOpts::default()).unwrap(); - // launch a dns server - { - let db = db.clone(); - let log = log.clone(); - let dns_config = internal_dns::dns_server::Config { - bind_address: format!("[::1]:{}", dns_port), - zone, - }; - - tokio::spawn(async move { - internal_dns::dns_server::run(log, db, dns_config).await - }); - } - // launch a dropshot server - let server = internal_dns::start_server(config, log, db).await?; + let dropshot_server = + internal_dns::start_server(config, log.clone(), db).await?; // wait for server to start tokio::time::sleep(tokio::time::Duration::from_millis(250)).await; - Ok(TestContext { client, resolver, server, tmp }) + let client = + Client::new(&format!("http://{}", dropshot_server.local_addr()), log); + + Ok(TestContext { client, resolver, dns_server, dropshot_server, tmp }) } fn test_config( -) -> Result<(tempdir::TempDir, internal_dns::Config, u16, u16), anyhow::Error> { - let dropshot_port = portpicker::pick_unused_port().expect("pick port"); - let dns_port = portpicker::pick_unused_port().expect("pick port"); +) -> Result<(tempdir::TempDir, internal_dns::Config), anyhow::Error> { let tmp_dir = tempdir::TempDir::new("internal-dns-test")?; let mut storage_path = tmp_dir.path().to_path_buf(); storage_path.push("test"); @@ -362,7 +357,7 @@ fn test_config( level: dropshot::ConfigLoggingLevel::Info, }, dropshot: dropshot::ConfigDropshot { - bind_address: format!("[::1]:{}", dropshot_port).parse().unwrap(), + bind_address: format!("[::1]:0").parse().unwrap(), request_body_max_bytes: 1024, ..Default::default() }, @@ -372,5 +367,5 @@ fn test_config( }, }; - 
Ok((tmp_dir, config, dropshot_port, dns_port)) + Ok((tmp_dir, config)) } From 2a035a5ab9fc56068266d88d9ed2cc8edae1c63f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 15:29:28 -0400 Subject: [PATCH 13/17] Changes from rss-handoff --- Cargo.lock | 7 + internal-dns-client/Cargo.toml | 9 + internal-dns-client/src/multiclient.rs | 612 ++++++++++++++++++++++--- internal-dns-client/src/names.rs | 103 ++++- 4 files changed, 670 insertions(+), 61 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3956c2c3a4b..fe1cc390b81 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2338,13 +2338,20 @@ dependencies = [ name = "internal-dns-client" version = "0.1.0" dependencies = [ + "dropshot", "futures", + "internal-dns", "omicron-common 0.1.0", + "omicron-test-utils", "progenitor", "reqwest", "serde", "serde_json", + "sled", "slog", + "tempfile", + "thiserror", + "tokio", "trust-dns-proto", "trust-dns-resolver", "uuid", diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml index 3303ddfc44c..4872699610a 100644 --- a/internal-dns-client/Cargo.toml +++ b/internal-dns-client/Cargo.toml @@ -12,6 +12,15 @@ reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } serde = { version = "1.0", features = [ "derive" ] } serde_json = "1.0" slog = { version = "2.5.0", features = [ "max_level_trace", "release_max_level_debug" ] } +thiserror = "1.0" trust-dns-proto = "0.21" trust-dns-resolver = "0.21" uuid = { version = "1.1.0", features = [ "v4", "serde" ] } + +[dev-dependencies] +dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } +internal-dns = { path = "../internal-dns" } +omicron-test-utils = { path = "../test-utils" } +sled = "0.34" +tempfile = "3.3" +tokio = { version = "1.18", features = [ "full" ] } diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 24c8817c274..ca8387fca45 100644 --- 
a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -7,49 +7,122 @@ use futures::stream::{self, StreamExt, TryStreamExt}; use omicron_common::address::{ Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, DNS_SERVER_PORT, }; -use omicron_common::backoff::{ - internal_service_policy, retry_notify, BackoffError, -}; -use slog::{info, warn, Logger}; -use std::net::{SocketAddr, SocketAddrV6}; +use slog::{info, Logger}; +use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use trust_dns_resolver::config::{ NameServerConfig, Protocol, ResolverConfig, ResolverOpts, }; use trust_dns_resolver::TokioAsyncResolver; -type DnsError = crate::Error; +pub type DnsError = crate::Error; + +/// Describes how to find the DNS servers. +/// +/// In production code, this is nearly always [`Ipv6Subnet`], +/// but it allows a point of dependency-injection for tests to supply their +/// own address lookups. +pub trait DnsAddressLookup { + fn dropshot_server_addrs(&self) -> Vec; + + fn dns_server_addrs(&self) -> Vec; +} + +fn subnet_to_ips( + subnet: Ipv6Subnet, +) -> impl Iterator { + ReservedRackSubnet::new(subnet) + .get_dns_subnets() + .into_iter() + .map(|dns_subnet| IpAddr::V6(dns_subnet.dns_address().ip())) +} + +impl DnsAddressLookup for Ipv6Subnet { + fn dropshot_server_addrs(&self) -> Vec { + subnet_to_ips(*self) + .map(|address| SocketAddr::new(address, DNS_SERVER_PORT)) + .collect() + } + + fn dns_server_addrs(&self) -> Vec { + subnet_to_ips(*self) + .map(|address| SocketAddr::new(address, DNS_PORT)) + .collect() + } +} + +/// Describes a service which may be inserted into DNS records. +pub trait Service { + fn aaaa(&self) -> crate::names::AAAA; + fn srv(&self) -> crate::names::SRV; + fn address(&self) -> SocketAddrV6; +} /// A connection used to update multiple DNS servers. 
pub struct Updater { + log: Logger, clients: Vec, } impl Updater { - /// Creates a new "Updater", capable of communicating with all - /// DNS servers within the AZ. - pub fn new(subnet: Ipv6Subnet, log: Logger) -> Self { - let clients = ReservedRackSubnet::new(subnet) - .get_dns_subnets() + pub fn new(address_getter: &impl DnsAddressLookup, log: Logger) -> Self { + let addrs = address_getter.dropshot_server_addrs(); + Self::new_from_addrs(addrs, log) + } + + fn new_from_addrs(addrs: Vec, log: Logger) -> Self { + let clients = addrs .into_iter() - .map(|dns_subnet| { - let addr = dns_subnet.dns_address().ip(); + .map(|addr| { info!(log, "Adding DNS server: {}", addr); - crate::Client::new( - &format!("http://[{}]:{}", addr, DNS_SERVER_PORT), - log.clone(), - ) + crate::Client::new(&format!("http://{}", addr), log.clone()) }) .collect::>(); - Self { clients } + Self { log, clients } } - /// Utility function to insert: - /// - A set of uniquely-named AAAA records, each corresponding to an address - /// - An SRV record, pointing to each of the AAAA records. + /// Inserts all service records into the DNS server. + /// + /// This method is most efficient when records are sorted by SRV key. 
pub async fn insert_dns_records( &self, - log: &Logger, + records: &Vec, + ) -> Result<(), DnsError> { + let mut records = records.iter().peekable(); + + while let Some(record) = records.next() { + let srv = record.srv(); + info!(self.log, "Inserting DNS record: {:?}", srv); + + match &srv { + &crate::names::SRV::Service(_) => { + let mut aaaa = vec![(record.aaaa(), record.address())]; + while let Some(record) = records.peek() { + if record.srv() == srv { + let record = records.next().unwrap(); + aaaa.push((record.aaaa(), record.address())); + } else { + break; + } + } + + self.insert_dns_records_internal(aaaa, srv).await?; + } + &crate::names::SRV::Backend(_, _) => { + let aaaa = vec![(record.aaaa(), record.address())]; + self.insert_dns_records_internal(aaaa, record.srv()) + .await?; + } + }; + } + Ok(()) + } + + // Utility function to insert: + // - A set of uniquely-named AAAA records, each corresponding to an address + // - An SRV record, pointing to each of the AAAA records. + async fn insert_dns_records_internal( + &self, aaaa: Vec<(crate::names::AAAA, SocketAddrV6)>, srv_key: crate::names::SRV, ) -> Result<(), DnsError> { @@ -76,20 +149,7 @@ impl Updater { }) .collect::>(), }); - - let set_record = || async { - self.dns_records_set(&records) - .await - .map_err(BackoffError::transient)?; - Ok::<(), BackoffError>(()) - }; - let log_failure = |error, _| { - warn!(log, "Failed to set DNS records"; "error" => ?error); - }; - - retry_notify(internal_service_policy(), set_record, log_failure) - .await?; - Ok(()) + self.dns_records_set(&records).await } /// Sets a records on all DNS servers. @@ -129,27 +189,463 @@ impl Updater { } } -/// Creates a resolver using all internal DNS name servers. 
-pub fn create_resolver( - subnet: Ipv6Subnet, -) -> Result { - let mut rc = ResolverConfig::new(); - let dns_ips = ReservedRackSubnet::new(subnet) - .get_dns_subnets() - .into_iter() - .map(|subnet| subnet.dns_address().ip()) - .collect::>(); - - for dns_ip in dns_ips { - rc.add_name_server(NameServerConfig { - socket_addr: SocketAddr::V6(SocketAddrV6::new( - dns_ip, DNS_PORT, 0, 0, - )), - protocol: Protocol::Udp, - tls_dns_name: None, - trust_nx_responses: false, - bind_addr: None, - }); +#[derive(Debug, Clone, thiserror::Error)] +pub enum ResolveError { + #[error(transparent)] + Resolve(#[from] trust_dns_resolver::error::ResolveError), + + #[error("Record not found for SRV key: {0}")] + NotFound(crate::names::SRV), +} + +/// A wrapper around a DNS resolver, providing a way to conveniently +/// look up IP addresses of services based on their SRV keys. +pub struct Resolver { + inner: Box, +} + +impl Resolver { + pub fn new( + address_getter: &impl DnsAddressLookup, + ) -> Result { + let dns_addrs = address_getter.dns_server_addrs(); + Self::new_from_addrs(dns_addrs) + } + + fn new_from_addrs( + dns_addrs: Vec, + ) -> Result { + let mut rc = ResolverConfig::new(); + for socket_addr in dns_addrs.into_iter() { + rc.add_name_server(NameServerConfig { + socket_addr, + protocol: Protocol::Udp, + tls_dns_name: None, + trust_nx_responses: false, + bind_addr: None, + }); + } + let inner = + Box::new(TokioAsyncResolver::tokio(rc, ResolverOpts::default())?); + + Ok(Self { inner }) + } + + /// Convenience wrapper for [`Resolver::new`] which determines the subnet + /// based on a provided IP address. + pub fn new_from_ip(address: Ipv6Addr) -> Result { + let subnet = Ipv6Subnet::::new(address); + + Resolver::new(&subnet) + } + + /// Looks up a single [`Ipv6Addr`] based on the SRV name. + /// Returns an error if the record does not exist. + // TODO: There are lots of ways this API can expand: Caching, + // actually respecting TTL, looking up ports, etc. 
+ // + // For now, however, it serves as a very simple "get everyone using DNS" + // API that can be improved upon later. + pub async fn lookup_ipv6( + &self, + srv: crate::names::SRV, + ) -> Result { + let response = self.inner.ipv6_lookup(&srv.to_string()).await?; + let address = response + .iter() + .next() + .ok_or_else(|| ResolveError::NotFound(srv))?; + Ok(*address) + } + + pub async fn lookup_ip( + &self, + srv: crate::names::SRV, + ) -> Result { + let response = self.inner.lookup_ip(&srv.to_string()).await?; + let address = response + .iter() + .next() + .ok_or_else(|| ResolveError::NotFound(srv))?; + Ok(address) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::names::{BackendName, ServiceName, AAAA, SRV}; + use omicron_test_utils::dev::test_setup_log; + use std::str::FromStr; + use std::sync::Arc; + use tempfile::TempDir; + use uuid::Uuid; + + struct DnsServer { + _storage: TempDir, + dns_server: internal_dns::dns_server::Server, + dropshot_server: + dropshot::HttpServer>, + } + + impl DnsServer { + async fn create(log: &Logger) -> Self { + let storage = + TempDir::new().expect("Failed to create temporary directory"); + + let db = Arc::new(sled::open(&storage.path()).unwrap()); + + let dns_server = { + let db = db.clone(); + let log = log.clone(); + let dns_config = internal_dns::dns_server::Config { + bind_address: "[::1]:0".to_string(), + zone: crate::names::DNS_ZONE.into(), + }; + + internal_dns::dns_server::run(log, db, dns_config) + .await + .unwrap() + }; + + let config = internal_dns::Config { + log: dropshot::ConfigLogging::StderrTerminal { + level: dropshot::ConfigLoggingLevel::Info, + }, + dropshot: dropshot::ConfigDropshot { + bind_address: "[::1]:0".parse().unwrap(), + request_body_max_bytes: 1024, + ..Default::default() + }, + data: internal_dns::dns_data::Config { + nmax_messages: 16, + storage_path: storage.path().to_string_lossy().into(), + }, + }; + + let dropshot_server = + internal_dns::start_server(config, log.clone(), 
db) + .await + .unwrap(); + + Self { _storage: storage, dns_server, dropshot_server } + } + + fn dns_server_address(&self) -> SocketAddr { + self.dns_server.address + } + + fn dropshot_server_address(&self) -> SocketAddr { + self.dropshot_server.local_addr() + } + } + + // A test-only way to infer DNS addresses. + // + // Rather than inferring DNS server addresses from the rack subnet, + // they may be explicitly supplied. This results in easier-to-test code. + #[derive(Default)] + struct LocalAddressGetter { + addrs: Vec<(SocketAddr, SocketAddr)>, + } + + impl LocalAddressGetter { + fn add_dns_server( + &mut self, + dns_address: SocketAddr, + server_address: SocketAddr, + ) { + self.addrs.push((dns_address, server_address)); + } + } + + impl DnsAddressLookup for LocalAddressGetter { + fn dropshot_server_addrs(&self) -> Vec { + self.addrs + .iter() + .map(|(_dns_address, dropshot_address)| *dropshot_address) + .collect() + } + + fn dns_server_addrs(&self) -> Vec { + self.addrs + .iter() + .map(|(dns_address, _dropshot_address)| *dns_address) + .collect() + } + } + + // The resolver cannot look up IPs before records have been inserted. + #[tokio::test] + async fn lookup_nonexistent_record_fails() { + let logctx = test_setup_log("lookup_nonexistent_record_fails"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new(&address_getter) + .expect("Error creating localhost resolver"); + + let err = resolver + .lookup_ip(SRV::Service(ServiceName::Cockroach)) + .await + .expect_err("Looking up non-existent service should fail"); + + let dns_error = match err { + ResolveError::Resolve(err) => err, + _ => panic!("Unexpected error: {err}"), + }; + assert!( + matches!( + dns_error.kind(), + trust_dns_resolver::error::ResolveErrorKind::NoRecordsFound { .. 
}, + ), + "Saw error: {dns_error}", + ); + logctx.cleanup_successful(); + } + + #[derive(Clone)] + struct TestServiceRecord { + aaaa: AAAA, + srv: SRV, + addr: SocketAddrV6, + } + + impl TestServiceRecord { + fn new(aaaa: AAAA, srv: SRV, addr: SocketAddrV6) -> Self { + Self { aaaa, srv, addr } + } + } + + impl Service for TestServiceRecord { + fn aaaa(&self) -> AAAA { + self.aaaa.clone() + } + + fn srv(&self) -> SRV { + self.srv.clone() + } + + fn address(&self) -> SocketAddrV6 { + self.addr + } + } + + // Insert and retreive a single DNS record. + #[tokio::test] + async fn insert_and_lookup_one_record() { + let logctx = test_setup_log("insert_and_lookup_one_record"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new(&address_getter) + .expect("Error creating localhost resolver"); + let updater = Updater::new(&address_getter, logctx.log.clone()); + + let record = TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 12345, + 0, + 0, + ), + ); + updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, record.addr.ip()); + + logctx.cleanup_successful(); + } + + // Insert multiple DNS records of different types. 
+ #[tokio::test] + async fn insert_and_lookup_multiple_records() { + let logctx = test_setup_log("insert_and_lookup_multiple_records"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new(&address_getter) + .expect("Error creating localhost resolver"); + let updater = Updater::new(&address_getter, logctx.log.clone()); + + let cockroach_addrs = [ + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 1111, + 0, + 0, + ), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::02").unwrap(), + 2222, + 0, + 0, + ), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::03").unwrap(), + 3333, + 0, + 0, + ), + ]; + let clickhouse_addr = SocketAddrV6::new( + Ipv6Addr::from_str("fe::01").unwrap(), + 4444, + 0, + 0, + ); + let crucible_addr = SocketAddrV6::new( + Ipv6Addr::from_str("fd::02").unwrap(), + 5555, + 0, + 0, + ); + + let records = vec![ + // Three Cockroach services + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + cockroach_addrs[0], + ), + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + cockroach_addrs[1], + ), + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + cockroach_addrs[2], + ), + // One Clickhouse service + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Clickhouse), + clickhouse_addr, + ), + // One Backend service + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Backend(BackendName::Crucible, Uuid::new_v4()), + crucible_addr, + ), + ]; + updater.insert_dns_records(&records).await.unwrap(); + + // Look up Cockroach + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + 
assert!(cockroach_addrs.iter().any(|addr| addr.ip() == &ip)); + + // Look up Clickhouse + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Clickhouse)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, clickhouse_addr.ip()); + + // Look up Backend Service + let ip = resolver + .lookup_ipv6(records[4].srv.clone()) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, crucible_addr.ip()); + + // If we remove the AAAA records for two of the CRDB services, + // only one will remain. + updater + .dns_records_delete(&vec![DnsRecordKey { + name: records[0].aaaa.to_string(), + }]) + .await + .expect("Should have been able to delete record"); + updater + .dns_records_delete(&vec![DnsRecordKey { + name: records[1].aaaa.to_string(), + }]) + .await + .expect("Should have been able to delete record"); + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, cockroach_addrs[2].ip()); + + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn update_record() { + let logctx = test_setup_log("update_record"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new(&address_getter) + .expect("Error creating localhost resolver"); + let updater = Updater::new(&address_getter, logctx.log.clone()); + + // Insert a record, observe that it exists. 
+ let mut record = TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 12345, + 0, + 0, + ), + ); + updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, record.addr.ip()); + + // If we insert the same record with a new address, it should be + // updated. + record.addr = SocketAddrV6::new( + Ipv6Addr::from_str("ee::02").unwrap(), + 54321, + 0, + 0, + ); + updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, record.addr.ip()); + + logctx.cleanup_successful(); } - TokioAsyncResolver::tokio(rc, ResolverOpts::default()) } diff --git a/internal-dns-client/src/names.rs b/internal-dns-client/src/names.rs index 6384ec9e503..dbcc0d9f01c 100644 --- a/internal-dns-client/src/names.rs +++ b/internal-dns-client/src/names.rs @@ -2,22 +2,63 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +//! Naming scheme for Internal DNS names (RFD 248). + use std::fmt; use uuid::Uuid; -const DNS_ZONE: &str = "control-plane.oxide.internal"; +pub(crate) const DNS_ZONE: &str = "control-plane.oxide.internal"; + +/// Names for services where backends are interchangeable. 
+#[derive(Clone, Debug, PartialEq, PartialOrd)] +pub enum ServiceName { + Clickhouse, + Cockroach, + InternalDNS, + Nexus, + Oximeter, +} +impl fmt::Display for ServiceName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + ServiceName::Clickhouse => write!(f, "clickhouse"), + ServiceName::Cockroach => write!(f, "cockroach"), + ServiceName::InternalDNS => write!(f, "internalDNS"), + ServiceName::Nexus => write!(f, "nexus"), + ServiceName::Oximeter => write!(f, "oximeter"), + } + } +} + +/// Names for services where backends are not interchangeable. +#[derive(Clone, Debug, PartialEq, PartialOrd)] +pub enum BackendName { + Crucible, + SledAgent, +} + +impl fmt::Display for BackendName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + BackendName::Crucible => write!(f, "crucible"), + BackendName::SledAgent => write!(f, "sledagent"), + } + } +} + +#[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum SRV { /// A service identified and accessed by name, such as "nexus", "CRDB", etc. /// /// This is used in cases where services are interchangeable. - Service(String), + Service(ServiceName), /// A service identified by name and a unique identifier. /// /// This is used in cases where services are not interchangeable, such as /// for the Sled agent. - Backend(String, Uuid), + Backend(BackendName, Uuid), } impl fmt::Display for SRV { @@ -33,6 +74,7 @@ impl fmt::Display for SRV { } } +#[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum AAAA { /// Identifies an AAAA record for a sled. 
Sled(Uuid), @@ -53,3 +95,58 @@ impl fmt::Display for AAAA { } } } + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn display_srv_service() { + assert_eq!( + SRV::Service(ServiceName::Clickhouse).to_string(), + "_clickhouse._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::Cockroach).to_string(), + "_cockroach._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::InternalDNS).to_string(), + "_internalDNS._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::Nexus).to_string(), + "_nexus._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::Oximeter).to_string(), + "_oximeter._tcp.control-plane.oxide.internal", + ); + } + + #[test] + fn display_srv_backend() { + let uuid = Uuid::nil(); + assert_eq!( + SRV::Backend(BackendName::Crucible, uuid).to_string(), + "_crucible._tcp.00000000-0000-0000-0000-000000000000.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Backend(BackendName::SledAgent, uuid).to_string(), + "_sledagent._tcp.00000000-0000-0000-0000-000000000000.control-plane.oxide.internal", + ); + } + + #[test] + fn display_aaaa() { + let uuid = Uuid::nil(); + assert_eq!( + AAAA::Sled(uuid).to_string(), + "00000000-0000-0000-0000-000000000000.sled.control-plane.oxide.internal", + ); + assert_eq!( + AAAA::Zone(uuid).to_string(), + "00000000-0000-0000-0000-000000000000.host.control-plane.oxide.internal", + ); + } +} From 4df23c2031efc4cdf09d9739f823203bafa15117 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 21 Jun 2022 10:08:50 -0400 Subject: [PATCH 14/17] jgallagher feedback --- internal-dns/src/bin/dns-server.rs | 3 ++- internal-dns/src/dns_server.rs | 4 ++-- internal-dns/src/lib.rs | 2 +- internal-dns/tests/basic_test.rs | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/internal-dns/src/bin/dns-server.rs b/internal-dns/src/bin/dns-server.rs index 12eafcc3599..b8520efdb26 100644 --- 
a/internal-dns/src/bin/dns-server.rs +++ b/internal-dns/src/bin/dns-server.rs @@ -64,7 +64,8 @@ async fn main() -> Result<(), anyhow::Error> { internal_dns::dns_server::run(log, db, dns_config).await? }; - let dropshot_server = internal_dns::start_server(config, log, db).await?; + let dropshot_server = + internal_dns::start_dropshot_server(config, log, db).await?; dropshot_server .await .map_err(|error_message| anyhow!("server exiting: {}", error_message)) diff --git a/internal-dns/src/dns_server.rs b/internal-dns/src/dns_server.rs index ccebda582f7..51a84899812 100644 --- a/internal-dns/src/dns_server.rs +++ b/internal-dns/src/dns_server.rs @@ -39,8 +39,8 @@ pub struct Server { pub handle: tokio::task::JoinHandle>, } -impl Server { - pub fn close(self) { +impl Drop for Server { + fn drop(&mut self) { self.handle.abort() } } diff --git a/internal-dns/src/lib.rs b/internal-dns/src/lib.rs index 786750c1a8f..7fee156787e 100644 --- a/internal-dns/src/lib.rs +++ b/internal-dns/src/lib.rs @@ -20,7 +20,7 @@ pub struct Config { pub data: dns_data::Config, } -pub async fn start_server( +pub async fn start_dropshot_server( config: Config, log: slog::Logger, db: Arc, diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs index af72ded52cb..d6784bddae0 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -334,7 +334,7 @@ async fn init_client_server( // launch a dropshot server let dropshot_server = - internal_dns::start_server(config, log.clone(), db).await?; + internal_dns::start_dropshot_server(config, log.clone(), db).await?; // wait for server to start tokio::time::sleep(tokio::time::Duration::from_millis(250)).await; From 5556d5f5e5b3df2ae764015eea31935287877694 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 21 Jun 2022 10:12:08 -0400 Subject: [PATCH 15/17] Patch tests --- internal-dns/tests/basic_test.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal-dns/tests/basic_test.rs 
b/internal-dns/tests/basic_test.rs index d6784bddae0..d09e27f18c6 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -288,7 +288,7 @@ struct TestContext { impl TestContext { async fn cleanup(self) { - self.dns_server.close(); + drop(self.dns_server); self.dropshot_server.close().await.expect("Failed to clean up server"); self.tmp.close().expect("Failed to clean up tmp directory"); } From 6126e41f6b87ffa39c206ad81f91ee7620e78fc6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 21 Jun 2022 10:12:39 -0400 Subject: [PATCH 16/17] merge --- internal-dns-client/src/multiclient.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index ca8387fca45..58b2cdea012 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -324,7 +324,7 @@ mod test { }; let dropshot_server = - internal_dns::start_server(config, log.clone(), db) + internal_dns::start_dropshot_server(config, log.clone(), db) .await .unwrap(); From 470da8b19fcbc6abcf4b58b43fc8a60b12b2211c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 23 Jun 2022 21:43:06 -0400 Subject: [PATCH 17/17] review feedback --- internal-dns-client/src/multiclient.rs | 189 +++++++++---------------- internal-dns-client/src/names.rs | 6 +- 2 files changed, 70 insertions(+), 125 deletions(-) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 58b2cdea012..2fc9089e334 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -8,6 +8,7 @@ use omicron_common::address::{ Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, DNS_SERVER_PORT, }; use slog::{info, Logger}; +use std::collections::HashMap; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use trust_dns_resolver::config::{ NameServerConfig, Protocol, ResolverConfig, ResolverOpts, @@ -16,6 +17,8 @@ use 
trust_dns_resolver::TokioAsyncResolver; pub type DnsError = crate::Error; +pub type AAAARecord = (crate::names::AAAA, SocketAddrV6); + /// Describes how to find the DNS servers. /// /// In production code, this is nearly always [`Ipv6Subnet`], @@ -50,13 +53,6 @@ impl DnsAddressLookup for Ipv6Subnet { } } -/// Describes a service which may be inserted into DNS records. -pub trait Service { - fn aaaa(&self) -> crate::names::AAAA; - fn srv(&self) -> crate::names::SRV; - fn address(&self) -> SocketAddrV6; -} - /// A connection used to update multiple DNS servers. pub struct Updater { log: Logger, @@ -83,37 +79,15 @@ impl Updater { /// Inserts all service records into the DNS server. /// - /// This method is most efficient when records are sorted by SRV key. + /// Each SRV record should have one or more AAAA records. pub async fn insert_dns_records( &self, - records: &Vec, + records: &HashMap>, ) -> Result<(), DnsError> { - let mut records = records.iter().peekable(); - - while let Some(record) = records.next() { - let srv = record.srv(); + for (srv, aaaa) in records.iter() { info!(self.log, "Inserting DNS record: {:?}", srv); - match &srv { - &crate::names::SRV::Service(_) => { - let mut aaaa = vec![(record.aaaa(), record.address())]; - while let Some(record) = records.peek() { - if record.srv() == srv { - let record = records.next().unwrap(); - aaaa.push((record.aaaa(), record.address())); - } else { - break; - } - } - - self.insert_dns_records_internal(aaaa, srv).await?; - } - &crate::names::SRV::Backend(_, _) => { - let aaaa = vec![(record.aaaa(), record.address())]; - self.insert_dns_records_internal(aaaa, record.srv()) - .await?; - } - }; + self.insert_dns_records_internal(aaaa, srv).await?; } Ok(()) } @@ -123,8 +97,8 @@ impl Updater { // - An SRV record, pointing to each of the AAAA records. 
async fn insert_dns_records_internal( &self, - aaaa: Vec<(crate::names::AAAA, SocketAddrV6)>, - srv_key: crate::names::SRV, + aaaa: &Vec, + srv_key: &crate::names::SRV, ) -> Result<(), DnsError> { let mut records = Vec::with_capacity(aaaa.len() + 1); @@ -409,33 +383,6 @@ mod test { logctx.cleanup_successful(); } - #[derive(Clone)] - struct TestServiceRecord { - aaaa: AAAA, - srv: SRV, - addr: SocketAddrV6, - } - - impl TestServiceRecord { - fn new(aaaa: AAAA, srv: SRV, addr: SocketAddrV6) -> Self { - Self { aaaa, srv, addr } - } - } - - impl Service for TestServiceRecord { - fn aaaa(&self) -> AAAA { - self.aaaa.clone() - } - - fn srv(&self) -> SRV { - self.srv.clone() - } - - fn address(&self) -> SocketAddrV6 { - self.addr - } - } - // Insert and retreive a single DNS record. #[tokio::test] async fn insert_and_lookup_one_record() { @@ -452,23 +399,28 @@ mod test { .expect("Error creating localhost resolver"); let updater = Updater::new(&address_getter, logctx.log.clone()); - let record = TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), + let records = HashMap::from([( SRV::Service(ServiceName::Cockroach), - SocketAddrV6::new( - Ipv6Addr::from_str("ff::01").unwrap(), - 12345, - 0, - 0, - ), - ); - updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + vec![( + AAAA::Zone(Uuid::new_v4()), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 12345, + 0, + 0, + ), + )], + )]); + updater.insert_dns_records(&records).await.unwrap(); let ip = resolver .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) .await .expect("Should have been able to look up IP address"); - assert_eq!(&ip, record.addr.ip()); + assert_eq!( + &ip, + records[&SRV::Service(ServiceName::Cockroach)][0].1.ip() + ); logctx.cleanup_successful(); } @@ -522,36 +474,31 @@ mod test { 0, ); - let records = vec![ + let srv_crdb = SRV::Service(ServiceName::Cockroach); + let srv_clickhouse = SRV::Service(ServiceName::Clickhouse); + let srv_backend = SRV::Backend(BackendName::Crucible, 
Uuid::new_v4()); + + let records = HashMap::from([ // Three Cockroach services - TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), - SRV::Service(ServiceName::Cockroach), - cockroach_addrs[0], - ), - TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), - SRV::Service(ServiceName::Cockroach), - cockroach_addrs[1], - ), - TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), - SRV::Service(ServiceName::Cockroach), - cockroach_addrs[2], + ( + srv_crdb.clone(), + vec![ + (AAAA::Zone(Uuid::new_v4()), cockroach_addrs[0]), + (AAAA::Zone(Uuid::new_v4()), cockroach_addrs[1]), + (AAAA::Zone(Uuid::new_v4()), cockroach_addrs[2]), + ], ), // One Clickhouse service - TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), - SRV::Service(ServiceName::Clickhouse), - clickhouse_addr, + ( + srv_clickhouse.clone(), + vec![(AAAA::Zone(Uuid::new_v4()), clickhouse_addr)], ), // One Backend service - TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), - SRV::Backend(BackendName::Crucible, Uuid::new_v4()), - crucible_addr, + ( + srv_backend.clone(), + vec![(AAAA::Zone(Uuid::new_v4()), crucible_addr)], ), - ]; + ]); updater.insert_dns_records(&records).await.unwrap(); // Look up Cockroach @@ -570,7 +517,7 @@ mod test { // Look up Backend Service let ip = resolver - .lookup_ipv6(records[4].srv.clone()) + .lookup_ipv6(srv_backend) .await .expect("Should have been able to look up IP address"); assert_eq!(&ip, crucible_addr.ip()); @@ -578,15 +525,10 @@ mod test { // If we remove the AAAA records for two of the CRDB services, // only one will remain. 
updater - .dns_records_delete(&vec![DnsRecordKey { - name: records[0].aaaa.to_string(), - }]) - .await - .expect("Should have been able to delete record"); - updater - .dns_records_delete(&vec![DnsRecordKey { - name: records[1].aaaa.to_string(), - }]) + .dns_records_delete(&vec![ + DnsRecordKey { name: records[&srv_crdb][0].0.to_string() }, + DnsRecordKey { name: records[&srv_crdb][1].0.to_string() }, + ]) .await .expect("Should have been able to delete record"); let ip = resolver @@ -614,37 +556,40 @@ mod test { let updater = Updater::new(&address_getter, logctx.log.clone()); // Insert a record, observe that it exists. - let mut record = TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), - SRV::Service(ServiceName::Cockroach), - SocketAddrV6::new( - Ipv6Addr::from_str("ff::01").unwrap(), - 12345, - 0, - 0, - ), - ); - updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + let srv_crdb = SRV::Service(ServiceName::Cockroach); + let mut records = HashMap::from([( + srv_crdb.clone(), + vec![( + AAAA::Zone(Uuid::new_v4()), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 12345, + 0, + 0, + ), + )], + )]); + updater.insert_dns_records(&records).await.unwrap(); let ip = resolver .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) .await .expect("Should have been able to look up IP address"); - assert_eq!(&ip, record.addr.ip()); + assert_eq!(&ip, records[&srv_crdb][0].1.ip()); // If we insert the same record with a new address, it should be // updated. 
- record.addr = SocketAddrV6::new( + records.get_mut(&srv_crdb).unwrap()[0].1 = SocketAddrV6::new( Ipv6Addr::from_str("ee::02").unwrap(), 54321, 0, 0, ); - updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + updater.insert_dns_records(&records).await.unwrap(); let ip = resolver .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) .await .expect("Should have been able to look up IP address"); - assert_eq!(&ip, record.addr.ip()); + assert_eq!(&ip, records[&srv_crdb][0].1.ip()); logctx.cleanup_successful(); } diff --git a/internal-dns-client/src/names.rs b/internal-dns-client/src/names.rs index dbcc0d9f01c..1b633f915e1 100644 --- a/internal-dns-client/src/names.rs +++ b/internal-dns-client/src/names.rs @@ -10,7 +10,7 @@ use uuid::Uuid; pub(crate) const DNS_ZONE: &str = "control-plane.oxide.internal"; /// Names for services where backends are interchangeable. -#[derive(Clone, Debug, PartialEq, PartialOrd)] +#[derive(Clone, Debug, Hash, Eq, PartialEq, PartialOrd)] pub enum ServiceName { Clickhouse, Cockroach, @@ -32,7 +32,7 @@ impl fmt::Display for ServiceName { } /// Names for services where backends are not interchangeable. -#[derive(Clone, Debug, PartialEq, PartialOrd)] +#[derive(Clone, Debug, Hash, Eq, PartialEq, PartialOrd)] pub enum BackendName { Crucible, SledAgent, @@ -47,7 +47,7 @@ impl fmt::Display for BackendName { } } -#[derive(Clone, Debug, PartialEq, PartialOrd)] +#[derive(Clone, Debug, Hash, Eq, PartialEq, PartialOrd)] pub enum SRV { /// A service identified and accessed by name, such as "nexus", "CRDB", etc. ///