From b78ff981f2d98ce572925381d38c5e5813f4191f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 15:34:24 -0400 Subject: [PATCH 01/27] [nexus] Split Nexus configuration (package vs runtime) --- Cargo.lock | 1 + common/Cargo.toml | 1 + common/src/lib.rs | 3 +- common/src/nexus_config.rs | 128 ++++++++ common/src/postgres_config.rs | 95 ++++++ nexus/benches/setup_benchmark.rs | 2 +- nexus/examples/config.toml | 41 +-- nexus/src/app/mod.rs | 10 +- nexus/src/config.rs | 295 ++++++++---------- nexus/src/context.rs | 31 +- nexus/src/db/config.rs | 2 +- nexus/src/lib.rs | 24 +- nexus/test-utils/src/lib.rs | 10 +- nexus/tests/config.test.toml | 55 ++-- nexus/tests/integration_tests/authn_http.rs | 4 +- nexus/tests/integration_tests/commands.rs | 3 +- nexus/tests/integration_tests/console_api.rs | 2 +- nexus/tests/integration_tests/updates.rs | 4 +- openapi/sled-agent.json | 80 ++++- sled-agent/src/params.rs | 50 ++- sled-agent/src/rack_setup/config.rs | 4 +- sled-agent/src/rack_setup/service.rs | 46 ++- sled-agent/src/services.rs | 272 ++++++++++++---- sled-agent/src/sled_agent.rs | 4 +- .../{config.toml => config-partial.toml} | 17 +- smf/nexus/manifest.xml | 8 + smf/sled-agent/config-rss.toml | 13 + smf/sled-agent/manifest.xml | 4 + test-utils/src/dev/db.rs | 2 +- 29 files changed, 853 insertions(+), 358 deletions(-) create mode 100644 common/src/nexus_config.rs create mode 100644 common/src/postgres_config.rs rename smf/nexus/{config.toml => config-partial.toml} (53%) diff --git a/Cargo.lock b/Cargo.lock index 268e855769e..480645db5a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2798,6 +2798,7 @@ dependencies = [ "thiserror", "tokio", "tokio-postgres", + "toml", "uuid", ] diff --git a/common/Cargo.toml b/common/Cargo.toml index aa3b8943800..cd47bef1169 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -30,6 +30,7 @@ structopt = "0.3" thiserror = "1.0" tokio = { version = "1.18", features = [ "full" ] } tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] } +toml = "0.5.9" uuid = { version = "1.1.0", features = [ "serde", "v4" ] } parse-display = "0.5.4" progenitor = { git = "https://github.com/oxidecomputer/progenitor" } diff --git a/common/src/lib.rs b/common/src/lib.rs index 2a933283425..d90ecdb7333 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -24,7 +24,8 @@ pub mod address; pub mod api; pub mod backoff; pub mod cmd; -pub mod config; +pub mod nexus_config; +pub mod postgres_config; #[macro_export] macro_rules! generate_logging_api { diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs new file mode 100644 index 00000000000..f1325ae336d --- /dev/null +++ b/common/src/nexus_config.rs @@ -0,0 +1,128 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Configuration parameters to Nexus that are usually only known +//! at runtime. 
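+//!
+//! When these parameters are embedded in the full Nexus server
+//! configuration they live under a `[runtime]` table. A sketch, with
+//! illustrative values taken from the example config in this change:
+//!
+//! ```toml
+//! [runtime]
+//! id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c"
+//!
+//! [runtime.dropshot_external]
+//! bind_address = "127.0.0.1:12220"
+//! request_body_max_bytes = 1048576
+//!
+//! [runtime.dropshot_internal]
+//! bind_address = "127.0.0.1:12221"
+//!
+//! [runtime.subnet]
+//! net = "fd00:1122:3344:0100::/56"
+//!
+//! [runtime.database]
+//! type = "from_url"
+//! url = "postgresql://root@127.0.0.1:32221/omicron?sslmode=disable"
+//! ```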
+ +use super::address::{Ipv6Subnet, RACK_PREFIX}; +use super::postgres_config::PostgresConfigWithUrl; +use dropshot::ConfigDropshot; +use serde::{Deserialize, Serialize}; +use serde_with::serde_as; +use serde_with::DisplayFromStr; +use std::fmt; +use std::path::{Path, PathBuf}; +use uuid::Uuid; + +#[derive(Debug)] +pub struct LoadError { + pub path: PathBuf, + pub kind: LoadErrorKind, +} + +#[derive(Debug)] +pub struct InvalidTunable { + pub tunable: String, + pub message: String, +} + +impl std::fmt::Display for InvalidTunable { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "invalid \"{}\": \"{}\"", self.tunable, self.message) + } +} +impl std::error::Error for InvalidTunable {} + +#[derive(Debug)] +pub enum LoadErrorKind { + Io(std::io::Error), + Parse(toml::de::Error), + InvalidTunable(InvalidTunable), +} + +impl From<(PathBuf, std::io::Error)> for LoadError { + fn from((path, err): (PathBuf, std::io::Error)) -> Self { + LoadError { path, kind: LoadErrorKind::Io(err) } + } +} + +impl From<(PathBuf, toml::de::Error)> for LoadError { + fn from((path, err): (PathBuf, toml::de::Error)) -> Self { + LoadError { path, kind: LoadErrorKind::Parse(err) } + } +} + +impl std::error::Error for LoadError {} + +impl fmt::Display for LoadError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match &self.kind { + LoadErrorKind::Io(e) => { + write!(f, "read \"{}\": {}", self.path.display(), e) + } + LoadErrorKind::Parse(e) => { + write!(f, "parse \"{}\": {}", self.path.display(), e) + } + LoadErrorKind::InvalidTunable(inner) => { + write!( + f, + "invalid tunable \"{}\": {}", + self.path.display(), + inner, + ) + } + } + } +} + +impl std::cmp::PartialEq for LoadError { + fn eq(&self, other: &std::io::Error) -> bool { + if let LoadErrorKind::Io(e) = &self.kind { + e.kind() == other.kind() + } else { + false + } + } +} + +#[serde_as] +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +#[serde(tag = "type", rename_all = "snake_case")] +#[allow(clippy::large_enum_variant)] +pub enum Database { + FromDns, + FromUrl { + #[serde_as(as = "DisplayFromStr")] + url: PostgresConfigWithUrl, + }, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct RuntimeConfig { + /// Uuid of the Nexus instance + pub id: Uuid, + /// Dropshot configuration for external API server + pub dropshot_external: ConfigDropshot, + /// Dropshot configuration for internal API server + pub dropshot_internal: ConfigDropshot, + /// Portion of the IP space to be managed by the Rack. + pub subnet: Ipv6Subnet, + /// DB configuration. + pub database: Database, +} + +impl RuntimeConfig { + /// Load a `RuntimeConfig` from the given TOML file + /// + /// This config object can then be used to create a new `Nexus`. + /// The format is described in the README. + pub fn from_file>(path: P) -> Result { + let path = path.as_ref(); + let file_contents = std::fs::read_to_string(path) + .map_err(|e| (path.to_path_buf(), e))?; + let config_parsed: Self = toml::from_str(&file_contents) + .map_err(|e| (path.to_path_buf(), e))?; + Ok(config_parsed) + } +} diff --git a/common/src/postgres_config.rs b/common/src/postgres_config.rs new file mode 100644 index 00000000000..2509ae4fca2 --- /dev/null +++ b/common/src/postgres_config.rs @@ -0,0 +1,95 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Common objects used for configuration + +use std::fmt; +use std::ops::Deref; +use std::str::FromStr; + +/// Describes a URL for connecting to a PostgreSQL server +// The config pattern that we're using requires that types in the config impl +// Serialize. If tokio_postgres::config::Config impl'd Serialize or even +// Display, we'd just use that directly instead of this type. But it doesn't. +// We could implement a serialize function ourselves, but URLs support many +// different properties, and this could be brittle and easy to get wrong. +// Instead, this type just wraps tokio_postgres::config::Config and keeps the +// original String around. (The downside is that a consumer _generating_ a +// nexus::db::Config needs to generate a URL that matches the +// tokio_postgres::config::Config that they construct here, but this is not +// currently an important use case.) +// +// To ensure that the URL and config are kept in sync, we currently only support +// constructing one of these via `FromStr` and the fields are not public. +#[derive(Clone, Debug, PartialEq)] +pub struct PostgresConfigWithUrl { + url_raw: String, + config: tokio_postgres::config::Config, +} + +impl PostgresConfigWithUrl { + pub fn url(&self) -> String { + self.url_raw.clone() + } +} + +impl FromStr for PostgresConfigWithUrl { + type Err = tokio_postgres::Error; + + fn from_str(s: &str) -> Result { + Ok(PostgresConfigWithUrl { url_raw: s.to_owned(), config: s.parse()? }) + } +} + +impl fmt::Display for PostgresConfigWithUrl { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(&self.url_raw) + } +} + +impl Deref for PostgresConfigWithUrl { + type Target = tokio_postgres::config::Config; + + fn deref(&self) -> &Self::Target { + &self.config + } +} + +#[cfg(test)] +mod test { + use super::PostgresConfigWithUrl; + + #[test] + fn test_bad_url() { + // There is surprisingly little that we can rely on the + // tokio_postgres::config::Config parser to include in the error + // message. + let error = "foo".parse::().unwrap_err(); + assert!(error.to_string().contains("unexpected EOF")); + "http://127.0.0.1:1234".parse::().unwrap_err(); + let error = "postgresql://example.com?sslmode=not-a-real-ssl-mode" + .parse::() + .unwrap_err(); + assert!(error + .to_string() + .contains("invalid value for option `sslmode`")); + } + + #[test] + fn test_example_url() { + let config = "postgresql://notauser@10.2.3.4:1789?sslmode=disable" + .parse::() + .unwrap(); + assert_eq!(config.get_user(), Some("notauser")); + assert_eq!( + config.get_ssl_mode(), + tokio_postgres::config::SslMode::Disable + ); + assert_eq!( + config.get_hosts(), + &[tokio_postgres::config::Host::Tcp("10.2.3.4".to_string())] + ); + assert_eq!(config.get_ports(), &[1789]); + } +} diff --git a/nexus/benches/setup_benchmark.rs b/nexus/benches/setup_benchmark.rs index c4c27bd2a97..24584670ce5 100644 --- a/nexus/benches/setup_benchmark.rs +++ b/nexus/benches/setup_benchmark.rs @@ -19,7 +19,7 @@ async fn do_full_setup() { // Wraps exclusively the CockroachDB portion of setup/teardown. 
async fn do_crdb_setup() { let cfg = nexus_test_utils::load_test_config(); - let logctx = LogContext::new("crdb_setup", &cfg.log); + let logctx = LogContext::new("crdb_setup", &cfg.pkg.log); let mut db = test_setup_database(&logctx.log).await; db.cleanup().await.unwrap(); } diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 7900813cae0..22889ab1be9 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -2,9 +2,6 @@ # Oxide API: example configuration file # -# Identifier for this instance of Nexus -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" - [console] # Directory for static assets. Absolute path or relative to CWD. static_dir = "nexus/static" # TODO: figure out value @@ -20,21 +17,6 @@ session_absolute_timeout_minutes = 480 # TODO(https://github.com/oxidecomputer/omicron/issues/372): Remove "spoof". schemes_external = ["spoof", "session_cookie"] -[database] -# URL for connecting to the database -url = "postgresql://root@127.0.0.1:32221/omicron?sslmode=disable" - -[dropshot_external] -# IP address and TCP port on which to listen for the external API -bind_address = "127.0.0.1:12220" -# Allow larger request bodies (1MiB) to accomodate firewall endpoints (one -# rule is ~500 bytes) -request_body_max_bytes = 1048576 - -[dropshot_internal] -# IP address and TCP port on which to listen for the internal API -bind_address = "127.0.0.1:12221" - [log] # Show log messages of this level and more severe level = "info" @@ -51,6 +33,29 @@ mode = "stderr-terminal" [timeseries_db] address = "[::1]:8123" +[runtime] +# Identifier for this instance of Nexus +id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" + +[runtime.dropshot_external] +# IP address and TCP port on which to listen for the external API +bind_address = "127.0.0.1:12220" +# Allow larger request bodies (1MiB) to accomodate firewall endpoints (one +# rule is ~500 bytes) +request_body_max_bytes = 1048576 + +[runtime.dropshot_internal] +# IP address and TCP port on which to listen for the internal API +bind_address = "127.0.0.1:12221" + +[runtime.subnet] +net = "fd00:1122:3344:0100::/56" + +[runtime.database] +# URL for connecting to the database +type = "from_url" +url = "postgresql://root@127.0.0.1:32221/omicron?sslmode=disable" + # Tunable configuration parameters, for testing or experimentation [tunables] diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index ce20065fa1f..1c3620de7e7 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -112,7 +112,7 @@ impl Nexus { authz: Arc, ) -> Arc { let pool = Arc::new(pool); - let my_sec_id = db::SecId::from(config.id); + let my_sec_id = db::SecId::from(config.runtime.id); let db_datastore = Arc::new(db::DataStore::new(Arc::clone(&pool))); let sec_store = Arc::new(db::CockroachDbSecStore::new( my_sec_id, @@ -127,7 +127,7 @@ impl Nexus { sec_store, )); let timeseries_client = - oximeter_db::Client::new(config.timeseries_db.address, &log); + oximeter_db::Client::new(config.pkg.timeseries_db.address, &log); // TODO-cleanup We may want a first-class subsystem for managing startup // background tasks. 
It could use a Future for each one, a status enum @@ -143,7 +143,7 @@ impl Nexus { populate_start(populate_ctx, Arc::clone(&db_datastore)); let nexus = Nexus { - id: config.id, + id: config.runtime.id, rack_id, log: log.new(o!()), api_rack_identity: db::model::RackIdentity::new(rack_id), @@ -153,8 +153,8 @@ impl Nexus { recovery_task: std::sync::Mutex::new(None), populate_status, timeseries_client, - updates_config: config.updates.clone(), - tunables: config.tunables.clone(), + updates_config: config.pkg.updates.clone(), + tunables: config.pkg.tunables.clone(), opctx_alloc: OpContext::for_background( log.new(o!("component" => "InstanceAllocator")), Arc::clone(&authz), diff --git a/nexus/src/config.rs b/nexus/src/config.rs index 11b2c8d861e..d5bf6a2a2f9 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -5,15 +5,13 @@ //! Interfaces for parsing configuration files and working with a nexus server //! configuration -use crate::db; use anyhow::anyhow; -use dropshot::ConfigDropshot; use dropshot::ConfigLogging; +use omicron_common::nexus_config::{InvalidTunable, LoadError, RuntimeConfig}; use serde::Deserialize; use serde::Serialize; use serde_with::DeserializeFromStr; use serde_with::SerializeDisplay; -use std::fmt; use std::net::SocketAddr; use std::path::{Path, PathBuf}; @@ -124,22 +122,15 @@ impl Default for Tunables { /// Configuration for a nexus server #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] -pub struct Config { - /// Dropshot configuration for external API server - pub dropshot_external: ConfigDropshot, - /// Dropshot configuration for internal API server - pub dropshot_internal: ConfigDropshot, - /// Identifier for this instance of Nexus - pub id: uuid::Uuid, +pub struct PackageConfig { /// Console-related tunables pub console: ConsoleConfig, /// Server-wide logging configuration. pub log: ConfigLogging, - /// Database parameters - pub database: db::Config, /// Authentication-related configuration pub authn: AuthnConfig, /// Timeseries database configuration. + // TODO: Should this be removed? Nexus needs to initialize it. pub timeseries_db: TimeseriesDbConfig, /// Updates-related configuration. Updates APIs return 400 Bad Request when this is /// unconfigured. @@ -150,74 +141,28 @@ pub struct Config { pub tunables: Tunables, } -#[derive(Debug)] -pub struct InvalidTunable { - tunable: String, - message: String, -} - -impl std::fmt::Display for InvalidTunable { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "invalid \"{}\": \"{}\"", self.tunable, self.message) - } -} - -impl std::error::Error for InvalidTunable {} - -#[derive(Debug)] -pub struct LoadError { - path: PathBuf, - kind: LoadErrorKind, -} -#[derive(Debug)] -pub enum LoadErrorKind { - Io(std::io::Error), - Parse(toml::de::Error), - InvalidTunable(InvalidTunable), -} - -impl From<(PathBuf, std::io::Error)> for LoadError { - fn from((path, err): (PathBuf, std::io::Error)) -> Self { - LoadError { path, kind: LoadErrorKind::Io(err) } - } -} - -impl From<(PathBuf, toml::de::Error)> for LoadError { - fn from((path, err): (PathBuf, toml::de::Error)) -> Self { - LoadError { path, kind: LoadErrorKind::Parse(err) } - } -} - -impl std::error::Error for LoadError {} +#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] +pub struct Config { + /// Configuration parameters known at compile-time. 
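+    ///
+    /// These fields are flattened into the top level of the config file
+    /// (e.g. `[console]`, `[log]`), while the runtime parameters below are
+    /// grouped under a separate `[runtime]` table.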
+ #[serde(flatten)] + pub pkg: PackageConfig, -impl fmt::Display for LoadError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match &self.kind { - LoadErrorKind::Io(e) => { - write!(f, "read \"{}\": {}", self.path.display(), e) - } - LoadErrorKind::Parse(e) => { - write!(f, "parse \"{}\": {}", self.path.display(), e) - } - LoadErrorKind::InvalidTunable(inner) => { - write!( - f, - "invalid tunable \"{}\": {}", - self.path.display(), - inner, - ) - } - } - } + /// A variety of configuration parameters only known at runtime. + pub runtime: RuntimeConfig, } -impl std::cmp::PartialEq for LoadError { - fn eq(&self, other: &std::io::Error) -> bool { - if let LoadErrorKind::Io(e) = &self.kind { - e.kind() == other.kind() - } else { - false - } +impl Config { + /// Load a `PackageConfig` from the given TOML file + /// + /// This config object can then be used to create a new `Nexus`. + /// The format is described in the README. + pub fn from_file>(path: P) -> Result { + let path = path.as_ref(); + let file_contents = std::fs::read_to_string(path) + .map_err(|e| (path.to_path_buf(), e))?; + let config_parsed: Self = toml::from_str(&file_contents) + .map_err(|e| (path.to_path_buf(), e))?; + Ok(config_parsed) } } @@ -255,36 +200,24 @@ impl std::fmt::Display for SchemeName { } } -impl Config { - /// Load a `Config` from the given TOML file - /// - /// This config object can then be used to create a new `Nexus`. - /// The format is described in the README. - pub fn from_file>(path: P) -> Result { - let path = path.as_ref(); - let file_contents = std::fs::read_to_string(path) - .map_err(|e| (path.to_path_buf(), e))?; - let config_parsed: Config = toml::from_str(&file_contents) - .map_err(|e| (path.to_path_buf(), e))?; - Ok(config_parsed) - } -} - #[cfg(test)] mod test { use super::Tunables; use super::{ - AuthnConfig, Config, ConsoleConfig, LoadError, LoadErrorKind, + AuthnConfig, Config, ConsoleConfig, LoadError, PackageConfig, SchemeName, TimeseriesDbConfig, UpdatesConfig, }; - use crate::db; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; use dropshot::ConfigLoggingIfExists; use dropshot::ConfigLoggingLevel; use libc; + use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; + use omicron_common::nexus_config::{ + Database, LoadErrorKind, RuntimeConfig, + }; use std::fs; - use std::net::SocketAddr; + use std::net::{Ipv6Addr, SocketAddr}; use std::path::Path; use std::path::PathBuf; @@ -355,7 +288,7 @@ mod test { let error = read_config("empty", "").expect_err("expected failure"); if let LoadErrorKind::Parse(error) = &error.kind { assert_eq!(error.line_col(), None); - assert_eq!(error.to_string(), "missing field `dropshot_external`"); + assert_eq!(error.to_string(), "missing field `runtime`"); } else { panic!( "Got an unexpected error, expected Parse but got {:?}", @@ -373,7 +306,6 @@ mod test { let config = read_config( "valid", r##" - id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" [console] static_dir = "tests/static" cache_control_max_age_minutes = 10 @@ -381,14 +313,6 @@ mod test { session_absolute_timeout_minutes = 480 [authn] schemes_external = [] - [dropshot_external] - bind_address = "10.1.2.3:4567" - request_body_max_bytes = 1024 - [dropshot_internal] - bind_address = "10.1.2.3:4568" - request_body_max_bytes = 1024 - [database] - url = "postgresql://127.0.0.1?sslmode=disable" [log] mode = "file" level = "debug" @@ -401,6 +325,18 @@ mod test { default_base_url = "http://example.invalid/" [tunables] max_vpc_ipv4_subnet_prefix = 27 + [runtime] + id = 
"28b90dc4-c22a-65ba-f49a-f051fe01208f" + [runtime.dropshot_external] + bind_address = "10.1.2.3:4567" + request_body_max_bytes = 1024 + [runtime.dropshot_internal] + bind_address = "10.1.2.3:4568" + request_body_max_bytes = 1024 + [runtime.subnet] + net = "::/56" + [runtime.database] + type = "from_dns" "##, ) .unwrap(); @@ -408,51 +344,51 @@ mod test { assert_eq!( config, Config { - id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), - console: ConsoleConfig { - static_dir: "tests/static".parse().unwrap(), - cache_control_max_age_minutes: 10, - session_idle_timeout_minutes: 60, - session_absolute_timeout_minutes: 480 - }, - authn: AuthnConfig { schemes_external: Vec::new() }, - dropshot_external: ConfigDropshot { - bind_address: "10.1.2.3:4567" - .parse::() - .unwrap(), - ..Default::default() - }, - dropshot_internal: ConfigDropshot { - bind_address: "10.1.2.3:4568" - .parse::() - .unwrap(), - ..Default::default() - }, - log: ConfigLogging::File { - level: ConfigLoggingLevel::Debug, - if_exists: ConfigLoggingIfExists::Fail, - path: "/nonexistent/path".to_string() + runtime: RuntimeConfig { + id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), + dropshot_external: ConfigDropshot { + bind_address: "10.1.2.3:4567" + .parse::() + .unwrap(), + ..Default::default() + }, + dropshot_internal: ConfigDropshot { + bind_address: "10.1.2.3:4568" + .parse::() + .unwrap(), + ..Default::default() + }, + subnet: Ipv6Subnet::::new(Ipv6Addr::LOCALHOST), + database: Database::FromDns, }, - database: db::Config { - url: "postgresql://127.0.0.1?sslmode=disable" - .parse() - .unwrap() + pkg: PackageConfig { + console: ConsoleConfig { + static_dir: "tests/static".parse().unwrap(), + cache_control_max_age_minutes: 10, + session_idle_timeout_minutes: 60, + session_absolute_timeout_minutes: 480 + }, + authn: AuthnConfig { schemes_external: Vec::new() }, + log: ConfigLogging::File { + level: ConfigLoggingLevel::Debug, + if_exists: ConfigLoggingIfExists::Fail, + path: "/nonexistent/path".to_string() + }, + timeseries_db: TimeseriesDbConfig { + address: "[::1]:8123".parse().unwrap() + }, + updates: Some(UpdatesConfig { + trusted_root: PathBuf::from("/path/to/root.json"), + default_base_url: "http://example.invalid/".into(), + }), + tunables: Tunables { max_vpc_ipv4_subnet_prefix: 27 }, }, - timeseries_db: TimeseriesDbConfig { - address: "[::1]:8123".parse().unwrap() - }, - updates: Some(UpdatesConfig { - trusted_root: PathBuf::from("/path/to/root.json"), - default_base_url: "http://example.invalid/".into(), - }), - tunables: Tunables { max_vpc_ipv4_subnet_prefix: 27 }, } ); let config = read_config( "valid", r##" - id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" [console] static_dir = "tests/static" cache_control_max_age_minutes = 10 @@ -460,14 +396,6 @@ mod test { session_absolute_timeout_minutes = 480 [authn] schemes_external = [ "spoof", "session_cookie" ] - [dropshot_external] - bind_address = "10.1.2.3:4567" - request_body_max_bytes = 1024 - [dropshot_internal] - bind_address = "10.1.2.3:4568" - request_body_max_bytes = 1024 - [database] - url = "postgresql://127.0.0.1?sslmode=disable" [log] mode = "file" level = "debug" @@ -475,12 +403,24 @@ mod test { if_exists = "fail" [timeseries_db] address = "[::1]:8123" + [runtime] + id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + [runtime.dropshot_external] + bind_address = "10.1.2.3:4567" + request_body_max_bytes = 1024 + [runtime.dropshot_internal] + bind_address = "10.1.2.3:4568" + request_body_max_bytes = 1024 + [runtime.subnet] + net = "::/56" + 
[runtime.database] + type = "from_dns" "##, ) .unwrap(); assert_eq!( - config.authn.schemes_external, + config.pkg.authn.schemes_external, vec![SchemeName::Spoof, SchemeName::SessionCookie], ); } @@ -490,7 +430,6 @@ mod test { let error = read_config( "bad authn.schemes_external", r##" - id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" [console] static_dir = "tests/static" cache_control_max_age_minutes = 10 @@ -498,14 +437,6 @@ mod test { session_absolute_timeout_minutes = 480 [authn] schemes_external = ["trust-me"] - [dropshot_external] - bind_address = "10.1.2.3:4567" - request_body_max_bytes = 1024 - [dropshot_internal] - bind_address = "10.1.2.3:4568" - request_body_max_bytes = 1024 - [database] - url = "postgresql://127.0.0.1?sslmode=disable" [log] mode = "file" level = "debug" @@ -513,14 +444,29 @@ mod test { if_exists = "fail" [timeseries_db] address = "[::1]:8123" + [runtime] + id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + [runtime.dropshot_external] + bind_address = "10.1.2.3:4567" + request_body_max_bytes = 1024 + [runtime.dropshot_internal] + bind_address = "10.1.2.3:4568" + request_body_max_bytes = 1024 + [runtime.subnet] + net = "::/56" + [runtime.database] + type = "from_dns" "##, ) .expect_err("expected failure"); if let LoadErrorKind::Parse(error) = &error.kind { - assert!(error.to_string().starts_with( - "unsupported authn scheme: \"trust-me\" \ - for key `authn.schemes_external`" - )); + assert!( + error + .to_string() + .starts_with("unsupported authn scheme: \"trust-me\""), + "error = {}", + error.to_string() + ); } else { panic!( "Got an unexpected error, expected Parse but got {:?}", @@ -534,7 +480,6 @@ mod test { let error = read_config( "invalid_ipv4_prefix_tunable", r##" - id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" [console] static_dir = "tests/static" cache_control_max_age_minutes = 10 @@ -542,14 +487,6 @@ mod test { session_absolute_timeout_minutes = 480 [authn] schemes_external = [] - [dropshot_external] - bind_address = "10.1.2.3:4567" - request_body_max_bytes = 1024 - [dropshot_internal] - bind_address = "10.1.2.3:4568" - request_body_max_bytes = 1024 - [database] - url = "postgresql://127.0.0.1?sslmode=disable" [log] mode = "file" level = "debug" @@ -562,6 +499,18 @@ mod test { default_base_url = "http://example.invalid/" [tunables] max_vpc_ipv4_subnet_prefix = 100 + [runtime] + id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + [runtime.dropshot_external] + bind_address = "10.1.2.3:4567" + request_body_max_bytes = 1024 + [runtime.dropshot_internal] + bind_address = "10.1.2.3:4568" + request_body_max_bytes = 1024 + [runtime.subnet] + net = "::/56" + [runtime.database] + type = "from_dns" "##, ) .expect_err("Expected failure"); diff --git a/nexus/src/context.rs b/nexus/src/context.rs index f0d9e6b13a0..a08f22304df 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -19,6 +19,7 @@ use authn::external::spoof::HttpAuthnSpoof; use authn::external::HttpAuthnScheme; use chrono::{DateTime, Duration, Utc}; use omicron_common::api::external::Error; +use omicron_common::nexus_config; use oximeter::types::ProducerRegistry; use oximeter_instruments::http::{HttpService, LatencyTracker}; use slog::Logger; @@ -67,13 +68,13 @@ pub struct ConsoleConfig { impl ServerContext { /// Create a new context with the given rack id and log. This creates the /// underlying nexus as well. 
- pub fn new( + pub async fn new( rack_id: Uuid, log: Logger, - pool: db::Pool, config: &config::Config, ) -> Result, String> { let nexus_schemes = config + .pkg .authn .schemes_external .iter() @@ -90,7 +91,8 @@ impl ServerContext { let internal_authn = Arc::new(authn::Context::internal_api()); let authz = Arc::new(authz::Authz::new(&log)); let create_tracker = |name: &str| { - let target = HttpService { name: name.to_string(), id: config.id }; + let target = + HttpService { name: name.to_string(), id: config.runtime.id }; const START_LATENCY_DECADE: i8 = -6; const END_LATENCY_DECADE: i8 = 3; LatencyTracker::with_latency_decades( @@ -102,7 +104,7 @@ impl ServerContext { }; let internal_latencies = create_tracker("nexus-internal"); let external_latencies = create_tracker("nexus-external"); - let producer_registry = ProducerRegistry::with_id(config.id); + let producer_registry = ProducerRegistry::with_id(config.runtime.id); producer_registry .register_producer(internal_latencies.clone()) .unwrap(); @@ -113,11 +115,11 @@ impl ServerContext { // Support both absolute and relative paths. If configured dir is // absolute, use it directly. If not, assume it's relative to the // current working directory. - let static_dir = if config.console.static_dir.is_absolute() { - Some(config.console.static_dir.to_owned()) + let static_dir = if config.pkg.console.static_dir.is_absolute() { + Some(config.pkg.console.static_dir.to_owned()) } else { env::current_dir() - .map(|root| root.join(&config.console.static_dir)) + .map(|root| root.join(&config.pkg.console.static_dir)) .ok() }; @@ -132,6 +134,15 @@ impl ServerContext { // like console index.html. leaving that out for now so we don't break // nexus in dev for everyone + // Set up DB pool + let url = match &config.runtime.database { + nexus_config::Database::FromUrl { url } => url.clone(), + nexus_config::Database::FromDns => { + todo!("Not yet implemented"); + } + }; + let pool = db::Pool::new(&db::Config { url }); + Ok(Arc::new(ServerContext { nexus: Nexus::new_with_id( rack_id, @@ -149,14 +160,14 @@ impl ServerContext { producer_registry, console_config: ConsoleConfig { session_idle_timeout: Duration::minutes( - config.console.session_idle_timeout_minutes.into(), + config.pkg.console.session_idle_timeout_minutes.into(), ), session_absolute_timeout: Duration::minutes( - config.console.session_absolute_timeout_minutes.into(), + config.pkg.console.session_absolute_timeout_minutes.into(), ), static_dir, cache_control_max_age: Duration::minutes( - config.console.cache_control_max_age_minutes.into(), + config.pkg.console.cache_control_max_age_minutes.into(), ), }, })) diff --git a/nexus/src/db/config.rs b/nexus/src/db/config.rs index b4066ce3cbe..afe51bca66d 100644 --- a/nexus/src/db/config.rs +++ b/nexus/src/db/config.rs @@ -4,7 +4,7 @@ //! 
Nexus database configuration -use omicron_common::config::PostgresConfigWithUrl; +use omicron_common::postgres_config::PostgresConfigWithUrl; use serde::Deserialize; use serde::Serialize; use serde_with::serde_as; diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index e56503c3c09..61abe04b1ba 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -29,7 +29,7 @@ pub mod updates; // public for testing pub use app::test_interfaces::TestInterfaces; pub use app::Nexus; -pub use config::Config; +pub use config::{Config, PackageConfig}; pub use context::ServerContext; pub use crucible_agent_client; use external_api::http_entrypoints::external_api; @@ -85,15 +85,15 @@ impl Server { rack_id: Uuid, log: &Logger, ) -> Result { - let log = log.new(o!("name" => config.id.to_string())); + let log = log.new(o!("name" => config.runtime.id.to_string())); info!(log, "setting up nexus server"); let ctxlog = log.new(o!("component" => "ServerContext")); - let pool = db::Pool::new(&config.database); - let apictx = ServerContext::new(rack_id, ctxlog, pool, &config)?; + + let apictx = ServerContext::new(rack_id, ctxlog, &config).await?; let http_server_starter_external = dropshot::HttpServerStarter::new( - &config.dropshot_external, + &config.runtime.dropshot_external, external_api(), Arc::clone(&apictx), &log.new(o!("component" => "dropshot_external")), @@ -101,7 +101,7 @@ impl Server { .map_err(|error| format!("initializing external server: {}", error))?; let http_server_starter_internal = dropshot::HttpServerStarter::new( - &config.dropshot_internal, + &config.runtime.dropshot_internal, internal_api(), Arc::clone(&apictx), &log.new(o!("component" => "dropshot_internal")), @@ -153,12 +153,12 @@ impl Server { /// Run an instance of the [Server]. pub async fn run_server(config: &Config) -> Result<(), String> { use slog::Drain; - let (drain, registration) = slog_dtrace::with_drain( - config - .log - .to_logger("nexus") - .map_err(|message| format!("initializing logger: {}", message))?, - ); + let (drain, registration) = + slog_dtrace::with_drain( + config.pkg.log.to_logger("nexus").map_err(|message| { + format!("initializing logger: {}", message) + })?, + ); let log = slog::Logger::root(drain.fuse(), slog::o!()); if let slog_dtrace::ProbeRegistration::Failed(e) = registration { let msg = format!("failed to register DTrace probes: {}", e); diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index a53ad85d585..e4eb744e2fa 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -11,6 +11,7 @@ use dropshot::ConfigLogging; use dropshot::ConfigLoggingLevel; use omicron_common::api::external::IdentityMetadata; use omicron_common::api::internal::nexus::ProducerEndpoint; +use omicron_common::nexus_config; use omicron_sled_agent::sim; use omicron_test_utils::dev; use oximeter_collector::Oximeter; @@ -75,7 +76,7 @@ pub fn load_test_config() -> omicron_nexus::Config { let config_file_path = Path::new("tests/config.test.toml"); let mut config = omicron_nexus::Config::from_file(config_file_path) .expect("failed to load config.test.toml"); - config.id = Uuid::new_v4(); + config.runtime.id = Uuid::new_v4(); config } @@ -88,7 +89,7 @@ pub async fn test_setup_with_config( test_name: &str, config: &mut omicron_nexus::Config, ) -> ControlPlaneTestContext { - let logctx = LogContext::new(test_name, &config.log); + let logctx = LogContext::new(test_name, &config.pkg.log); let rack_id = Uuid::parse_str(RACK_UUID).unwrap(); let log = &logctx.log; @@ -99,8 +100,9 @@ pub async fn 
test_setup_with_config( let clickhouse = dev::clickhouse::ClickHouseInstance::new(0).await.unwrap(); // Store actual address/port information for the databases after they start. - config.database.url = database.pg_config().clone(); - config.timeseries_db.address.set_port(clickhouse.port()); + config.runtime.database = + nexus_config::Database::FromUrl { url: database.pg_config().clone() }; + config.pkg.timeseries_db.address.set_port(clickhouse.port()); let server = omicron_nexus::Server::start(&config, rack_id, &logctx.log) .await diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 9b8f1f42731..2fc4ddba192 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -2,10 +2,6 @@ # Oxide API: configuration file for test suite # -# Identifier for this instance of Nexus. -# NOTE: The test suite always overrides this. -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" - [console] # Directory for static assets. Absolute path or relative to CWD. static_dir = "tests/static" @@ -17,27 +13,6 @@ session_absolute_timeout_minutes = 480 [authn] schemes_external = [ "spoof", "session_cookie" ] -# -# NOTE: for the test suite, the database URL will be replaced with one -# appropriate for the database that's started by the test runner. -# -[database] -url = "postgresql://root@127.0.0.1:0/omicron?sslmode=disable" - -# -# NOTE: for the test suite, the port MUST be 0 (in order to bind to any -# available port) because the test suite will be running many servers -# concurrently. -# -[dropshot_external] -bind_address = "127.0.0.1:0" -request_body_max_bytes = 1048576 - -# port must be 0. see above -[dropshot_internal] -bind_address = "127.0.0.1:0" -request_body_max_bytes = 1048576 - # # NOTE: for the test suite, if mode = "file", the file path MUST be the sentinel # string "UNUSED". The actual path will be generated by the test suite for each @@ -59,3 +34,33 @@ address = "[::1]:0" [tunables] # Allow small subnets, so we can test IP address exhaustion easily / quickly max_vpc_ipv4_subnet_prefix = 29 + +[runtime] +# Identifier for this instance of Nexus. +# NOTE: The test suite always overrides this. +id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" + +# +# NOTE: for the test suite, the port MUST be 0 (in order to bind to any +# available port) because the test suite will be running many servers +# concurrently. +# +[runtime.dropshot_external] +bind_address = "127.0.0.1:0" +request_body_max_bytes = 1048576 + +# port must be 0. see above +[runtime.dropshot_internal] +bind_address = "127.0.0.1:0" +request_body_max_bytes = 1048576 + +[runtime.subnet] +net = "fd00:1122:3344:0100::/56" + +# +# NOTE: for the test suite, the database URL will be replaced with one +# appropriate for the database that's started by the test runner. 
+# +[runtime.database] +type = "from_url" +url = "postgresql://root@127.0.0.1:0/omicron?sslmode=disable" diff --git a/nexus/tests/integration_tests/authn_http.rs b/nexus/tests/integration_tests/authn_http.rs index 7125a52ea90..e0234da1b97 100644 --- a/nexus/tests/integration_tests/authn_http.rs +++ b/nexus/tests/integration_tests/authn_http.rs @@ -277,7 +277,7 @@ async fn start_whoami_server( sessions: HashMap, ) -> TestContext { let config = nexus_test_utils::load_test_config(); - let logctx = LogContext::new(test_name, &config.log); + let logctx = LogContext::new(test_name, &config.pkg.log); let whoami_api = { let mut whoami_api = ApiDescription::new(); @@ -299,7 +299,7 @@ async fn start_whoami_server( TestContext::new( whoami_api, server_state, - &config.dropshot_external, + &config.runtime.dropshot_external, Some(logctx), log, ) diff --git a/nexus/tests/integration_tests/commands.rs b/nexus/tests/integration_tests/commands.rs index 7d3855d5a6c..ac770c137e3 100644 --- a/nexus/tests/integration_tests/commands.rs +++ b/nexus/tests/integration_tests/commands.rs @@ -76,8 +76,7 @@ fn test_nexus_invalid_config() { assert_eq!( stderr_text, format!( - "nexus: parse \"{}\": missing field \ - `dropshot_external`\n", + "nexus: parse \"{}\": missing field `runtime`\n", config_path.display() ), ); diff --git a/nexus/tests/integration_tests/console_api.rs b/nexus/tests/integration_tests/console_api.rs index e84c65c0fe1..779e94470eb 100644 --- a/nexus/tests/integration_tests/console_api.rs +++ b/nexus/tests/integration_tests/console_api.rs @@ -196,7 +196,7 @@ async fn test_assets(cptestctx: &ControlPlaneTestContext) { #[tokio::test] async fn test_absolute_static_dir() { let mut config = load_test_config(); - config.console.static_dir = current_dir().unwrap().join("tests/static"); + config.pkg.console.static_dir = current_dir().unwrap().join("tests/static"); let cptestctx = test_setup_with_config("test_absolute_static_dir", &mut config).await; let testctx = &cptestctx.external_client; diff --git a/nexus/tests/integration_tests/updates.rs b/nexus/tests/integration_tests/updates.rs index 1bfa25d0a2c..c09ca0b7fea 100644 --- a/nexus/tests/integration_tests/updates.rs +++ b/nexus/tests/integration_tests/updates.rs @@ -62,7 +62,7 @@ async fn test_update_end_to_end() { let mut api = ApiDescription::new(); api.register(static_content).unwrap(); let context = FileServerContext { base: tuf_repo.path().to_owned() }; - let logctx = LogContext::new("test_update_end_to_end", &config.log); + let logctx = LogContext::new("test_update_end_to_end", &config.pkg.log); let server = HttpServerStarter::new(&dropshot_config, api, context, &logctx.log) .unwrap() @@ -70,7 +70,7 @@ async fn test_update_end_to_end() { let local_addr = server.local_addr(); // stand up the test environment - config.updates = Some(UpdatesConfig { + config.pkg.updates = Some(UpdatesConfig { trusted_root: tuf_repo.path().join("metadata").join("1.root.json"), default_base_url: format!("http://{}/", local_addr), }); diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 273082a7500..839e8ba9a76 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -247,6 +247,10 @@ "dataset_kind": { "$ref": "#/components/schemas/DatasetKind" }, + "id": { + "type": "string", + "format": "uuid" + }, "zpool_id": { "type": "string", "format": "uuid" @@ -255,6 +259,7 @@ "required": [ "address", "dataset_kind", + "id", "zpool_id" ] }, @@ -959,6 +964,7 @@ ] }, "ServiceRequest": { + "description": "Describes a request to create a service. 
This information should be sufficient for a Sled Agent to start a zone containing the requested service.", "type": "object", "properties": { "addresses": { @@ -976,13 +982,85 @@ "format": "ipv6" } }, + "id": { + "type": "string", + "format": "uuid" + }, "name": { "type": "string" + }, + "service_type": { + "$ref": "#/components/schemas/ServiceType" } }, "required": [ "addresses", - "name" + "id", + "name", + "service_type" + ] + }, + "ServiceType": { + "description": "Describes service-specific parameters.", + "oneOf": [ + { + "type": "object", + "properties": { + "external_address": { + "type": "string" + }, + "internal_address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "nexus" + ] + } + }, + "required": [ + "external_address", + "internal_address", + "type" + ] + }, + { + "type": "object", + "properties": { + "dns_address": { + "type": "string" + }, + "server_address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "internal_dns" + ] + } + }, + "required": [ + "dns_address", + "server_address", + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "oximeter" + ] + } + }, + "required": [ + "type" + ] + } ] }, "Slot": { diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 1c713a69067..d003bbe785e 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -9,9 +9,7 @@ use omicron_common::api::internal::nexus::{ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::fmt::{Debug, Display, Formatter, Result as FormatResult}; -use std::net::IpAddr; -use std::net::Ipv6Addr; -use std::net::{SocketAddr, SocketAddrV6}; +use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use uuid::Uuid; /// Information required to construct a virtual network interface for a guest @@ -165,7 +163,7 @@ pub struct InstanceRuntimeStateRequested { pub enum DatasetKind { CockroachDb { /// The addresses of all nodes within the cluster. - all_addresses: Vec, + all_addresses: Vec, }, Crucible, Clickhouse, @@ -213,6 +211,8 @@ impl std::fmt::Display for DatasetKind { /// instantiated when the dataset is detected. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] pub struct DatasetEnsureBody { + // The UUID of the dataset, as well as the service using it directly. + pub id: Uuid, // The name (and UUID) of the Zpool which we are inserting into. pub zpool_id: Uuid, // The type of the filesystem. @@ -235,14 +235,52 @@ impl From for sled_agent_client::types::DatasetEnsureBody { zpool_id: p.zpool_id, dataset_kind: p.dataset_kind.into(), address: p.address.to_string(), + id: p.id, } } } +/// Describes service-specific parameters. 
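+///
+/// Serialized with an internal `type` tag in snake_case. A sketch of the
+/// Nexus variant as it appears in an RSS request (addresses illustrative,
+/// mirroring `smf/sled-agent/config-rss.toml`):
+///
+/// ```toml
+/// type = "nexus"
+/// internal_address = "[fd00:1122:3344:0101::3]:12221"
+/// external_address = "[fd00:1122:3344:0101::3]:12220"
+/// ```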
+#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ServiceType { + Nexus { internal_address: SocketAddrV6, external_address: SocketAddrV6 }, + InternalDns { server_address: SocketAddrV6, dns_address: SocketAddrV6 }, + Oximeter, +} + +impl From for sled_agent_client::types::ServiceType { + fn from(s: ServiceType) -> Self { + use sled_agent_client::types::ServiceType as AutoSt; + use ServiceType as St; + + match s { + St::Nexus { internal_address, external_address } => AutoSt::Nexus { + internal_address: internal_address.to_string(), + external_address: external_address.to_string(), + }, + St::InternalDns { server_address, dns_address } => { + AutoSt::InternalDns { + server_address: server_address.to_string(), + dns_address: dns_address.to_string(), + } + } + St::Oximeter => AutoSt::Oximeter, + } + } +} + +/// Describes a request to create a service. This information +/// should be sufficient for a Sled Agent to start a zone +/// containing the requested service. #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, )] pub struct ServiceRequest { + // The UUID of the service to be initialized. + pub id: Uuid, // The name of the service to be created. pub name: String, // The addresses on which the service should listen for requests. @@ -256,14 +294,18 @@ pub struct ServiceRequest { // is necessary to allow inter-zone traffic routing. #[serde(default)] pub gz_addresses: Vec, + // Any other service-specific parameters. + pub service_type: ServiceType, } impl From for sled_agent_client::types::ServiceRequest { fn from(s: ServiceRequest) -> Self { Self { + id: s.id, name: s.name, addresses: s.addresses, gz_addresses: s.gz_addresses, + service_type: s.service_type.into(), } } } diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 26f3ce8a321..d9f8324535d 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -29,12 +29,12 @@ pub struct SetupServiceConfig { pub rack_subnet: Ipv6Addr, #[serde(default, rename = "request")] - pub requests: Vec, + pub requests: Vec, } /// A request to initialize a sled. #[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] -pub struct SledRequest { +pub struct HardcodedSledRequest { /// Datasets to be created. #[serde(default, rename = "dataset")] pub datasets: Vec, diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 0fef7054d26..6c65383d5e8 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -4,13 +4,15 @@ //! 
Rack Setup Service implementation -use super::config::{SetupServiceConfig as Config, SledRequest}; -use crate::bootstrap::config::BOOTSTRAP_AGENT_PORT; -use crate::bootstrap::discovery::PeerMonitorObserver; -use crate::bootstrap::params::SledAgentRequest; -use crate::bootstrap::rss_handle::BootstrapAgentHandle; -use crate::params::ServiceRequest; -use omicron_common::address::{get_sled_address, ReservedRackSubnet}; +use super::config::{HardcodedSledRequest, SetupServiceConfig as Config}; +use crate::bootstrap::{ + config::BOOTSTRAP_AGENT_PORT, discovery::PeerMonitorObserver, + params::SledAgentRequest, rss_handle::BootstrapAgentHandle, +}; +use crate::params::{ServiceRequest, ServiceType}; +use omicron_common::address::{ + get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, +}; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, }; @@ -21,6 +23,7 @@ use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::path::PathBuf; use thiserror::Error; use tokio::sync::Mutex; +use uuid::Uuid; /// Describes errors which may occur while operating the setup service. #[derive(Error, Debug)] @@ -55,7 +58,7 @@ pub enum SetupServiceError { #[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] struct SledAllocation { initialization_request: SledAgentRequest, - services_request: SledRequest, + services_request: HardcodedSledRequest, } /// The interface to the Rack Setup Service. @@ -192,7 +195,7 @@ impl ServiceInner { async fn initialize_services( &self, sled_address: SocketAddr, - services: &Vec, + services: &Vec, ) -> Result<(), SetupServiceError> { let dur = std::time::Duration::from_secs(60); let client = reqwest::ClientBuilder::new() @@ -284,18 +287,31 @@ impl ServiceInner { if idx < config.requests.len() { config.requests[idx].clone() } else { - SledRequest::default() + HardcodedSledRequest::default() } }; - // The first enumerated addresses get assigned the additional + // The first enumerated sleds get assigned the additional // responsibility of being internal DNS servers. if idx < dns_subnets.len() { let dns_subnet = &dns_subnets[idx]; + let dns_addr = dns_subnet.dns_address().ip(); request.dns_services.push(ServiceRequest { + id: Uuid::new_v4(), name: "internal-dns".to_string(), - addresses: vec![dns_subnet.dns_address().ip()], + addresses: vec![dns_addr], gz_addresses: vec![dns_subnet.gz_address().ip()], + service_type: ServiceType::InternalDns { + server_address: SocketAddrV6::new( + dns_addr, + DNS_SERVER_PORT, + 0, + 0, + ), + dns_address: SocketAddrV6::new( + dns_addr, DNS_PORT, 0, 0, + ), + }, }); } @@ -331,8 +347,10 @@ impl ServiceInner { } // Once we've constructed a plan, write it down to durable storage. 
- let serialized_plan = toml::Value::try_from(&plan) - .expect("Cannot serialize configuration"); + let serialized_plan = + toml::Value::try_from(&plan).unwrap_or_else(|e| { + panic!("Cannot serialize configuration: {:#?}: {}", plan, e) + }); let plan_str = toml::to_string(&serialized_plan) .expect("Cannot turn config to string"); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 72444a79b17..aaa1960fb3e 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -7,17 +7,32 @@ use crate::illumos::dladm::{Etherstub, EtherstubVnic}; use crate::illumos::running_zone::{InstalledZone, RunningZone}; use crate::illumos::vnic::VnicAllocator; +use crate::illumos::zfs::ZONE_ZFS_DATASET_MOUNTPOINT; use crate::illumos::zone::AddressRequest; -use crate::params::{ServiceEnsureBody, ServiceRequest}; +use crate::params::{ServiceEnsureBody, ServiceRequest, ServiceType}; use crate::zone::Zones; -use omicron_common::address::{DNS_PORT, DNS_SERVER_PORT}; +use dropshot::ConfigDropshot; +use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; +use omicron_common::nexus_config::{self, RuntimeConfig as NexusRuntimeConfig}; +use omicron_common::postgres_config::PostgresConfigWithUrl; use slog::Logger; use std::collections::HashSet; use std::iter::FromIterator; -use std::net::{IpAddr, Ipv6Addr}; +use std::net::{IpAddr, Ipv6Addr, SocketAddr}; use std::path::{Path, PathBuf}; +use std::str::FromStr; +use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; +// The filename of ServiceManager's internal storage. +const SERVICE_CONFIG_FILENAME: &str = "service.toml"; +// The filename of a half-completed config, in need of parameters supplied at +// runtime. +const PARTIAL_CONFIG_FILENAME: &str = "config-partial.toml"; +// The filename of a completed config, merging the partial config with +// additional appended parameters known at runtime. +const COMPLETE_CONFIG_FILENAME: &str = "config.toml"; + #[derive(thiserror::Error, Debug)] pub enum Error { #[error("Cannot serialize TOML to file {path}: {err}")] @@ -69,13 +84,40 @@ impl From for omicron_common::api::external::Error { /// The default path to service configuration, if one is not /// explicitly provided. pub fn default_services_config_path() -> PathBuf { - Path::new(omicron_common::OMICRON_CONFIG_PATH).join("services.toml") + Path::new(omicron_common::OMICRON_CONFIG_PATH).join(SERVICE_CONFIG_FILENAME) +} + +/// Configuration parameters which modify the [`ServiceManager`]'s behavior. +/// +/// These are typically used to make testing easier; production usage +/// should generally prefer to use the defaults. +pub struct Config { + /// The path for the ServiceManager to store information about + /// all running services. + pub all_svcs_config_path: PathBuf, + /// A function which returns the path the directory holding the + /// service's configuration file. + pub get_svc_config_dir: Box PathBuf + Send + Sync>, +} + +impl Default for Config { + fn default() -> Self { + Self { + all_svcs_config_path: default_services_config_path(), + get_svc_config_dir: Box::new(|zone_name: &str, svc_name: &str| { + PathBuf::from(ZONE_ZFS_DATASET_MOUNTPOINT) + .join(PathBuf::from(zone_name)) + .join("root") + .join(format!("var/svc/manifest/site/{}", svc_name)) + }), + } + } } /// Manages miscellaneous Sled-local services. 
pub struct ServiceManager { log: Logger, - config_path: Option, + config: Config, zones: Mutex>, vnic_allocator: VnicAllocator, underlay_vnic: EtherstubVnic, @@ -98,12 +140,12 @@ impl ServiceManager { etherstub: Etherstub, underlay_vnic: EtherstubVnic, underlay_address: Ipv6Addr, - config_path: Option, + config: Config, ) -> Result { debug!(log, "Creating new ServiceManager"); let mgr = Self { log: log.new(o!("component" => "ServiceManager")), - config_path, + config, zones: Mutex::new(vec![]), vnic_allocator: VnicAllocator::new("Service", etherstub), underlay_vnic, @@ -143,11 +185,7 @@ impl ServiceManager { // Returns either the path to the explicitly provided config path, or // chooses the default one. fn services_config_path(&self) -> PathBuf { - if let Some(path) = &self.config_path { - path.clone() - } else { - default_services_config_path() - } + self.config.all_svcs_config_path.clone() } // Populates `existing_zones` according to the requests in `services`. @@ -268,16 +306,70 @@ impl ServiceManager { let smf_name = format!("svc:/system/illumos/{}", service.name); let default_smf_name = format!("{}:default", smf_name); - match service.name.as_str() { - "internal-dns" => { - info!(self.log, "Setting up internal-dns service"); - let address = - service.addresses.get(0).ok_or_else(|| { - Error::BadServiceRequest { - service: service.name.clone(), - message: "Not enough addresses".to_string(), - } + match service.service_type { + ServiceType::Nexus { internal_address, external_address } => { + info!(self.log, "Setting up Nexus service"); + + // Nexus takes a separate config file for parameters which + // cannot be known at packaging time. + let runtime_config = NexusRuntimeConfig { + id: service.id, + dropshot_external: ConfigDropshot { + bind_address: SocketAddr::V6(external_address), + ..Default::default() + }, + dropshot_internal: ConfigDropshot { + bind_address: SocketAddr::V6(internal_address), + ..Default::default() + }, + subnet: Ipv6Subnet::::new( + self.underlay_address, + ), + // TODO: Switch to inferring this URL by DNS. + database: nexus_config::Database::FromUrl { + url: PostgresConfigWithUrl::from_str( + "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" + ).unwrap() + } + }; + + // Copy the partial config file to the expected location. + let config_dir = (self.config.get_svc_config_dir)( + running_zone.name(), + &service.name, + ); + let partial_config_path = + config_dir.join(PARTIAL_CONFIG_FILENAME); + let config_path = config_dir.join(COMPLETE_CONFIG_FILENAME); + tokio::fs::copy(partial_config_path, &config_path) + .await + .map_err(|err| Error::Io { + path: config_path.clone(), + err, })?; + + // Serialize the configuration and append it into the file. 
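+                // The appended table becomes the `[runtime]` section that
+                // turns the packaged partial config into the complete
+                // config consumed by Nexus at startup.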
+ let serialized_cfg = toml::Value::try_from(&runtime_config) + .expect("Cannot serialize config"); + let mut map = toml::map::Map::new(); + map.insert("runtime".to_string(), serialized_cfg); + let config_str = toml::to_string(&map).map_err(|err| { + Error::TomlSerialize { path: config_path.clone(), err } + })?; + let mut file = tokio::fs::OpenOptions::new() + .append(true) + .open(&config_path) + .await + .map_err(|err| Error::Io { + path: config_path.clone(), + err, + })?; + file.write_all(config_str.as_bytes()).await.map_err( + |err| Error::Io { path: config_path.clone(), err }, + )?; + } + ServiceType::InternalDns { server_address, dns_address } => { + info!(self.log, "Setting up internal-dns service"); running_zone .run_cmd(&[ crate::illumos::zone::SVCCFG, @@ -286,14 +378,12 @@ impl ServiceManager { "setprop", &format!( "config/server_address=[{}]:{}", - address, DNS_SERVER_PORT + server_address.ip(), + server_address.port(), ), ]) .map_err(|err| Error::ZoneCommand { - intent: format!( - "Setting DNS server address [{}]:{}", - address, DNS_SERVER_PORT - ), + intent: "set server address".to_string(), err, })?; @@ -305,14 +395,12 @@ impl ServiceManager { "setprop", &format!( "config/dns_address=[{}]:{}", - address, DNS_PORT + dns_address.ip(), + dns_address.port(), ), ]) .map_err(|err| Error::ZoneCommand { - intent: format!( - "Setting DNS address [{}]:{}", - address, DNS_SERVER_PORT - ), + intent: "Set DNS address".to_string(), err, })?; @@ -327,17 +415,17 @@ impl ServiceManager { ]) .map_err(|err| Error::ZoneCommand { intent: format!( - "Refreshing DNS service config for {}", + "Refresh SMF manifest {}", default_smf_name ), err, })?; } - _ => { - info!( - self.log, - "Service name {} did not match", service.name - ); + ServiceType::Oximeter => { + info!(self.log, "Setting up oximeter service"); + + // TODO: Implement with dynamic parameters, when address is + // dynamically assigned. } } @@ -438,7 +526,9 @@ mod test { svc, zone::MockZones, }; + use std::net::{Ipv6Addr, SocketAddrV6}; use std::os::unix::process::ExitStatusExt; + use uuid::Uuid; const SVC_NAME: &str = "my_svc"; const EXPECTED_ZONE_NAME: &str = "oxz_my_svc"; @@ -488,14 +578,29 @@ mod test { } // Prepare to call "ensure" for a new service, then actually call "ensure". - async fn ensure_new_service(mgr: &ServiceManager) { + async fn ensure_new_service(mgr: &ServiceManager, id: Uuid) { let _expectations = expect_new_service(); mgr.ensure(ServiceEnsureBody { services: vec![ServiceRequest { + id, name: SVC_NAME.to_string(), addresses: vec![], gz_addresses: vec![], + service_type: ServiceType::Nexus { + internal_address: SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + ), + external_address: SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + ), + }, }], }) .await @@ -504,12 +609,27 @@ mod test { // Prepare to call "ensure" for a service which already exists. We should // return the service without actually installing a new zone. 
- async fn ensure_existing_service(mgr: &ServiceManager) { + async fn ensure_existing_service(mgr: &ServiceManager, id: Uuid) { mgr.ensure(ServiceEnsureBody { services: vec![ServiceRequest { + id, name: SVC_NAME.to_string(), addresses: vec![], gz_addresses: vec![], + service_type: ServiceType::Nexus { + internal_address: SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + ), + external_address: SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + ), + }, }], }) .await @@ -533,26 +653,56 @@ mod test { drop(mgr); } + struct TestConfig { + config_dir: tempfile::TempDir, + } + + impl TestConfig { + async fn new() -> Self { + let config_dir = tempfile::TempDir::new().unwrap(); + tokio::fs::File::create( + config_dir.path().join(PARTIAL_CONFIG_FILENAME), + ) + .await + .unwrap(); + Self { config_dir } + } + + fn make_config(&self) -> Config { + let all_svcs_config_path = + self.config_dir.path().join(SERVICE_CONFIG_FILENAME); + let svc_config_dir = self.config_dir.path().to_path_buf(); + Config { + all_svcs_config_path, + get_svc_config_dir: Box::new( + move |_zone_name: &str, _svc_name: &str| { + svc_config_dir.clone() + }, + ), + } + } + } + #[tokio::test] #[serial_test::serial] async fn test_ensure_service() { let logctx = omicron_test_utils::dev::test_setup_log("test_ensure_service"); let log = logctx.log.clone(); + let test_config = TestConfig::new().await; - let config_dir = tempfile::TempDir::new().unwrap(); - let config = config_dir.path().join("services.toml"); let mgr = ServiceManager::new( log, Etherstub(ETHERSTUB_NAME.to_string()), EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, - Some(config), + test_config.make_config(), ) .await .unwrap(); - ensure_new_service(&mgr).await; + let id = Uuid::new_v4(); + ensure_new_service(&mgr, id).await; drop_service_manager(mgr); logctx.cleanup_successful(); @@ -565,21 +715,21 @@ mod test { "test_ensure_service_which_already_exists", ); let log = logctx.log.clone(); + let test_config = TestConfig::new().await; - let config_dir = tempfile::TempDir::new().unwrap(); - let config = config_dir.path().join("services.toml"); let mgr = ServiceManager::new( log, Etherstub(ETHERSTUB_NAME.to_string()), EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, - Some(config), + test_config.make_config(), ) .await .unwrap(); - ensure_new_service(&mgr).await; - ensure_existing_service(&mgr).await; + let id = Uuid::new_v4(); + ensure_new_service(&mgr, id).await; + ensure_existing_service(&mgr, id).await; drop_service_manager(mgr); logctx.cleanup_successful(); @@ -591,9 +741,7 @@ mod test { let logctx = omicron_test_utils::dev::test_setup_log( "test_services_are_recreated_on_reboot", ); - - let config_dir = tempfile::TempDir::new().unwrap(); - let config = config_dir.path().join("services.toml"); + let test_config = TestConfig::new().await; // First, spin up a ServiceManager, create a new service, and tear it // down. 
@@ -602,11 +750,13 @@ mod test { Etherstub(ETHERSTUB_NAME.to_string()), EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, - Some(config.clone()), + test_config.make_config(), ) .await .unwrap(); - ensure_new_service(&mgr).await; + + let id = Uuid::new_v4(); + ensure_new_service(&mgr, id).await; drop_service_manager(mgr); // Before we re-create the service manager - notably, using the same @@ -617,7 +767,7 @@ mod test { Etherstub(ETHERSTUB_NAME.to_string()), EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, - Some(config.clone()), + test_config.make_config(), ) .await .unwrap(); @@ -632,9 +782,7 @@ mod test { let logctx = omicron_test_utils::dev::test_setup_log( "test_services_do_not_persist_without_config", ); - - let config_dir = tempfile::TempDir::new().unwrap(); - let config = config_dir.path().join("services.toml"); + let test_config = TestConfig::new().await; // First, spin up a ServiceManager, create a new service, and tear it // down. @@ -643,16 +791,18 @@ mod test { Etherstub(ETHERSTUB_NAME.to_string()), EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, - Some(config.clone()), + test_config.make_config(), ) .await .unwrap(); - ensure_new_service(&mgr).await; + let id = Uuid::new_v4(); + ensure_new_service(&mgr, id).await; drop_service_manager(mgr); // Next, delete the config. This means the service we just created will // not be remembered on the next initialization. - std::fs::remove_file(&config).unwrap(); + let config = test_config.make_config(); + std::fs::remove_file(&config.all_svcs_config_path).unwrap(); // Observe that the old service is not re-initialized. let mgr = ServiceManager::new( @@ -660,7 +810,7 @@ mod test { Etherstub(ETHERSTUB_NAME.to_string()), EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, - Some(config.clone()), + config, ) .await .unwrap(); diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index c0c2ff649c8..5f8f1e500ab 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -16,7 +16,7 @@ use crate::params::{ DatasetKind, DiskStateRequested, InstanceHardware, InstanceMigrateParams, InstanceRuntimeStateRequested, ServiceEnsureBody, }; -use crate::services::ServiceManager; +use crate::services::{self, ServiceManager}; use crate::storage_manager::StorageManager; use omicron_common::api::{ internal::nexus::DiskRuntimeState, internal::nexus::InstanceRuntimeState, @@ -245,7 +245,7 @@ impl SledAgent { etherstub.clone(), etherstub_vnic.clone(), *sled_address.ip(), - None, + services::Config::default(), ) .await?; diff --git a/smf/nexus/config.toml b/smf/nexus/config-partial.toml similarity index 53% rename from smf/nexus/config.toml rename to smf/nexus/config-partial.toml index d73d7a90cfc..b77ffc3137f 100644 --- a/smf/nexus/config.toml +++ b/smf/nexus/config-partial.toml @@ -1,10 +1,7 @@ # -# Oxide API: example configuration file +# Oxide API: partial configuration file # -# Identifier for this instance of Nexus -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" - [console] # Directory for static assets. Absolute path or relative to CWD. static_dir = "/var/nexus/static" @@ -16,18 +13,6 @@ session_absolute_timeout_minutes = 480 # TODO(https://github.com/oxidecomputer/omicron/issues/372): Remove "spoof". 
schemes_external = ["spoof", "session_cookie"] -[database] -# URL for connecting to the database -url = "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" - -[dropshot_external] -# IP address and TCP port on which to listen for the external API -bind_address = "[fd00:1122:3344:0101::3]:12220" - -[dropshot_internal] -# IP address and TCP port on which to listen for the internal API -bind_address = "[fd00:1122:3344:0101::3]:12221" - [log] # Show log messages of this level and more severe level = "info" diff --git a/smf/nexus/manifest.xml b/smf/nexus/manifest.xml index 0b8da2ff62f..3ff92b2fbac 100644 --- a/smf/nexus/manifest.xml +++ b/smf/nexus/manifest.xml @@ -11,6 +11,14 @@ type='service'> + + + + + + diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index d8113cf4d1b..698d5b112fc 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -11,21 +11,25 @@ rack_subnet = "fd00:1122:3344:0100::" # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate crucible datasets. [[request.dataset]] +id = "09a9a25f-2602-4e2f-9630-31af9c492c3e" zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" address = "[fd00:1122:3344:0101::6]:32345" dataset_kind.type = "crucible" [[request.dataset]] +id = "2713b37a-3043-4ed5-aaff-f38200e45cfb" zpool_id = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" address = "[fd00:1122:3344:0101::7]:32345" dataset_kind.type = "crucible" [[request.dataset]] +id = "ffd16cad-e5d5-495e-9c59-4312a3857d91" zpool_id = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" address = "[fd00:1122:3344:0101::8]:32345" dataset_kind.type = "crucible" [[request.dataset]] +id = "4d08fc19-3d5f-4f6b-9c48-925f8eac7255" zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" address = "[fd00:1122:3344:0101::2]:32221" dataset_kind.type = "cockroach_db" @@ -34,18 +38,27 @@ dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate clickhouse datasets. [[request.dataset]] +id = "a3505b41-a592-420b-84f2-3d76bf0e0a81" zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" address = "[fd00:1122:3344:0101::5]:8123" dataset_kind.type = "clickhouse" [[request.service]] +id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" name = "nexus" addresses = [ "fd00:1122:3344:0101::3" ] gz_addresses = [] +[request.service.service_type] +type = "nexus" +internal_address = "[fd00:1122:3344:0101::3]:12221" +external_address = "[fd00:1122:3344:0101::3]:12220" # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate Oximeter services. 
[[request.service]] +id = "1da65e5b-210c-4859-a7d7-200c1e659972" name = "oximeter" addresses = [ "fd00:1122:3344:0101::4" ] gz_addresses = [] +[request.service.service_type] +type = "oximeter" diff --git a/smf/sled-agent/manifest.xml b/smf/sled-agent/manifest.xml index 378b77776c8..96f029d96e0 100644 --- a/smf/sled-agent/manifest.xml +++ b/smf/sled-agent/manifest.xml @@ -28,6 +28,10 @@ type='service'> + + + diff --git a/test-utils/src/dev/db.rs b/test-utils/src/dev/db.rs index 5449bfc4139..b7112ae1a37 100644 --- a/test-utils/src/dev/db.rs +++ b/test-utils/src/dev/db.rs @@ -8,7 +8,7 @@ use crate::dev::poll; use anyhow::anyhow; use anyhow::bail; use anyhow::Context; -use omicron_common::config::PostgresConfigWithUrl; +use omicron_common::postgres_config::PostgresConfigWithUrl; use std::ffi::{OsStr, OsString}; use std::fmt; use std::ops::Deref; From fccc15cc8df05acffdafa791ed5c1d4d965e13e3 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 16:45:34 -0400 Subject: [PATCH 02/27] Ensure postgres config was just a rename --- common/src/config.rs | 95 -------------------------------------------- 1 file changed, 95 deletions(-) delete mode 100644 common/src/config.rs diff --git a/common/src/config.rs b/common/src/config.rs deleted file mode 100644 index 2509ae4fca2..00000000000 --- a/common/src/config.rs +++ /dev/null @@ -1,95 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Common objects used for configuration - -use std::fmt; -use std::ops::Deref; -use std::str::FromStr; - -/// Describes a URL for connecting to a PostgreSQL server -// The config pattern that we're using requires that types in the config impl -// Serialize. If tokio_postgres::config::Config impl'd Serialize or even -// Display, we'd just use that directly instead of this type. But it doesn't. -// We could implement a serialize function ourselves, but URLs support many -// different properties, and this could be brittle and easy to get wrong. -// Instead, this type just wraps tokio_postgres::config::Config and keeps the -// original String around. (The downside is that a consumer _generating_ a -// nexus::db::Config needs to generate a URL that matches the -// tokio_postgres::config::Config that they construct here, but this is not -// currently an important use case.) -// -// To ensure that the URL and config are kept in sync, we currently only support -// constructing one of these via `FromStr` and the fields are not public. -#[derive(Clone, Debug, PartialEq)] -pub struct PostgresConfigWithUrl { - url_raw: String, - config: tokio_postgres::config::Config, -} - -impl PostgresConfigWithUrl { - pub fn url(&self) -> String { - self.url_raw.clone() - } -} - -impl FromStr for PostgresConfigWithUrl { - type Err = tokio_postgres::Error; - - fn from_str(s: &str) -> Result { - Ok(PostgresConfigWithUrl { url_raw: s.to_owned(), config: s.parse()? 
}) - } -} - -impl fmt::Display for PostgresConfigWithUrl { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(&self.url_raw) - } -} - -impl Deref for PostgresConfigWithUrl { - type Target = tokio_postgres::config::Config; - - fn deref(&self) -> &Self::Target { - &self.config - } -} - -#[cfg(test)] -mod test { - use super::PostgresConfigWithUrl; - - #[test] - fn test_bad_url() { - // There is surprisingly little that we can rely on the - // tokio_postgres::config::Config parser to include in the error - // message. - let error = "foo".parse::().unwrap_err(); - assert!(error.to_string().contains("unexpected EOF")); - "http://127.0.0.1:1234".parse::().unwrap_err(); - let error = "postgresql://example.com?sslmode=not-a-real-ssl-mode" - .parse::() - .unwrap_err(); - assert!(error - .to_string() - .contains("invalid value for option `sslmode`")); - } - - #[test] - fn test_example_url() { - let config = "postgresql://notauser@10.2.3.4:1789?sslmode=disable" - .parse::() - .unwrap(); - assert_eq!(config.get_user(), Some("notauser")); - assert_eq!( - config.get_ssl_mode(), - tokio_postgres::config::SslMode::Disable - ); - assert_eq!( - config.get_hosts(), - &[tokio_postgres::config::Host::Tcp("10.2.3.4".to_string())] - ); - assert_eq!(config.get_ports(), &[1789]); - } -} From a077bd41879b3551ce25d2f59377262ec1cd1ef6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 17:38:38 -0400 Subject: [PATCH 03/27] review feedback --- nexus/src/config.rs | 2 +- nexus/src/context.rs | 2 +- nexus/src/lib.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nexus/src/config.rs b/nexus/src/config.rs index d5bf6a2a2f9..a6034a7eea3 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -152,7 +152,7 @@ pub struct Config { } impl Config { - /// Load a `PackageConfig` from the given TOML file + /// Load a `Config` from the given TOML file /// /// This config object can then be used to create a new `Nexus`. /// The format is described in the README. diff --git a/nexus/src/context.rs b/nexus/src/context.rs index a08f22304df..2ad6a93553a 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -68,7 +68,7 @@ pub struct ConsoleConfig { impl ServerContext { /// Create a new context with the given rack id and log. This creates the /// underlying nexus as well. 
- pub async fn new( + pub fn new( rack_id: Uuid, log: Logger, config: &config::Config, diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 61abe04b1ba..c13fc3de3c8 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -90,7 +90,7 @@ impl Server { let ctxlog = log.new(o!("component" => "ServerContext")); - let apictx = ServerContext::new(rack_id, ctxlog, &config).await?; + let apictx = ServerContext::new(rack_id, ctxlog, &config)?; let http_server_starter_external = dropshot::HttpServerStarter::new( &config.runtime.dropshot_external, From d16eda2832fba9c5e46c68431c3e400a6039ea17 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 16:44:41 -0400 Subject: [PATCH 04/27] DNS client --- Cargo.lock | 4 + internal-dns-client/Cargo.toml | 6 +- internal-dns-client/src/lib.rs | 3 + internal-dns-client/src/multiclient.rs | 145 +++++++++++++++++++++++++ internal-dns-client/src/names.rs | 55 ++++++++++ 5 files changed, 212 insertions(+), 1 deletion(-) create mode 100644 internal-dns-client/src/multiclient.rs create mode 100644 internal-dns-client/src/names.rs diff --git a/Cargo.lock b/Cargo.lock index fed2770e843..850c0ec0adc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2342,12 +2342,16 @@ dependencies = [ name = "internal-dns-client" version = "0.1.0" dependencies = [ + "omicron-common", "progenitor", "reqwest", "serde", "serde_json", "slog", "structopt", + "trust-dns-proto", + "trust-dns-resolver", + "uuid", ] [[package]] diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml index 22e28c91bc9..0ac6ecba610 100644 --- a/internal-dns-client/Cargo.toml +++ b/internal-dns-client/Cargo.toml @@ -5,9 +5,13 @@ edition = "2021" license = "MPL-2.0" [dependencies] +omicron-common = { path = "../common" } progenitor = { git = "https://github.com/oxidecomputer/progenitor" } +reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } serde = { version = "1.0", features = [ "derive" ] } serde_json = "1.0" slog = { version = "2.5.0", features = [ "max_level_trace", "release_max_level_debug" ] } structopt = "0.3" -reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } +trust-dns-proto = "0.21" +trust-dns-resolver = "0.21" +uuid = { version = "1.1.0", features = [ "v4", "serde" ] } diff --git a/internal-dns-client/src/lib.rs b/internal-dns-client/src/lib.rs index 49daa3d58ae..f7ce56f8521 100644 --- a/internal-dns-client/src/lib.rs +++ b/internal-dns-client/src/lib.rs @@ -16,3 +16,6 @@ progenitor::generate_api!( slog::debug!(log, "client response"; "result" => ?result); }), ); + +pub mod multiclient; +pub mod names; diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs new file mode 100644 index 00000000000..e01fb5a2139 --- /dev/null +++ b/internal-dns-client/src/multiclient.rs @@ -0,0 +1,145 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use crate::types::{DnsKv, DnsRecord, DnsRecordKey, Srv}; +use omicron_common::address::{ + Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, DNS_SERVER_PORT, +}; +use omicron_common::backoff::{ + internal_service_policy, retry_notify, BackoffError, +}; +use slog::{info, warn, Logger}; +use std::net::{SocketAddr, SocketAddrV6}; +use trust_dns_resolver::config::{ + NameServerConfig, Protocol, ResolverConfig, ResolverOpts, +}; +use trust_dns_resolver::TokioAsyncResolver; + +type DnsError = crate::Error; + +/// A connection used to update multiple DNS servers. +pub struct Updater { + clients: Vec, +} + +impl Updater { + pub fn new(subnet: Ipv6Subnet, log: Logger) -> Self { + let clients = ReservedRackSubnet::new(subnet) + .get_dns_subnets() + .into_iter() + .map(|dns_subnet| { + let addr = dns_subnet.dns_address().ip(); + info!(log, "Adding DNS server: {}", addr); + crate::Client::new( + &format!("http://[{}]:{}", addr, DNS_SERVER_PORT), + log.clone(), + ) + }) + .collect::>(); + + Self { clients } + } + + /// Utility function to insert: + /// - A set of uniquely-named AAAA records, each corresponding to an address + /// - An SRV record, pointing to each of the AAAA records. + pub async fn insert_dns_records( + &self, + log: &Logger, + aaaa: Vec<(crate::names::AAAA, SocketAddrV6)>, + srv_key: crate::names::SRV, + ) -> Result<(), DnsError> { + let mut records = Vec::with_capacity(aaaa.len() + 1); + + // Add one DnsKv per AAAA, each with a single record. + records.extend(aaaa.iter().map(|(name, addr)| DnsKv { + key: DnsRecordKey { name: name.to_string() }, + records: vec![DnsRecord::Aaaa(*addr.ip())], + })); + + // Add the DnsKv for the SRV, with a record for each AAAA. + records.push(DnsKv { + key: DnsRecordKey { name: srv_key.to_string() }, + records: aaaa + .iter() + .map(|(name, addr)| { + DnsRecord::Srv(Srv { + prio: 0, + weight: 0, + port: addr.port(), + target: name.to_string(), + }) + }) + .collect::>(), + }); + + let set_record = || async { + self.dns_records_set(&records) + .await + .map_err(BackoffError::transient)?; + Ok::<(), BackoffError>(()) + }; + let log_failure = |error, _| { + warn!(log, "Failed to set DNS records"; "error" => ?error); + }; + + retry_notify(internal_service_policy(), set_record, log_failure) + .await?; + Ok(()) + } + + /// Sets a records on all DNS servers. + /// + /// Returns an error if setting the record fails on any server. + pub async fn dns_records_set<'a>( + &'a self, + body: &'a Vec, + ) -> Result<(), DnsError> { + // TODO: Could be sent concurrently. + for client in &self.clients { + client.dns_records_set(body).await?; + } + + Ok(()) + } + + /// Deletes records in all DNS servers. + /// + /// Returns an error if deleting the record fails on any server. + pub async fn dns_records_delete<'a>( + &'a self, + body: &'a Vec, + ) -> Result<(), DnsError> { + // TODO: Could be sent concurrently + for client in &self.clients { + client.dns_records_delete(body).await?; + } + Ok(()) + } +} + +/// Creates a resolver using all internal DNS name servers. 
+pub fn create_resolver( + subnet: Ipv6Subnet, +) -> Result { + let mut rc = ResolverConfig::new(); + let dns_ips = ReservedRackSubnet::new(subnet) + .get_dns_subnets() + .into_iter() + .map(|subnet| subnet.dns_address().ip()) + .collect::>(); + + for dns_ip in dns_ips { + rc.add_name_server(NameServerConfig { + socket_addr: SocketAddr::V6(SocketAddrV6::new( + dns_ip, DNS_PORT, 0, 0, + )), + protocol: Protocol::Udp, + tls_dns_name: None, + trust_nx_responses: false, + bind_addr: None, + }); + } + TokioAsyncResolver::tokio(rc, ResolverOpts::default()) +} diff --git a/internal-dns-client/src/names.rs b/internal-dns-client/src/names.rs new file mode 100644 index 00000000000..6384ec9e503 --- /dev/null +++ b/internal-dns-client/src/names.rs @@ -0,0 +1,55 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::fmt; +use uuid::Uuid; + +const DNS_ZONE: &str = "control-plane.oxide.internal"; + +pub enum SRV { + /// A service identified and accessed by name, such as "nexus", "CRDB", etc. + /// + /// This is used in cases where services are interchangeable. + Service(String), + + /// A service identified by name and a unique identifier. + /// + /// This is used in cases where services are not interchangeable, such as + /// for the Sled agent. + Backend(String, Uuid), +} + +impl fmt::Display for SRV { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + SRV::Service(name) => { + write!(f, "_{}._tcp.{}", name, DNS_ZONE) + } + SRV::Backend(name, id) => { + write!(f, "_{}._tcp.{}.{}", name, id, DNS_ZONE) + } + } + } +} + +pub enum AAAA { + /// Identifies an AAAA record for a sled. + Sled(Uuid), + + /// Identifies an AAAA record for a zone within a sled. + Zone(Uuid), +} + +impl fmt::Display for AAAA { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + AAAA::Sled(id) => { + write!(f, "{}.sled.{}", id, DNS_ZONE) + } + AAAA::Zone(id) => { + write!(f, "{}.host.{}", id, DNS_ZONE) + } + } + } +} From 8db30b70b965e6eb3de54d2ae8172109225aff37 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 17:00:25 -0400 Subject: [PATCH 05/27] Add concurrency --- Cargo.lock | 1 + internal-dns-client/Cargo.toml | 1 + internal-dns-client/src/multiclient.rs | 28 ++++++++++++++++++-------- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 850c0ec0adc..ea7797ccd16 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2342,6 +2342,7 @@ dependencies = [ name = "internal-dns-client" version = "0.1.0" dependencies = [ + "futures", "omicron-common", "progenitor", "reqwest", diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml index 0ac6ecba610..f2611721ae7 100644 --- a/internal-dns-client/Cargo.toml +++ b/internal-dns-client/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" license = "MPL-2.0" [dependencies] +futures = "0.3.21" omicron-common = { path = "../common" } progenitor = { git = "https://github.com/oxidecomputer/progenitor" } reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index e01fb5a2139..3d8d912bf7f 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -3,6 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use crate::types::{DnsKv, DnsRecord, DnsRecordKey, Srv}; +use futures::stream::{self, StreamExt, TryStreamExt}; use omicron_common::address::{ Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, DNS_SERVER_PORT, }; @@ -96,10 +97,15 @@ impl Updater { &'a self, body: &'a Vec, ) -> Result<(), DnsError> { - // TODO: Could be sent concurrently. - for client in &self.clients { - client.dns_records_set(body).await?; - } + stream::iter(&self.clients) + .map(Ok::<_, DnsError>) + .try_for_each_concurrent( + None, + |client| async move { + client.dns_records_set(body).await?; + Ok(()) + } + ).await?; Ok(()) } @@ -111,10 +117,16 @@ impl Updater { &'a self, body: &'a Vec, ) -> Result<(), DnsError> { - // TODO: Could be sent concurrently - for client in &self.clients { - client.dns_records_delete(body).await?; - } + stream::iter(&self.clients) + .map(Ok::<_, DnsError>) + .try_for_each_concurrent( + None, + |client| async move { + client.dns_records_delete(body).await?; + Ok(()) + } + ).await?; + Ok(()) } } From 3a0c6ba8102541463416aedf345207a2baa34854 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 17:19:22 -0400 Subject: [PATCH 06/27] comment --- internal-dns-client/src/multiclient.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 3d8d912bf7f..47ac76e7710 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -25,6 +25,8 @@ pub struct Updater { } impl Updater { + /// Creates a new "Updater", capable of communicating with all + /// DNS servers within the AZ. pub fn new(subnet: Ipv6Subnet, log: Logger) -> Self { let clients = ReservedRackSubnet::new(subnet) .get_dns_subnets() From 33b3e02b7a926eec67674b6d896d144675da8f2d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 17:50:13 -0400 Subject: [PATCH 07/27] fmt --- Cargo.lock | 2 +- internal-dns-client/src/multiclient.rs | 24 ++++++++++-------------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ea7797ccd16..68e58d9b219 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2352,7 +2352,7 @@ dependencies = [ "structopt", "trust-dns-proto", "trust-dns-resolver", - "uuid", + "uuid 1.1.0", ] [[package]] diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 47ac76e7710..24c8817c274 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -101,13 +101,11 @@ impl Updater { ) -> Result<(), DnsError> { stream::iter(&self.clients) .map(Ok::<_, DnsError>) - .try_for_each_concurrent( - None, - |client| async move { - client.dns_records_set(body).await?; - Ok(()) - } - ).await?; + .try_for_each_concurrent(None, |client| async move { + client.dns_records_set(body).await?; + Ok(()) + }) + .await?; Ok(()) } @@ -121,13 +119,11 @@ impl Updater { ) -> Result<(), DnsError> { stream::iter(&self.clients) .map(Ok::<_, DnsError>) - .try_for_each_concurrent( - None, - |client| async move { - client.dns_records_delete(body).await?; - Ok(()) - } - ).await?; + .try_for_each_concurrent(None, |client| async move { + client.dns_records_delete(body).await?; + Ok(()) + }) + .await?; Ok(()) } From 3eb57dcdec6d8585ce4c40cf2048f5ec2d45a9fd Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 18:39:53 -0400 Subject: [PATCH 08/27] lockfile --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 68e58d9b219..ca290e03a05 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -2352,7 +2352,7 @@ dependencies = [ "structopt", "trust-dns-proto", "trust-dns-resolver", - "uuid 1.1.0", + "uuid 1.1.1", ] [[package]] From dd04a67f04b29af80b5e182a9c99ac62889c7778 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 16:39:09 -0400 Subject: [PATCH 09/27] s/runtime/deployment --- common/src/nexus_config.rs | 8 +-- nexus/examples/config.toml | 10 ++-- nexus/src/app/mod.rs | 4 +- nexus/src/config.rs | 54 +++++++++++---------- nexus/src/context.rs | 10 ++-- nexus/src/lib.rs | 6 +-- nexus/test-utils/src/lib.rs | 4 +- nexus/tests/config.test.toml | 10 ++-- nexus/tests/integration_tests/authn_http.rs | 2 +- nexus/tests/integration_tests/commands.rs | 2 +- sled-agent/src/services.rs | 13 +++-- 11 files changed, 65 insertions(+), 58 deletions(-) diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index f1325ae336d..2b34108643d 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -3,7 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. //! Configuration parameters to Nexus that are usually only known -//! at runtime. +//! at deployment time. use super::address::{Ipv6Subnet, RACK_PREFIX}; use super::postgres_config::PostgresConfigWithUrl; @@ -99,7 +99,7 @@ pub enum Database { } #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] -pub struct RuntimeConfig { +pub struct DeploymentConfig { /// Uuid of the Nexus instance pub id: Uuid, /// Dropshot configuration for external API server @@ -112,8 +112,8 @@ pub struct RuntimeConfig { pub database: Database, } -impl RuntimeConfig { - /// Load a `RuntimeConfig` from the given TOML file +impl DeploymentConfig { + /// Load a `DeploymentConfig` from the given TOML file /// /// This config object can then be used to create a new `Nexus`. /// The format is described in the README. 
diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 22889ab1be9..c841a12ac1c 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -33,25 +33,25 @@ mode = "stderr-terminal" [timeseries_db] address = "[::1]:8123" -[runtime] +[deployment] # Identifier for this instance of Nexus id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" -[runtime.dropshot_external] +[deployment.dropshot_external] # IP address and TCP port on which to listen for the external API bind_address = "127.0.0.1:12220" # Allow larger request bodies (1MiB) to accomodate firewall endpoints (one # rule is ~500 bytes) request_body_max_bytes = 1048576 -[runtime.dropshot_internal] +[deployment.dropshot_internal] # IP address and TCP port on which to listen for the internal API bind_address = "127.0.0.1:12221" -[runtime.subnet] +[deployment.subnet] net = "fd00:1122:3344:0100::/56" -[runtime.database] +[deployment.database] # URL for connecting to the database type = "from_url" url = "postgresql://root@127.0.0.1:32221/omicron?sslmode=disable" diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 1c3620de7e7..1cb1f6b6ff7 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -112,7 +112,7 @@ impl Nexus { authz: Arc, ) -> Arc { let pool = Arc::new(pool); - let my_sec_id = db::SecId::from(config.runtime.id); + let my_sec_id = db::SecId::from(config.deployment.id); let db_datastore = Arc::new(db::DataStore::new(Arc::clone(&pool))); let sec_store = Arc::new(db::CockroachDbSecStore::new( my_sec_id, @@ -143,7 +143,7 @@ impl Nexus { populate_start(populate_ctx, Arc::clone(&db_datastore)); let nexus = Nexus { - id: config.runtime.id, + id: config.deployment.id, rack_id, log: log.new(o!()), api_rack_identity: db::model::RackIdentity::new(rack_id), diff --git a/nexus/src/config.rs b/nexus/src/config.rs index a6034a7eea3..83be56fd335 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -7,7 +7,9 @@ use anyhow::anyhow; use dropshot::ConfigLogging; -use omicron_common::nexus_config::{InvalidTunable, LoadError, RuntimeConfig}; +use omicron_common::nexus_config::{ + DeploymentConfig, InvalidTunable, LoadError, +}; use serde::Deserialize; use serde::Serialize; use serde_with::DeserializeFromStr; @@ -147,8 +149,8 @@ pub struct Config { #[serde(flatten)] pub pkg: PackageConfig, - /// A variety of configuration parameters only known at runtime. - pub runtime: RuntimeConfig, + /// A variety of configuration parameters only known at deployment time. 
+ pub deployment: DeploymentConfig, } impl Config { @@ -214,7 +216,7 @@ mod test { use libc; use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; use omicron_common::nexus_config::{ - Database, LoadErrorKind, RuntimeConfig, + Database, DeploymentConfig, LoadErrorKind, }; use std::fs; use std::net::{Ipv6Addr, SocketAddr}; @@ -288,7 +290,7 @@ mod test { let error = read_config("empty", "").expect_err("expected failure"); if let LoadErrorKind::Parse(error) = &error.kind { assert_eq!(error.line_col(), None); - assert_eq!(error.to_string(), "missing field `runtime`"); + assert_eq!(error.to_string(), "missing field `deployment`"); } else { panic!( "Got an unexpected error, expected Parse but got {:?}", @@ -325,17 +327,17 @@ mod test { default_base_url = "http://example.invalid/" [tunables] max_vpc_ipv4_subnet_prefix = 27 - [runtime] + [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" - [runtime.dropshot_external] + [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 - [runtime.dropshot_internal] + [deployment.dropshot_internal] bind_address = "10.1.2.3:4568" request_body_max_bytes = 1024 - [runtime.subnet] + [deployment.subnet] net = "::/56" - [runtime.database] + [deployment.database] type = "from_dns" "##, ) @@ -344,7 +346,7 @@ mod test { assert_eq!( config, Config { - runtime: RuntimeConfig { + deployment: DeploymentConfig { id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), dropshot_external: ConfigDropshot { bind_address: "10.1.2.3:4567" @@ -403,17 +405,17 @@ mod test { if_exists = "fail" [timeseries_db] address = "[::1]:8123" - [runtime] + [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" - [runtime.dropshot_external] + [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 - [runtime.dropshot_internal] + [deployment.dropshot_internal] bind_address = "10.1.2.3:4568" request_body_max_bytes = 1024 - [runtime.subnet] + [deployment.subnet] net = "::/56" - [runtime.database] + [deployment.database] type = "from_dns" "##, ) @@ -444,17 +446,17 @@ mod test { if_exists = "fail" [timeseries_db] address = "[::1]:8123" - [runtime] + [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" - [runtime.dropshot_external] + [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 - [runtime.dropshot_internal] + [deployment.dropshot_internal] bind_address = "10.1.2.3:4568" request_body_max_bytes = 1024 - [runtime.subnet] + [deployment.subnet] net = "::/56" - [runtime.database] + [deployment.database] type = "from_dns" "##, ) @@ -499,17 +501,17 @@ mod test { default_base_url = "http://example.invalid/" [tunables] max_vpc_ipv4_subnet_prefix = 100 - [runtime] + [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" - [runtime.dropshot_external] + [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 - [runtime.dropshot_internal] + [deployment.dropshot_internal] bind_address = "10.1.2.3:4568" request_body_max_bytes = 1024 - [runtime.subnet] + [deployment.subnet] net = "::/56" - [runtime.database] + [deployment.database] type = "from_dns" "##, ) diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 2ad6a93553a..e940bef6d10 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -91,8 +91,10 @@ impl ServerContext { let internal_authn = Arc::new(authn::Context::internal_api()); let authz = Arc::new(authz::Authz::new(&log)); let create_tracker = |name: &str| { - let target = - HttpService { name: name.to_string(), id: 
config.runtime.id }; + let target = HttpService { + name: name.to_string(), + id: config.deployment.id, + }; const START_LATENCY_DECADE: i8 = -6; const END_LATENCY_DECADE: i8 = 3; LatencyTracker::with_latency_decades( @@ -104,7 +106,7 @@ impl ServerContext { }; let internal_latencies = create_tracker("nexus-internal"); let external_latencies = create_tracker("nexus-external"); - let producer_registry = ProducerRegistry::with_id(config.runtime.id); + let producer_registry = ProducerRegistry::with_id(config.deployment.id); producer_registry .register_producer(internal_latencies.clone()) .unwrap(); @@ -135,7 +137,7 @@ impl ServerContext { // nexus in dev for everyone // Set up DB pool - let url = match &config.runtime.database { + let url = match &config.deployment.database { nexus_config::Database::FromUrl { url } => url.clone(), nexus_config::Database::FromDns => { todo!("Not yet implemented"); diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index c13fc3de3c8..79f8a2cd838 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -85,7 +85,7 @@ impl Server { rack_id: Uuid, log: &Logger, ) -> Result { - let log = log.new(o!("name" => config.runtime.id.to_string())); + let log = log.new(o!("name" => config.deployment.id.to_string())); info!(log, "setting up nexus server"); let ctxlog = log.new(o!("component" => "ServerContext")); @@ -93,7 +93,7 @@ impl Server { let apictx = ServerContext::new(rack_id, ctxlog, &config)?; let http_server_starter_external = dropshot::HttpServerStarter::new( - &config.runtime.dropshot_external, + &config.deployment.dropshot_external, external_api(), Arc::clone(&apictx), &log.new(o!("component" => "dropshot_external")), @@ -101,7 +101,7 @@ impl Server { .map_err(|error| format!("initializing external server: {}", error))?; let http_server_starter_internal = dropshot::HttpServerStarter::new( - &config.runtime.dropshot_internal, + &config.deployment.dropshot_internal, internal_api(), Arc::clone(&apictx), &log.new(o!("component" => "dropshot_internal")), diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index e4eb744e2fa..02b9a0d7b7d 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -76,7 +76,7 @@ pub fn load_test_config() -> omicron_nexus::Config { let config_file_path = Path::new("tests/config.test.toml"); let mut config = omicron_nexus::Config::from_file(config_file_path) .expect("failed to load config.test.toml"); - config.runtime.id = Uuid::new_v4(); + config.deployment.id = Uuid::new_v4(); config } @@ -100,7 +100,7 @@ pub async fn test_setup_with_config( let clickhouse = dev::clickhouse::ClickHouseInstance::new(0).await.unwrap(); // Store actual address/port information for the databases after they start. - config.runtime.database = + config.deployment.database = nexus_config::Database::FromUrl { url: database.pg_config().clone() }; config.pkg.timeseries_db.address.set_port(clickhouse.port()); diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 2fc4ddba192..0a8789893a1 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -35,7 +35,7 @@ address = "[::1]:0" # Allow small subnets, so we can test IP address exhaustion easily / quickly max_vpc_ipv4_subnet_prefix = 29 -[runtime] +[deployment] # Identifier for this instance of Nexus. # NOTE: The test suite always overrides this. id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" @@ -45,22 +45,22 @@ id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" # available port) because the test suite will be running many servers # concurrently. 
# -[runtime.dropshot_external] +[deployment.dropshot_external] bind_address = "127.0.0.1:0" request_body_max_bytes = 1048576 # port must be 0. see above -[runtime.dropshot_internal] +[deployment.dropshot_internal] bind_address = "127.0.0.1:0" request_body_max_bytes = 1048576 -[runtime.subnet] +[deployment.subnet] net = "fd00:1122:3344:0100::/56" # # NOTE: for the test suite, the database URL will be replaced with one # appropriate for the database that's started by the test runner. # -[runtime.database] +[deployment.database] type = "from_url" url = "postgresql://root@127.0.0.1:0/omicron?sslmode=disable" diff --git a/nexus/tests/integration_tests/authn_http.rs b/nexus/tests/integration_tests/authn_http.rs index e0234da1b97..99f25f91539 100644 --- a/nexus/tests/integration_tests/authn_http.rs +++ b/nexus/tests/integration_tests/authn_http.rs @@ -299,7 +299,7 @@ async fn start_whoami_server( TestContext::new( whoami_api, server_state, - &config.runtime.dropshot_external, + &config.deployment.dropshot_external, Some(logctx), log, ) diff --git a/nexus/tests/integration_tests/commands.rs b/nexus/tests/integration_tests/commands.rs index 561e5fc478c..e28e313ff31 100644 --- a/nexus/tests/integration_tests/commands.rs +++ b/nexus/tests/integration_tests/commands.rs @@ -76,7 +76,7 @@ fn test_nexus_invalid_config() { assert_eq!( stderr_text, format!( - "nexus: parse \"{}\": missing field `runtime`\n", + "nexus: parse \"{}\": missing field `deployment`\n", config_path.display() ), ); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 4edd18a3fa7..3f617aaf399 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -13,7 +13,9 @@ use crate::params::{ServiceEnsureBody, ServiceRequest, ServiceType}; use crate::zone::Zones; use dropshot::ConfigDropshot; use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; -use omicron_common::nexus_config::{self, RuntimeConfig as NexusRuntimeConfig}; +use omicron_common::nexus_config::{ + self, DeploymentConfig as NexusDeploymentConfig, +}; use omicron_common::postgres_config::PostgresConfigWithUrl; use slog::Logger; use std::collections::HashSet; @@ -312,7 +314,7 @@ impl ServiceManager { // Nexus takes a separate config file for parameters which // cannot be known at packaging time. - let runtime_config = NexusRuntimeConfig { + let deployment_config = NexusDeploymentConfig { id: service.id, dropshot_external: ConfigDropshot { bind_address: SocketAddr::V6(external_address), @@ -351,10 +353,11 @@ impl ServiceManager { })?; // Serialize the configuration and append it into the file. 
- let serialized_cfg = toml::Value::try_from(&runtime_config) - .expect("Cannot serialize config"); + let serialized_cfg = + toml::Value::try_from(&deployment_config) + .expect("Cannot serialize config"); let mut map = toml::map::Map::new(); - map.insert("runtime".to_string(), serialized_cfg); + map.insert("deployment".to_string(), serialized_cfg); let config_str = toml::to_string(&map).map_err(|err| { Error::TomlSerialize { path: config_path.clone(), err } })?; From e1dc94188da4827e6ef1a11c671d85ff234af148 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 18:36:05 -0400 Subject: [PATCH 10/27] [nexus][sled-agent] Generate rack ID in RSS, plumb it through Nexus --- common/src/nexus_config.rs | 2 ++ common/src/sql/dbinit.sql | 8 ++++++++ nexus/examples/config.toml | 1 + nexus/src/app/sled.rs | 2 +- nexus/src/config.rs | 7 +++++++ nexus/src/db/datastore.rs | 8 +++++--- nexus/src/db/model/sled.rs | 5 ++++- nexus/src/db/schema.rs | 1 + nexus/src/lib.rs | 8 +++----- nexus/test-utils/src/lib.rs | 6 ++---- sled-agent/src/bootstrap/agent.rs | 1 + sled-agent/src/bootstrap/params.rs | 7 +++++++ sled-agent/src/rack_setup/service.rs | 3 +++ sled-agent/src/server.rs | 14 ++++++++++---- sled-agent/src/services.rs | 11 +++++++++++ sled-agent/src/sled_agent.rs | 2 ++ 16 files changed, 68 insertions(+), 18 deletions(-) diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index 2b34108643d..a18454e02d0 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -102,6 +102,8 @@ pub enum Database { pub struct DeploymentConfig { /// Uuid of the Nexus instance pub id: Uuid, + /// Uuid of the Rack where Nexus is executing. + pub rack_id: Uuid, /// Dropshot configuration for external API server pub dropshot_external: ConfigDropshot, /// Dropshot configuration for internal API server diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index 3944b3fd46f..e358c9a227e 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -75,6 +75,9 @@ CREATE TABLE omicron.public.sled ( time_deleted TIMESTAMPTZ, rcgen INT NOT NULL, + /* FK into the Rack table */ + rack_id UUID NOT NULL, + /* The IP address and bound port of the sled agent server. 
*/ ip INET NOT NULL, port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, @@ -83,6 +86,11 @@ CREATE TABLE omicron.public.sled ( last_used_address INET NOT NULL ); +/* Add an index which lets us look up sleds on a rack */ +CREATE INDEX ON omicron.public.sled ( + rack_id +) WHERE time_deleted IS NULL; + /* * Services */ diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index c841a12ac1c..727055490e8 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -36,6 +36,7 @@ address = "[::1]:8123" [deployment] # Identifier for this instance of Nexus id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" +rack_id = "c19a698f-c6f9-4a17-ae30-20d711b8f7dc" [deployment.dropshot_external] # IP address and TCP port on which to listen for the external API diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 0150cbec148..e4fc616f095 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -31,7 +31,7 @@ impl super::Nexus { address: SocketAddrV6, ) -> Result<(), Error> { info!(self.log, "registered sled agent"; "sled_uuid" => id.to_string()); - let sled = db::model::Sled::new(id, address); + let sled = db::model::Sled::new(id, address, self.rack_id); self.db_datastore.sled_upsert(sled).await?; Ok(()) } diff --git a/nexus/src/config.rs b/nexus/src/config.rs index 83be56fd335..98cbf0169cf 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -329,6 +329,7 @@ mod test { max_vpc_ipv4_subnet_prefix = 27 [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 @@ -348,6 +349,9 @@ mod test { Config { deployment: DeploymentConfig { id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), + rack_id: "38b90dc4-c22a-65ba-f49a-f051fe01208f" + .parse() + .unwrap(), dropshot_external: ConfigDropshot { bind_address: "10.1.2.3:4567" .parse::() @@ -407,6 +411,7 @@ mod test { address = "[::1]:8123" [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 @@ -448,6 +453,7 @@ mod test { address = "[::1]:8123" [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 @@ -503,6 +509,7 @@ mod test { max_vpc_ipv4_subnet_prefix = 100 [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 499eee458bc..6c28185ce7d 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -4034,8 +4034,9 @@ mod test { 0, 0, ); + let rack_id = Uuid::new_v4(); let sled_id = Uuid::new_v4(); - let sled = Sled::new(sled_id, bogus_addr.clone()); + let sled = Sled::new(sled_id, bogus_addr.clone(), rack_id); datastore.sled_upsert(sled).await.unwrap(); sled_id } @@ -4391,14 +4392,15 @@ mod test { let opctx = OpContext::for_tests(logctx.log.new(o!()), datastore.clone()); + let rack_id = Uuid::new_v4(); let addr1 = "[fd00:1de::1]:12345".parse().unwrap(); let sled1_id = "0de4b299-e0b4-46f0-d528-85de81a7095f".parse().unwrap(); - let sled1 = db::model::Sled::new(sled1_id, addr1); + let sled1 = db::model::Sled::new(sled1_id, addr1, rack_id); 
datastore.sled_upsert(sled1).await.unwrap(); let addr2 = "[fd00:1df::1]:12345".parse().unwrap(); let sled2_id = "66285c18-0c79-43e0-e54f-95271f271314".parse().unwrap(); - let sled2 = db::model::Sled::new(sled2_id, addr2); + let sled2 = db::model::Sled::new(sled2_id, addr2, rack_id); datastore.sled_upsert(sled2).await.unwrap(); let ip = datastore.next_ipv6_address(&opctx, sled1_id).await.unwrap(); diff --git a/nexus/src/db/model/sled.rs b/nexus/src/db/model/sled.rs index ad756c3473f..ebe492c7459 100644 --- a/nexus/src/db/model/sled.rs +++ b/nexus/src/db/model/sled.rs @@ -21,6 +21,8 @@ pub struct Sled { time_deleted: Option>, rcgen: Generation, + pub rack_id: Uuid, + // ServiceAddress (Sled Agent). pub ip: ipv6::Ipv6Addr, pub port: SqlU16, @@ -30,7 +32,7 @@ pub struct Sled { } impl Sled { - pub fn new(id: Uuid, addr: SocketAddrV6) -> Self { + pub fn new(id: Uuid, addr: SocketAddrV6, rack_id: Uuid) -> Self { let last_used_address = { let mut segments = addr.ip().segments(); segments[7] += omicron_common::address::RSS_RESERVED_ADDRESSES; @@ -40,6 +42,7 @@ impl Sled { identity: SledIdentity::new(id), time_deleted: None, rcgen: Generation::new(), + rack_id, ip: ipv6::Ipv6Addr::from(addr.ip()), port: addr.port().into(), last_used_address, diff --git a/nexus/src/db/schema.rs b/nexus/src/db/schema.rs index a6d281d987e..41c8c3527b9 100644 --- a/nexus/src/db/schema.rs +++ b/nexus/src/db/schema.rs @@ -297,6 +297,7 @@ table! { time_deleted -> Nullable, rcgen -> Int8, + rack_id -> Uuid, ip -> Inet, port -> Int4, last_used_address -> Inet, diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 79f8a2cd838..f0d5210930b 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -36,7 +36,6 @@ use external_api::http_entrypoints::external_api; use internal_api::http_entrypoints::internal_api; use slog::Logger; use std::sync::Arc; -use uuid::Uuid; #[macro_use] extern crate slog; @@ -82,7 +81,6 @@ impl Server { /// Start a nexus server. pub async fn start( config: &Config, - rack_id: Uuid, log: &Logger, ) -> Result { let log = log.new(o!("name" => config.deployment.id.to_string())); @@ -90,7 +88,8 @@ impl Server { let ctxlog = log.new(o!("component" => "ServerContext")); - let apictx = ServerContext::new(rack_id, ctxlog, &config)?; + let apictx = + ServerContext::new(config.deployment.rack_id, ctxlog, &config)?; let http_server_starter_external = dropshot::HttpServerStarter::new( &config.deployment.dropshot_external, @@ -167,8 +166,7 @@ pub async fn run_server(config: &Config) -> Result<(), String> { } else { debug!(log, "registered DTrace probes"); } - let rack_id = Uuid::new_v4(); - let server = Server::start(config, rack_id, &log).await?; + let server = Server::start(config, &log).await?; server.register_as_producer().await; server.wait_for_finish().await } diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 02b9a0d7b7d..ed056f48d8d 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -90,7 +90,6 @@ pub async fn test_setup_with_config( config: &mut omicron_nexus::Config, ) -> ControlPlaneTestContext { let logctx = LogContext::new(test_name, &config.pkg.log); - let rack_id = Uuid::parse_str(RACK_UUID).unwrap(); let log = &logctx.log; // Start up CockroachDB. 
@@ -104,9 +103,8 @@ pub async fn test_setup_with_config( nexus_config::Database::FromUrl { url: database.pg_config().clone() }; config.pkg.timeseries_db.address.set_port(clickhouse.port()); - let server = omicron_nexus::Server::start(&config, rack_id, &logctx.log) - .await - .unwrap(); + let server = + omicron_nexus::Server::start(&config, &logctx.log).await.unwrap(); server .apictx .nexus diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index fc432554bfa..507d92baf91 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -245,6 +245,7 @@ impl Agent { &self.sled_config, self.parent_log.clone(), sled_address, + request.rack_id, ) .await .map_err(|e| { diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index def1f55c068..fdbbf2c4295 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -8,13 +8,20 @@ use super::trust_quorum::ShareDistribution; use omicron_common::address::{Ipv6Subnet, SLED_PREFIX}; use serde::{Deserialize, Serialize}; use std::borrow::Cow; +use uuid::Uuid; /// Configuration information for launching a Sled Agent. #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] pub struct SledAgentRequest { + /// Uuid of the Sled Agent to be created. + pub id: Uuid, + /// Portion of the IP space to be managed by the Sled Agent. pub subnet: Ipv6Subnet, + /// Uuid of the rack to which this sled agent belongs. + pub rack_id: Uuid, + /// Share of the rack secret for this Sled Agent. // TODO-cleanup This is currently optional because we don't do trust quorum // shares for single-node deployments (i.e., most dev/test environments), diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 0f8775ed932..c48a20cc4bc 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -357,6 +357,7 @@ impl ServiceInner { (request, (idx, bootstrap_addr)) }); + let rack_id = Uuid::new_v4(); let allocations = requests_and_sleds.map(|(request, sled)| { let (idx, bootstrap_addr) = sled; info!( @@ -373,7 +374,9 @@ impl ServiceInner { bootstrap_addr, SledAllocation { initialization_request: SledAgentRequest { + id: Uuid::new_v4(), subnet, + rack_id, trust_quorum_share: maybe_rack_secret_shares .as_mut() .map(|shares_iter| { diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 3b31854628e..df596db8d01 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -38,6 +38,7 @@ impl Server { config: &Config, log: Logger, addr: SocketAddrV6, + rack_id: Uuid, ) -> Result { info!(log, "setting up sled agent server"); @@ -47,10 +48,15 @@ impl Server { client_log, )); - let sled_agent = - SledAgent::new(&config, log.clone(), nexus_client.clone(), addr) - .await - .map_err(|e| e.to_string())?; + let sled_agent = SledAgent::new( + &config, + log.clone(), + nexus_client.clone(), + addr, + rack_id, + ) + .await + .map_err(|e| e.to_string())?; let mut dropshot_config = dropshot::ConfigDropshot::default(); dropshot_config.request_body_max_bytes = 1024 * 1024; diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 3f617aaf399..dde2ef47937 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -25,6 +25,7 @@ use std::path::{Path, PathBuf}; use std::str::FromStr; use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; +use uuid::Uuid; // The filename of ServiceManager's internal storage. 
const SERVICE_CONFIG_FILENAME: &str = "service.toml"; @@ -124,6 +125,7 @@ pub struct ServiceManager { vnic_allocator: VnicAllocator, underlay_vnic: EtherstubVnic, underlay_address: Ipv6Addr, + rack_id: Uuid, } impl ServiceManager { @@ -143,6 +145,7 @@ impl ServiceManager { underlay_vnic: EtherstubVnic, underlay_address: Ipv6Addr, config: Config, + rack_id: Uuid, ) -> Result { debug!(log, "Creating new ServiceManager"); let mgr = Self { @@ -152,6 +155,7 @@ impl ServiceManager { vnic_allocator: VnicAllocator::new("Service", etherstub), underlay_vnic, underlay_address, + rack_id, }; let config_path = mgr.services_config_path(); @@ -316,6 +320,7 @@ impl ServiceManager { // cannot be known at packaging time. let deployment_config = NexusDeploymentConfig { id: service.id, + rack_id: self.rack_id, dropshot_external: ConfigDropshot { bind_address: SocketAddr::V6(external_address), request_body_max_bytes: 1048576, @@ -702,6 +707,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -728,6 +734,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -756,6 +763,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -773,6 +781,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -797,6 +806,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -816,6 +826,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, config, + Uuid::new_v4(), ) .await .unwrap(); diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index e674663bac6..14e34f0d8d3 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -115,6 +115,7 @@ impl SledAgent { log: Logger, nexus_client: Arc, sled_address: SocketAddrV6, + rack_id: Uuid, ) -> Result { let id = &config.id; @@ -248,6 +249,7 @@ impl SledAgent { etherstub_vnic.clone(), *sled_address.ip(), services::Config::default(), + rack_id, ) .await?; From a4309ac5bf0b986b347be1d1583ff370a626c2ec Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 18:40:57 -0400 Subject: [PATCH 11/27] need rack_id in the test config too --- nexus/tests/config.test.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 0a8789893a1..fdfeb5effb4 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -39,6 +39,7 @@ max_vpc_ipv4_subnet_prefix = 29 # Identifier for this instance of Nexus. # NOTE: The test suite always overrides this. 
id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" +rack_id = "c19a698f-c6f9-4a17-ae30-20d711b8f7dc" # # NOTE: for the test suite, the port MUST be 0 (in order to bind to any From ff2d7b91b45522a56fa906eb34161fd98858ffc8 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 15:22:29 -0400 Subject: [PATCH 12/27] [internal-dns] Avoid 'picking ports' --- Cargo.lock | 10 ----- internal-dns/Cargo.toml | 1 - internal-dns/src/bin/dns-server.rs | 12 +++--- internal-dns/src/dns_server.rs | 46 +++++++++++++++------- internal-dns/tests/basic_test.rs | 61 ++++++++++++++---------------- 5 files changed, 66 insertions(+), 64 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8d997dd9762..14488da8837 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2315,7 +2315,6 @@ dependencies = [ "omicron-test-utils", "openapi-lint", "openapiv3", - "portpicker", "pretty-hex 0.3.0", "schemars", "serde", @@ -3867,15 +3866,6 @@ dependencies = [ "universal-hash", ] -[[package]] -name = "portpicker" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be97d76faf1bfab666e1375477b23fde79eccf0276e9b63b92a39d676a889ba9" -dependencies = [ - "rand 0.8.5", -] - [[package]] name = "postcard" version = "0.7.3" diff --git a/internal-dns/Cargo.toml b/internal-dns/Cargo.toml index 886fa72cc18..d49859f18c1 100644 --- a/internal-dns/Cargo.toml +++ b/internal-dns/Cargo.toml @@ -30,7 +30,6 @@ expectorate = "1.0.5" omicron-test-utils = { path = "../test-utils" } openapiv3 = "1.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } -portpicker = "0.1" serde_json = "1.0" subprocess = "0.2.9" trust-dns-resolver = "0.21" diff --git a/internal-dns/src/bin/dns-server.rs b/internal-dns/src/bin/dns-server.rs index 96e9da6feca..12eafcc3599 100644 --- a/internal-dns/src/bin/dns-server.rs +++ b/internal-dns/src/bin/dns-server.rs @@ -54,20 +54,18 @@ async fn main() -> Result<(), anyhow::Error> { let db = Arc::new(sled::open(&config.data.storage_path)?); - { + let _dns_server = { let db = db.clone(); let log = log.clone(); let dns_config = internal_dns::dns_server::Config { bind_address: dns_address.to_string(), zone: zone.to_string(), }; - tokio::spawn(async move { - internal_dns::dns_server::run(log, db, dns_config).await - }); - } + internal_dns::dns_server::run(log, db, dns_config).await? 
+ }; - let server = internal_dns::start_server(config, log, db).await?; - server + let dropshot_server = internal_dns::start_server(config, log, db).await?; + dropshot_server .await .map_err(|error_message| anyhow!("server exiting: {}", error_message)) } diff --git a/internal-dns/src/dns_server.rs b/internal-dns/src/dns_server.rs index bffda7cc73f..ccebda582f7 100644 --- a/internal-dns/src/dns_server.rs +++ b/internal-dns/src/dns_server.rs @@ -34,23 +34,43 @@ pub struct Config { pub zone: String, } -pub async fn run(log: Logger, db: Arc, config: Config) -> Result<()> { +pub struct Server { + pub address: SocketAddr, + pub handle: tokio::task::JoinHandle>, +} + +impl Server { + pub fn close(self) { + self.handle.abort() + } +} + +pub async fn run( + log: Logger, + db: Arc, + config: Config, +) -> Result { let socket = Arc::new(UdpSocket::bind(config.bind_address).await?); + let address = socket.local_addr()?; - loop { - let mut buf = vec![0u8; 16384]; - let (n, src) = socket.recv_from(&mut buf).await?; - buf.resize(n, 0); + let handle = tokio::task::spawn(async move { + loop { + let mut buf = vec![0u8; 16384]; + let (n, src) = socket.recv_from(&mut buf).await?; + buf.resize(n, 0); - let socket = socket.clone(); - let log = log.clone(); - let db = db.clone(); - let zone = config.zone.clone(); + let socket = socket.clone(); + let log = log.clone(); + let db = db.clone(); + let zone = config.zone.clone(); - tokio::spawn(async move { - handle_req(log, db, socket, src, buf, zone).await - }); - } + tokio::spawn(async move { + handle_req(log, db, socket, src, buf, zone).await + }); + } + }); + + Ok(Server { address, handle }) } async fn respond_nxdomain( diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs index 29d358970c7..af72ded52cb 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::net::Ipv6Addr; use std::sync::Arc; use anyhow::{Context, Result}; @@ -280,13 +280,16 @@ pub async fn servfail() -> Result<(), anyhow::Error> { struct TestContext { client: Client, resolver: TokioAsyncResolver, - server: dropshot::HttpServer>, + dns_server: internal_dns::dns_server::Server, + dropshot_server: + dropshot::HttpServer>, tmp: tempdir::TempDir, } impl TestContext { async fn cleanup(self) { - self.server.close().await.expect("Failed to clean up server"); + self.dns_server.close(); + self.dropshot_server.close().await.expect("Failed to clean up server"); self.tmp.close().expect("Failed to clean up tmp directory"); } } @@ -295,7 +298,7 @@ async fn init_client_server( zone: String, ) -> Result { // initialize dns server config - let (tmp, config, dropshot_port, dns_port) = test_config()?; + let (tmp, config) = test_config()?; let log = config .log .to_logger("internal-dns") @@ -305,17 +308,21 @@ async fn init_client_server( let db = Arc::new(sled::open(&config.data.storage_path)?); db.clear()?; - let client = - Client::new(&format!("http://[::1]:{}", dropshot_port), log.clone()); + // launch a dns server + let dns_server = { + let db = db.clone(); + let log = log.clone(); + let dns_config = internal_dns::dns_server::Config { + bind_address: "[::1]:0".into(), + zone, + }; + + internal_dns::dns_server::run(log, db, dns_config).await? 
+ }; let mut rc = ResolverConfig::new(); rc.add_name_server(NameServerConfig { - socket_addr: SocketAddr::V6(SocketAddrV6::new( - Ipv6Addr::LOCALHOST, - dns_port, - 0, - 0, - )), + socket_addr: dns_server.address, protocol: Protocol::Udp, tls_dns_name: None, trust_nx_responses: false, @@ -325,33 +332,21 @@ async fn init_client_server( let resolver = TokioAsyncResolver::tokio(rc, ResolverOpts::default()).unwrap(); - // launch a dns server - { - let db = db.clone(); - let log = log.clone(); - let dns_config = internal_dns::dns_server::Config { - bind_address: format!("[::1]:{}", dns_port), - zone, - }; - - tokio::spawn(async move { - internal_dns::dns_server::run(log, db, dns_config).await - }); - } - // launch a dropshot server - let server = internal_dns::start_server(config, log, db).await?; + let dropshot_server = + internal_dns::start_server(config, log.clone(), db).await?; // wait for server to start tokio::time::sleep(tokio::time::Duration::from_millis(250)).await; - Ok(TestContext { client, resolver, server, tmp }) + let client = + Client::new(&format!("http://{}", dropshot_server.local_addr()), log); + + Ok(TestContext { client, resolver, dns_server, dropshot_server, tmp }) } fn test_config( -) -> Result<(tempdir::TempDir, internal_dns::Config, u16, u16), anyhow::Error> { - let dropshot_port = portpicker::pick_unused_port().expect("pick port"); - let dns_port = portpicker::pick_unused_port().expect("pick port"); +) -> Result<(tempdir::TempDir, internal_dns::Config), anyhow::Error> { let tmp_dir = tempdir::TempDir::new("internal-dns-test")?; let mut storage_path = tmp_dir.path().to_path_buf(); storage_path.push("test"); @@ -362,7 +357,7 @@ fn test_config( level: dropshot::ConfigLoggingLevel::Info, }, dropshot: dropshot::ConfigDropshot { - bind_address: format!("[::1]:{}", dropshot_port).parse().unwrap(), + bind_address: format!("[::1]:0").parse().unwrap(), request_body_max_bytes: 1024, ..Default::default() }, @@ -372,5 +367,5 @@ fn test_config( }, }; - Ok((tmp_dir, config, dropshot_port, dns_port)) + Ok((tmp_dir, config)) } From 2a035a5ab9fc56068266d88d9ed2cc8edae1c63f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 15:29:28 -0400 Subject: [PATCH 13/27] Changes from rss-handoff --- Cargo.lock | 7 + internal-dns-client/Cargo.toml | 9 + internal-dns-client/src/multiclient.rs | 612 ++++++++++++++++++++++--- internal-dns-client/src/names.rs | 103 ++++- 4 files changed, 670 insertions(+), 61 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3956c2c3a4b..fe1cc390b81 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2338,13 +2338,20 @@ dependencies = [ name = "internal-dns-client" version = "0.1.0" dependencies = [ + "dropshot", "futures", + "internal-dns", "omicron-common 0.1.0", + "omicron-test-utils", "progenitor", "reqwest", "serde", "serde_json", + "sled", "slog", + "tempfile", + "thiserror", + "tokio", "trust-dns-proto", "trust-dns-resolver", "uuid", diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml index 3303ddfc44c..4872699610a 100644 --- a/internal-dns-client/Cargo.toml +++ b/internal-dns-client/Cargo.toml @@ -12,6 +12,15 @@ reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } serde = { version = "1.0", features = [ "derive" ] } serde_json = "1.0" slog = { version = "2.5.0", features = [ "max_level_trace", "release_max_level_debug" ] } +thiserror = "1.0" trust-dns-proto = "0.21" trust-dns-resolver = "0.21" uuid = { version = "1.1.0", features = [ "v4", "serde" ] } + +[dev-dependencies] +dropshot = { git = 
"https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } +internal-dns = { path = "../internal-dns" } +omicron-test-utils = { path = "../test-utils" } +sled = "0.34" +tempfile = "3.3" +tokio = { version = "1.18", features = [ "full" ] } diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 24c8817c274..ca8387fca45 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -7,49 +7,122 @@ use futures::stream::{self, StreamExt, TryStreamExt}; use omicron_common::address::{ Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, DNS_SERVER_PORT, }; -use omicron_common::backoff::{ - internal_service_policy, retry_notify, BackoffError, -}; -use slog::{info, warn, Logger}; -use std::net::{SocketAddr, SocketAddrV6}; +use slog::{info, Logger}; +use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use trust_dns_resolver::config::{ NameServerConfig, Protocol, ResolverConfig, ResolverOpts, }; use trust_dns_resolver::TokioAsyncResolver; -type DnsError = crate::Error; +pub type DnsError = crate::Error; + +/// Describes how to find the DNS servers. +/// +/// In production code, this is nearly always [`Ipv6Subnet`], +/// but it allows a point of dependency-injection for tests to supply their +/// own address lookups. +pub trait DnsAddressLookup { + fn dropshot_server_addrs(&self) -> Vec; + + fn dns_server_addrs(&self) -> Vec; +} + +fn subnet_to_ips( + subnet: Ipv6Subnet, +) -> impl Iterator { + ReservedRackSubnet::new(subnet) + .get_dns_subnets() + .into_iter() + .map(|dns_subnet| IpAddr::V6(dns_subnet.dns_address().ip())) +} + +impl DnsAddressLookup for Ipv6Subnet { + fn dropshot_server_addrs(&self) -> Vec { + subnet_to_ips(*self) + .map(|address| SocketAddr::new(address, DNS_SERVER_PORT)) + .collect() + } + + fn dns_server_addrs(&self) -> Vec { + subnet_to_ips(*self) + .map(|address| SocketAddr::new(address, DNS_PORT)) + .collect() + } +} + +/// Describes a service which may be inserted into DNS records. +pub trait Service { + fn aaaa(&self) -> crate::names::AAAA; + fn srv(&self) -> crate::names::SRV; + fn address(&self) -> SocketAddrV6; +} /// A connection used to update multiple DNS servers. pub struct Updater { + log: Logger, clients: Vec, } impl Updater { - /// Creates a new "Updater", capable of communicating with all - /// DNS servers within the AZ. - pub fn new(subnet: Ipv6Subnet, log: Logger) -> Self { - let clients = ReservedRackSubnet::new(subnet) - .get_dns_subnets() + pub fn new(address_getter: &impl DnsAddressLookup, log: Logger) -> Self { + let addrs = address_getter.dropshot_server_addrs(); + Self::new_from_addrs(addrs, log) + } + + fn new_from_addrs(addrs: Vec, log: Logger) -> Self { + let clients = addrs .into_iter() - .map(|dns_subnet| { - let addr = dns_subnet.dns_address().ip(); + .map(|addr| { info!(log, "Adding DNS server: {}", addr); - crate::Client::new( - &format!("http://[{}]:{}", addr, DNS_SERVER_PORT), - log.clone(), - ) + crate::Client::new(&format!("http://{}", addr), log.clone()) }) .collect::>(); - Self { clients } + Self { log, clients } } - /// Utility function to insert: - /// - A set of uniquely-named AAAA records, each corresponding to an address - /// - An SRV record, pointing to each of the AAAA records. + /// Inserts all service records into the DNS server. + /// + /// This method is most efficient when records are sorted by SRV key. 
pub async fn insert_dns_records( &self, - log: &Logger, + records: &Vec, + ) -> Result<(), DnsError> { + let mut records = records.iter().peekable(); + + while let Some(record) = records.next() { + let srv = record.srv(); + info!(self.log, "Inserting DNS record: {:?}", srv); + + match &srv { + &crate::names::SRV::Service(_) => { + let mut aaaa = vec![(record.aaaa(), record.address())]; + while let Some(record) = records.peek() { + if record.srv() == srv { + let record = records.next().unwrap(); + aaaa.push((record.aaaa(), record.address())); + } else { + break; + } + } + + self.insert_dns_records_internal(aaaa, srv).await?; + } + &crate::names::SRV::Backend(_, _) => { + let aaaa = vec![(record.aaaa(), record.address())]; + self.insert_dns_records_internal(aaaa, record.srv()) + .await?; + } + }; + } + Ok(()) + } + + // Utility function to insert: + // - A set of uniquely-named AAAA records, each corresponding to an address + // - An SRV record, pointing to each of the AAAA records. + async fn insert_dns_records_internal( + &self, aaaa: Vec<(crate::names::AAAA, SocketAddrV6)>, srv_key: crate::names::SRV, ) -> Result<(), DnsError> { @@ -76,20 +149,7 @@ impl Updater { }) .collect::>(), }); - - let set_record = || async { - self.dns_records_set(&records) - .await - .map_err(BackoffError::transient)?; - Ok::<(), BackoffError>(()) - }; - let log_failure = |error, _| { - warn!(log, "Failed to set DNS records"; "error" => ?error); - }; - - retry_notify(internal_service_policy(), set_record, log_failure) - .await?; - Ok(()) + self.dns_records_set(&records).await } /// Sets a records on all DNS servers. @@ -129,27 +189,463 @@ impl Updater { } } -/// Creates a resolver using all internal DNS name servers. -pub fn create_resolver( - subnet: Ipv6Subnet, -) -> Result { - let mut rc = ResolverConfig::new(); - let dns_ips = ReservedRackSubnet::new(subnet) - .get_dns_subnets() - .into_iter() - .map(|subnet| subnet.dns_address().ip()) - .collect::>(); - - for dns_ip in dns_ips { - rc.add_name_server(NameServerConfig { - socket_addr: SocketAddr::V6(SocketAddrV6::new( - dns_ip, DNS_PORT, 0, 0, - )), - protocol: Protocol::Udp, - tls_dns_name: None, - trust_nx_responses: false, - bind_addr: None, - }); +#[derive(Debug, Clone, thiserror::Error)] +pub enum ResolveError { + #[error(transparent)] + Resolve(#[from] trust_dns_resolver::error::ResolveError), + + #[error("Record not found for SRV key: {0}")] + NotFound(crate::names::SRV), +} + +/// A wrapper around a DNS resolver, providing a way to conveniently +/// look up IP addresses of services based on their SRV keys. +pub struct Resolver { + inner: Box, +} + +impl Resolver { + pub fn new( + address_getter: &impl DnsAddressLookup, + ) -> Result { + let dns_addrs = address_getter.dns_server_addrs(); + Self::new_from_addrs(dns_addrs) + } + + fn new_from_addrs( + dns_addrs: Vec, + ) -> Result { + let mut rc = ResolverConfig::new(); + for socket_addr in dns_addrs.into_iter() { + rc.add_name_server(NameServerConfig { + socket_addr, + protocol: Protocol::Udp, + tls_dns_name: None, + trust_nx_responses: false, + bind_addr: None, + }); + } + let inner = + Box::new(TokioAsyncResolver::tokio(rc, ResolverOpts::default())?); + + Ok(Self { inner }) + } + + /// Convenience wrapper for [`Resolver::new`] which determines the subnet + /// based on a provided IP address. + pub fn new_from_ip(address: Ipv6Addr) -> Result { + let subnet = Ipv6Subnet::::new(address); + + Resolver::new(&subnet) + } + + /// Looks up a single [`Ipv6Addr`] based on the SRV name. 
+ /// Returns an error if the record does not exist. + // TODO: There are lots of ways this API can expand: Caching, + // actually respecting TTL, looking up ports, etc. + // + // For now, however, it serves as a very simple "get everyone using DNS" + // API that can be improved upon later. + pub async fn lookup_ipv6( + &self, + srv: crate::names::SRV, + ) -> Result { + let response = self.inner.ipv6_lookup(&srv.to_string()).await?; + let address = response + .iter() + .next() + .ok_or_else(|| ResolveError::NotFound(srv))?; + Ok(*address) + } + + pub async fn lookup_ip( + &self, + srv: crate::names::SRV, + ) -> Result { + let response = self.inner.lookup_ip(&srv.to_string()).await?; + let address = response + .iter() + .next() + .ok_or_else(|| ResolveError::NotFound(srv))?; + Ok(address) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::names::{BackendName, ServiceName, AAAA, SRV}; + use omicron_test_utils::dev::test_setup_log; + use std::str::FromStr; + use std::sync::Arc; + use tempfile::TempDir; + use uuid::Uuid; + + struct DnsServer { + _storage: TempDir, + dns_server: internal_dns::dns_server::Server, + dropshot_server: + dropshot::HttpServer>, + } + + impl DnsServer { + async fn create(log: &Logger) -> Self { + let storage = + TempDir::new().expect("Failed to create temporary directory"); + + let db = Arc::new(sled::open(&storage.path()).unwrap()); + + let dns_server = { + let db = db.clone(); + let log = log.clone(); + let dns_config = internal_dns::dns_server::Config { + bind_address: "[::1]:0".to_string(), + zone: crate::names::DNS_ZONE.into(), + }; + + internal_dns::dns_server::run(log, db, dns_config) + .await + .unwrap() + }; + + let config = internal_dns::Config { + log: dropshot::ConfigLogging::StderrTerminal { + level: dropshot::ConfigLoggingLevel::Info, + }, + dropshot: dropshot::ConfigDropshot { + bind_address: "[::1]:0".parse().unwrap(), + request_body_max_bytes: 1024, + ..Default::default() + }, + data: internal_dns::dns_data::Config { + nmax_messages: 16, + storage_path: storage.path().to_string_lossy().into(), + }, + }; + + let dropshot_server = + internal_dns::start_server(config, log.clone(), db) + .await + .unwrap(); + + Self { _storage: storage, dns_server, dropshot_server } + } + + fn dns_server_address(&self) -> SocketAddr { + self.dns_server.address + } + + fn dropshot_server_address(&self) -> SocketAddr { + self.dropshot_server.local_addr() + } + } + + // A test-only way to infer DNS addresses. + // + // Rather than inferring DNS server addresses from the rack subnet, + // they may be explicitly supplied. This results in easier-to-test code. + #[derive(Default)] + struct LocalAddressGetter { + addrs: Vec<(SocketAddr, SocketAddr)>, + } + + impl LocalAddressGetter { + fn add_dns_server( + &mut self, + dns_address: SocketAddr, + server_address: SocketAddr, + ) { + self.addrs.push((dns_address, server_address)); + } + } + + impl DnsAddressLookup for LocalAddressGetter { + fn dropshot_server_addrs(&self) -> Vec { + self.addrs + .iter() + .map(|(_dns_address, dropshot_address)| *dropshot_address) + .collect() + } + + fn dns_server_addrs(&self) -> Vec { + self.addrs + .iter() + .map(|(dns_address, _dropshot_address)| *dns_address) + .collect() + } + } + + // The resolver cannot look up IPs before records have been inserted. 
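The comment above introduces the failure-path test that follows. For the success path, a consumer elsewhere in the workspace would drive the resolver much the way the oximeter patch later in this series does; a minimal sketch, with the free function invented for illustration:

use internal_dns_client::multiclient::{ResolveError, Resolver};
use internal_dns_client::names::{ServiceName, SRV};
use std::net::Ipv6Addr;

// Given its own underlay address, a service derives the AZ subnet, builds a
// resolver, and asks for the address of any CockroachDB node.
async fn find_cockroach(my_addr: Ipv6Addr) -> Result<Ipv6Addr, ResolveError> {
    let resolver = Resolver::new_from_ip(my_addr)?;
    resolver.lookup_ipv6(SRV::Service(ServiceName::Cockroach)).await
}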
+ #[tokio::test] + async fn lookup_nonexistent_record_fails() { + let logctx = test_setup_log("lookup_nonexistent_record_fails"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new(&address_getter) + .expect("Error creating localhost resolver"); + + let err = resolver + .lookup_ip(SRV::Service(ServiceName::Cockroach)) + .await + .expect_err("Looking up non-existent service should fail"); + + let dns_error = match err { + ResolveError::Resolve(err) => err, + _ => panic!("Unexpected error: {err}"), + }; + assert!( + matches!( + dns_error.kind(), + trust_dns_resolver::error::ResolveErrorKind::NoRecordsFound { .. }, + ), + "Saw error: {dns_error}", + ); + logctx.cleanup_successful(); + } + + #[derive(Clone)] + struct TestServiceRecord { + aaaa: AAAA, + srv: SRV, + addr: SocketAddrV6, + } + + impl TestServiceRecord { + fn new(aaaa: AAAA, srv: SRV, addr: SocketAddrV6) -> Self { + Self { aaaa, srv, addr } + } + } + + impl Service for TestServiceRecord { + fn aaaa(&self) -> AAAA { + self.aaaa.clone() + } + + fn srv(&self) -> SRV { + self.srv.clone() + } + + fn address(&self) -> SocketAddrV6 { + self.addr + } + } + + // Insert and retreive a single DNS record. + #[tokio::test] + async fn insert_and_lookup_one_record() { + let logctx = test_setup_log("insert_and_lookup_one_record"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new(&address_getter) + .expect("Error creating localhost resolver"); + let updater = Updater::new(&address_getter, logctx.log.clone()); + + let record = TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 12345, + 0, + 0, + ), + ); + updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, record.addr.ip()); + + logctx.cleanup_successful(); + } + + // Insert multiple DNS records of different types. 
+ #[tokio::test] + async fn insert_and_lookup_multiple_records() { + let logctx = test_setup_log("insert_and_lookup_multiple_records"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new(&address_getter) + .expect("Error creating localhost resolver"); + let updater = Updater::new(&address_getter, logctx.log.clone()); + + let cockroach_addrs = [ + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 1111, + 0, + 0, + ), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::02").unwrap(), + 2222, + 0, + 0, + ), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::03").unwrap(), + 3333, + 0, + 0, + ), + ]; + let clickhouse_addr = SocketAddrV6::new( + Ipv6Addr::from_str("fe::01").unwrap(), + 4444, + 0, + 0, + ); + let crucible_addr = SocketAddrV6::new( + Ipv6Addr::from_str("fd::02").unwrap(), + 5555, + 0, + 0, + ); + + let records = vec![ + // Three Cockroach services + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + cockroach_addrs[0], + ), + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + cockroach_addrs[1], + ), + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + cockroach_addrs[2], + ), + // One Clickhouse service + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Clickhouse), + clickhouse_addr, + ), + // One Backend service + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Backend(BackendName::Crucible, Uuid::new_v4()), + crucible_addr, + ), + ]; + updater.insert_dns_records(&records).await.unwrap(); + + // Look up Cockroach + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert!(cockroach_addrs.iter().any(|addr| addr.ip() == &ip)); + + // Look up Clickhouse + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Clickhouse)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, clickhouse_addr.ip()); + + // Look up Backend Service + let ip = resolver + .lookup_ipv6(records[4].srv.clone()) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, crucible_addr.ip()); + + // If we remove the AAAA records for two of the CRDB services, + // only one will remain. 
+ updater + .dns_records_delete(&vec![DnsRecordKey { + name: records[0].aaaa.to_string(), + }]) + .await + .expect("Should have been able to delete record"); + updater + .dns_records_delete(&vec![DnsRecordKey { + name: records[1].aaaa.to_string(), + }]) + .await + .expect("Should have been able to delete record"); + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, cockroach_addrs[2].ip()); + + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn update_record() { + let logctx = test_setup_log("update_record"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new(&address_getter) + .expect("Error creating localhost resolver"); + let updater = Updater::new(&address_getter, logctx.log.clone()); + + // Insert a record, observe that it exists. + let mut record = TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 12345, + 0, + 0, + ), + ); + updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, record.addr.ip()); + + // If we insert the same record with a new address, it should be + // updated. + record.addr = SocketAddrV6::new( + Ipv6Addr::from_str("ee::02").unwrap(), + 54321, + 0, + 0, + ); + updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, record.addr.ip()); + + logctx.cleanup_successful(); } - TokioAsyncResolver::tokio(rc, ResolverOpts::default()) } diff --git a/internal-dns-client/src/names.rs b/internal-dns-client/src/names.rs index 6384ec9e503..dbcc0d9f01c 100644 --- a/internal-dns-client/src/names.rs +++ b/internal-dns-client/src/names.rs @@ -2,22 +2,63 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +//! Naming scheme for Internal DNS names (RFD 248). + use std::fmt; use uuid::Uuid; -const DNS_ZONE: &str = "control-plane.oxide.internal"; +pub(crate) const DNS_ZONE: &str = "control-plane.oxide.internal"; + +/// Names for services where backends are interchangeable. +#[derive(Clone, Debug, PartialEq, PartialOrd)] +pub enum ServiceName { + Clickhouse, + Cockroach, + InternalDNS, + Nexus, + Oximeter, +} +impl fmt::Display for ServiceName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + ServiceName::Clickhouse => write!(f, "clickhouse"), + ServiceName::Cockroach => write!(f, "cockroach"), + ServiceName::InternalDNS => write!(f, "internalDNS"), + ServiceName::Nexus => write!(f, "nexus"), + ServiceName::Oximeter => write!(f, "oximeter"), + } + } +} + +/// Names for services where backends are not interchangeable. 
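The split mirrors the two SRV forms defined above: a `ServiceName` produces one shared SRV name whose targets are interchangeable, while a `BackendName` is scoped to a single instance by UUID. Concretely, matching the display tests further down:

    _cockroach._tcp.control-plane.oxide.internal            (ServiceName::Cockroach)
    _sledagent._tcp.<uuid>.control-plane.oxide.internal     (BackendName::SledAgent)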
+#[derive(Clone, Debug, PartialEq, PartialOrd)] +pub enum BackendName { + Crucible, + SledAgent, +} + +impl fmt::Display for BackendName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + BackendName::Crucible => write!(f, "crucible"), + BackendName::SledAgent => write!(f, "sledagent"), + } + } +} + +#[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum SRV { /// A service identified and accessed by name, such as "nexus", "CRDB", etc. /// /// This is used in cases where services are interchangeable. - Service(String), + Service(ServiceName), /// A service identified by name and a unique identifier. /// /// This is used in cases where services are not interchangeable, such as /// for the Sled agent. - Backend(String, Uuid), + Backend(BackendName, Uuid), } impl fmt::Display for SRV { @@ -33,6 +74,7 @@ impl fmt::Display for SRV { } } +#[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum AAAA { /// Identifies an AAAA record for a sled. Sled(Uuid), @@ -53,3 +95,58 @@ impl fmt::Display for AAAA { } } } + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn display_srv_service() { + assert_eq!( + SRV::Service(ServiceName::Clickhouse).to_string(), + "_clickhouse._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::Cockroach).to_string(), + "_cockroach._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::InternalDNS).to_string(), + "_internalDNS._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::Nexus).to_string(), + "_nexus._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::Oximeter).to_string(), + "_oximeter._tcp.control-plane.oxide.internal", + ); + } + + #[test] + fn display_srv_backend() { + let uuid = Uuid::nil(); + assert_eq!( + SRV::Backend(BackendName::Crucible, uuid).to_string(), + "_crucible._tcp.00000000-0000-0000-0000-000000000000.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Backend(BackendName::SledAgent, uuid).to_string(), + "_sledagent._tcp.00000000-0000-0000-0000-000000000000.control-plane.oxide.internal", + ); + } + + #[test] + fn display_aaaa() { + let uuid = Uuid::nil(); + assert_eq!( + AAAA::Sled(uuid).to_string(), + "00000000-0000-0000-0000-000000000000.sled.control-plane.oxide.internal", + ); + assert_eq!( + AAAA::Zone(uuid).to_string(), + "00000000-0000-0000-0000-000000000000.host.control-plane.oxide.internal", + ); + } +} From c3a49bb82b6de4a5e138186d83c862655d730eb3 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 15:49:19 -0400 Subject: [PATCH 14/27] [nexus] Add a new user for background tasks --- nexus/src/app/mod.rs | 10 ++++++++++ nexus/src/authn/mod.rs | 11 +++++++++++ nexus/src/db/datastore.rs | 1 + nexus/src/db/fixed_data/role_assignment.rs | 7 +++++++ nexus/src/db/fixed_data/user_builtin.rs | 11 +++++++++++ nexus/tests/integration_tests/users_builtin.rs | 3 +++ 6 files changed, 43 insertions(+) diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 1cb1f6b6ff7..06f7264a124 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -222,6 +222,16 @@ impl Nexus { &self.opctx_external_authn } + /// Returns an [`OpContext`] used for background tasks. 
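As a sketch of the intended call pattern (the task function here is hypothetical, not something this patch adds), a background task asks Nexus for its own context rather than borrowing one from an API request, so its work is attributed to the new "background-work" built-in user:

// Hypothetical periodic task, for illustration only.
async fn background_tick(nexus: &Nexus) {
    // Datastore operations made with this context are authenticated as
    // USER_BACKGROUND_WORK rather than as an external caller.
    let opctx = nexus.opctx_for_background();
    // ... perform periodic datastore work with `opctx` ...
    let _ = opctx;
}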
+ pub fn opctx_for_background(&self) -> OpContext { + OpContext::for_background( + self.log.new(o!("component" => "BackgroundWork")), + Arc::clone(&self.authz), + authn::Context::internal_db_background(), + Arc::clone(&self.db_datastore), + ) + } + /// Used as the body of a "stub" endpoint -- one that's currently /// unimplemented but that we eventually intend to implement /// diff --git a/nexus/src/authn/mod.rs b/nexus/src/authn/mod.rs index 59e5bc7a889..c9399bdb131 100644 --- a/nexus/src/authn/mod.rs +++ b/nexus/src/authn/mod.rs @@ -30,6 +30,7 @@ pub mod silos; pub use crate::db::fixed_data::silo_user::USER_TEST_PRIVILEGED; pub use crate::db::fixed_data::silo_user::USER_TEST_UNPRIVILEGED; +pub use crate::db::fixed_data::user_builtin::USER_BACKGROUND_WORK; pub use crate::db::fixed_data::user_builtin::USER_DB_INIT; pub use crate::db::fixed_data::user_builtin::USER_EXTERNAL_AUTHN; pub use crate::db::fixed_data::user_builtin::USER_INTERNAL_API; @@ -170,6 +171,11 @@ impl Context { Context::context_for_builtin_user(USER_DB_INIT.id) } + /// Returns an authenticated context for Nexus-driven db work. + pub fn internal_db_background() -> Context { + Context::context_for_builtin_user(USER_BACKGROUND_WORK.id) + } + fn context_for_builtin_user(user_builtin_id: Uuid) -> Context { Context { kind: Kind::Authenticated(Details { @@ -213,6 +219,7 @@ impl Context { #[cfg(test)] mod test { use super::Context; + use super::USER_BACKGROUND_WORK; use super::USER_DB_INIT; use super::USER_INTERNAL_API; use super::USER_INTERNAL_READ; @@ -251,6 +258,10 @@ mod test { let actor = authn.actor().unwrap(); assert_eq!(actor.actor_id(), USER_DB_INIT.id); + let authn = Context::internal_db_background(); + let actor = authn.actor().unwrap(); + assert_eq!(actor.actor_id(), USER_BACKGROUND_WORK.id); + let authn = Context::internal_saga_recovery(); let actor = authn.actor().unwrap(); assert_eq!(actor.actor_id(), USER_SAGA_RECOVERY.id); diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 6c28185ce7d..6814b6276ac 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -2990,6 +2990,7 @@ impl DataStore { let builtin_users = [ // Note: "db_init" is also a builtin user, but that one by necessity // is created with the database. + &*authn::USER_BACKGROUND_WORK, &*authn::USER_INTERNAL_API, &*authn::USER_INTERNAL_READ, &*authn::USER_EXTERNAL_AUTHN, diff --git a/nexus/src/db/fixed_data/role_assignment.rs b/nexus/src/db/fixed_data/role_assignment.rs index 94caf552a13..540b57abe50 100644 --- a/nexus/src/db/fixed_data/role_assignment.rs +++ b/nexus/src/db/fixed_data/role_assignment.rs @@ -24,6 +24,13 @@ lazy_static! { *FLEET_ID, role_builtin::FLEET_ADMIN.role_name, ), + RoleAssignment::new( + IdentityType::UserBuiltin, + user_builtin::USER_BACKGROUND_WORK.id, + role_builtin::FLEET_ADMIN.resource_type, + *FLEET_ID, + role_builtin::FLEET_ADMIN.role_name, + ), // The "internal-read" user gets the "viewer" role on the sole // Fleet. This will grant them the ability to read various control diff --git a/nexus/src/db/fixed_data/user_builtin.rs b/nexus/src/db/fixed_data/user_builtin.rs index 1e9dee1b7bf..238a8f5405a 100644 --- a/nexus/src/db/fixed_data/user_builtin.rs +++ b/nexus/src/db/fixed_data/user_builtin.rs @@ -39,6 +39,15 @@ lazy_static! { "used for seeding initial database data", ); + /// Internal user for performing operations driven by Nexus, rather + /// than any API request. 
+ pub static ref USER_BACKGROUND_WORK: UserBuiltinConfig = + UserBuiltinConfig::new_static( + "001de000-05e4-4000-8000-00000000bac3", + "background-work", + "used for Nexus-driven database operations", + ); + /// Internal user used by Nexus when handling internal API requests pub static ref USER_INTERNAL_API: UserBuiltinConfig = UserBuiltinConfig::new_static( @@ -77,6 +86,7 @@ lazy_static! { #[cfg(test)] mod test { use super::super::assert_valid_uuid; + use super::USER_BACKGROUND_WORK; use super::USER_DB_INIT; use super::USER_EXTERNAL_AUTHN; use super::USER_INTERNAL_API; @@ -85,6 +95,7 @@ mod test { #[test] fn test_builtin_user_ids_are_valid() { + assert_valid_uuid(&USER_BACKGROUND_WORK.id); assert_valid_uuid(&USER_DB_INIT.id); assert_valid_uuid(&USER_INTERNAL_API.id); assert_valid_uuid(&USER_EXTERNAL_AUTHN.id); diff --git a/nexus/tests/integration_tests/users_builtin.rs b/nexus/tests/integration_tests/users_builtin.rs index b06741a3067..0df3fbaf04b 100644 --- a/nexus/tests/integration_tests/users_builtin.rs +++ b/nexus/tests/integration_tests/users_builtin.rs @@ -27,6 +27,9 @@ async fn test_users_builtin(cptestctx: &ControlPlaneTestContext) { let u = users.remove(&authn::USER_DB_INIT.name.to_string()).unwrap(); assert_eq!(u.identity.id, authn::USER_DB_INIT.id); + let u = + users.remove(&authn::USER_BACKGROUND_WORK.name.to_string()).unwrap(); + assert_eq!(u.identity.id, authn::USER_BACKGROUND_WORK.id); let u = users.remove(&authn::USER_INTERNAL_API.name.to_string()).unwrap(); assert_eq!(u.identity.id, authn::USER_INTERNAL_API.id); let u = users.remove(&authn::USER_INTERNAL_READ.name.to_string()).unwrap(); From 5440cbf36979efb7ba28ac43083757ffc096cab2 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 16:38:24 -0400 Subject: [PATCH 15/27] [rss] Set DNS records during RSS initialization --- Cargo.lock | 1 + common/src/address.rs | 2 + sled-agent/Cargo.toml | 1 + sled-agent/src/params.rs | 60 +++++++++++++++++++++++----- sled-agent/src/rack_setup/service.rs | 33 ++++++++++++++- 5 files changed, 86 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fe1cc390b81..3bf3d89f88f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3155,6 +3155,7 @@ dependencies = [ "expectorate", "futures", "http", + "internal-dns-client", "ipnetwork", "libc", "macaddr", diff --git a/common/src/address.rs b/common/src/address.rs index 226dc9ea655..3dee3848b9e 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -34,6 +34,8 @@ pub const SLED_AGENT_PORT: u16 = 12345; /// The port propolis-server listens on inside the propolis zone. pub const PROPOLIS_PORT: u16 = 12400; +pub const OXIMETER_PORT: u16 = 12223; + // Anycast is a mechanism in which a single IP address is shared by multiple // devices, and the destination is located based on routing distance. 
// diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index dc254c75e15..3041a99ae06 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -17,6 +17,7 @@ clap = { version = "3.2", features = ["derive"] } crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "8314eeddd228ec0d76cefa40c4a41d3e2611ac18" } dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } futures = "0.3.21" +internal-dns-client = { path = "../internal-dns-client" } ipnetwork = "0.18" libc = "0.2.126" macaddr = { version = "1.0.1", features = [ "serde_std" ] } diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index e3370a30200..dbb0eac57a1 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use internal_dns_client::names::{BackendName, ServiceName, AAAA, SRV}; +use omicron_common::address::OXIMETER_PORT; use omicron_common::api::external; use omicron_common::api::internal::nexus::{ DiskRuntimeState, InstanceRuntimeState, @@ -226,7 +228,7 @@ impl std::fmt::Display for DatasetKind { use DatasetKind::*; let s = match self { Crucible => "crucible", - CockroachDb { .. } => "cockroach", + CockroachDb { .. } => "cockroachdb", Clickhouse => "clickhouse", }; write!(f, "{}", s) @@ -247,14 +249,28 @@ pub struct DatasetEnsureBody { pub dataset_kind: DatasetKind, // The address on which the zone will listen for requests. pub address: SocketAddrV6, - // NOTE: We could insert a UUID here, if we want that to be set by the - // caller explicitly? Currently, the lack of a UUID implies that - // "at most one dataset type" exists within a zpool. - // - // It's unclear if this is actually necessary - making this change - // would also require the RSS to query existing datasets before - // requesting new ones (after all, we generally wouldn't want to - // create two CRDB datasets with different UUIDs on the same zpool). +} + +impl internal_dns_client::multiclient::Service for DatasetEnsureBody { + fn aaaa(&self) -> AAAA { + AAAA::Zone(self.id) + } + + fn srv(&self) -> SRV { + match self.dataset_kind { + DatasetKind::Crucible => { + SRV::Backend(BackendName::Crucible, self.id) + } + DatasetKind::Clickhouse => SRV::Service(ServiceName::Clickhouse), + DatasetKind::CockroachDb { .. } => { + SRV::Service(ServiceName::Cockroach) + } + } + } + + fn address(&self) -> SocketAddrV6 { + self.address + } } impl From for sled_agent_client::types::DatasetEnsureBody { @@ -326,6 +342,32 @@ pub struct ServiceRequest { pub service_type: ServiceType, } +impl internal_dns_client::multiclient::Service for ServiceRequest { + fn aaaa(&self) -> AAAA { + AAAA::Zone(self.id) + } + + fn srv(&self) -> SRV { + match self.service_type { + ServiceType::InternalDns { .. } => { + SRV::Service(ServiceName::InternalDNS) + } + ServiceType::Nexus { .. } => SRV::Service(ServiceName::Nexus), + ServiceType::Oximeter => SRV::Service(ServiceName::Oximeter), + } + } + + fn address(&self) -> SocketAddrV6 { + match self.service_type { + ServiceType::InternalDns { server_address, .. } => server_address, + ServiceType::Nexus { internal_address, .. 
} => internal_address, + ServiceType::Oximeter => { + SocketAddrV6::new(self.addresses[0], OXIMETER_PORT, 0, 0) + } + } + } +} + impl From for sled_agent_client::types::ServiceRequest { fn from(s: ServiceRequest) -> Self { Self { diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index c48a20cc4bc..a4f7032b385 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -13,6 +13,7 @@ use crate::bootstrap::{ trust_quorum::{RackSecret, ShareDistribution}, }; use crate::params::{ServiceRequest, ServiceType}; +use internal_dns_client::multiclient::{DnsError, Updater as DnsUpdater}; use omicron_common::address::{ get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, }; @@ -26,7 +27,7 @@ use std::collections::{HashMap, HashSet}; use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::path::PathBuf; use thiserror::Error; -use tokio::sync::Mutex; +use tokio::sync::{Mutex, OnceCell}; use uuid::Uuid; /// Describes errors which may occur while operating the setup service. @@ -56,6 +57,9 @@ pub enum SetupServiceError { #[error("Failed to split rack secret: {0:?}")] SplitRackSecret(vsss_rs::Error), + + #[error("Failed to access DNS servers: {0}")] + Dns(#[from] DnsError), } // The workload / information allocated to a single sled. @@ -150,11 +154,16 @@ enum PeerExpectation { struct ServiceInner { log: Logger, peer_monitor: Mutex, + dns_servers: OnceCell, } impl ServiceInner { fn new(log: Logger, peer_monitor: PeerMonitorObserver) -> Self { - ServiceInner { log, peer_monitor: Mutex::new(peer_monitor) } + ServiceInner { + log, + peer_monitor: Mutex::new(peer_monitor), + dns_servers: OnceCell::new(), + } } async fn initialize_datasets( @@ -574,6 +583,15 @@ impl ServiceInner { .into_iter() .collect::>()?; + let dns_servers = DnsUpdater::new( + &config.az_subnet(), + self.log.new(o!("client" => "DNS")), + ); + self.dns_servers + .set(dns_servers) + .map_err(|_| ()) + .expect("DNS servers should only be set once"); + // Issue the dataset initialization requests to all sleds. 
futures::future::join_all(plan.iter().map( |(_, allocation)| async move { @@ -585,6 +603,12 @@ impl ServiceInner { &allocation.services_request.datasets, ) .await?; + + self.dns_servers + .get() + .expect("DNS servers must be initialized first") + .insert_dns_records(&allocation.services_request.datasets) + .await?; Ok(()) }, )) @@ -614,6 +638,11 @@ impl ServiceInner { .collect::>(); self.initialize_services(sled_address, &all_services).await?; + self.dns_servers + .get() + .expect("DNS servers must be initialized first") + .insert_dns_records(&all_services) + .await?; Ok(()) }, )) From 3e1495fd1a62e4be814f8df866e0a0e41815eca5 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 17:01:11 -0400 Subject: [PATCH 16/27] [oximeter] Rely on dynamically set arguments within Oximeter --- Cargo.lock | 1 + common/src/address.rs | 4 +- nexus/test-utils/src/lib.rs | 15 ++-- oximeter/collector/Cargo.toml | 1 + oximeter/collector/config.toml | 7 -- oximeter/collector/src/bin/oximeter.rs | 54 +++++++----- oximeter/collector/src/lib.rs | 86 ++++++++++++++----- .../tests/output/cmd-oximeter-noargs-stderr | 14 ++- oximeter/collector/tests/test_commands.rs | 2 +- sled-agent/src/services.rs | 61 ++++++++++--- smf/oximeter/config.toml | 8 -- smf/oximeter/manifest.xml | 7 +- 12 files changed, 174 insertions(+), 86 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3bf3d89f88f..876ef959b8e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3445,6 +3445,7 @@ dependencies = [ "clap 3.2.5", "dropshot", "expectorate", + "internal-dns-client", "nexus-client 0.1.0", "omicron-common 0.1.0", "omicron-test-utils", diff --git a/common/src/address.rs b/common/src/address.rs index 3dee3848b9e..708fbff12bd 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -33,9 +33,11 @@ pub const SLED_AGENT_PORT: u16 = 12345; /// The port propolis-server listens on inside the propolis zone. pub const PROPOLIS_PORT: u16 = 12400; - +pub const CLICKHOUSE_PORT: u16 = 8123; pub const OXIMETER_PORT: u16 = 12223; +pub const NEXUS_INTERNAL_PORT: u16 = 12221; + // Anycast is a mechanism in which a single IP address is shared by multiple // devices, and the destination is located based on routing distance. 
// diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index ed056f48d8d..0df25b48fcb 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -197,21 +197,20 @@ pub async fn start_oximeter( id: Uuid, ) -> Result { let db = oximeter_collector::DbConfig { - address: SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db_port), + address: Some(SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db_port)), batch_size: 10, batch_interval: 1, }; let config = oximeter_collector::Config { - id, - nexus_address, + nexus_address: Some(nexus_address), db, - dropshot: ConfigDropshot { - bind_address: SocketAddr::new(Ipv6Addr::LOCALHOST.into(), 0), - ..Default::default() - }, log: ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Error }, }; - Oximeter::new(&config).await.map_err(|e| e.to_string()) + let args = oximeter_collector::OximeterArguments { + id, + address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), + }; + Oximeter::new(&config, &args).await.map_err(|e| e.to_string()) } #[derive(Debug, Clone, oximeter::Target)] diff --git a/oximeter/collector/Cargo.toml b/oximeter/collector/Cargo.toml index 7e36050d9af..10fe6058c0a 100644 --- a/oximeter/collector/Cargo.toml +++ b/oximeter/collector/Cargo.toml @@ -8,6 +8,7 @@ license = "MPL-2.0" [dependencies] clap = { version = "3.2", features = ["derive"] } dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } +internal-dns-client = { path = "../../internal-dns-client" } nexus-client = { path = "../../nexus-client" } omicron-common = { path = "../../common" } oximeter = { path = "../oximeter" } diff --git a/oximeter/collector/config.toml b/oximeter/collector/config.toml index 6b03a3974d2..0e8557a71bf 100644 --- a/oximeter/collector/config.toml +++ b/oximeter/collector/config.toml @@ -1,16 +1,9 @@ # Example configuration file for running an oximeter collector server -id = "1da65e5b-210c-4859-a7d7-200c1e659972" -nexus_address = "127.0.0.1:12221" - [db] -address = "[::1]:8123" batch_size = 1000 batch_interval = 5 # In seconds [log] level = "debug" mode = "stderr-terminal" - -[dropshot] -bind_address = "[::1]:12223" diff --git a/oximeter/collector/src/bin/oximeter.rs b/oximeter/collector/src/bin/oximeter.rs index 19f9b5b3da0..bf54cf33fa0 100644 --- a/oximeter/collector/src/bin/oximeter.rs +++ b/oximeter/collector/src/bin/oximeter.rs @@ -8,8 +8,10 @@ use clap::Parser; use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; -use oximeter_collector::{oximeter_api, Config, Oximeter}; +use oximeter_collector::{oximeter_api, Config, Oximeter, OximeterArguments}; +use std::net::SocketAddrV6; use std::path::PathBuf; +use uuid::Uuid; pub fn run_openapi() -> Result<(), String> { oximeter_api() @@ -24,18 +26,22 @@ pub fn run_openapi() -> Result<(), String> { /// Run an oximeter metric collection server in the Oxide Control Plane. 
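The reworked arguments just below replace the old `--openapi` flag and positional config path with subcommands; the collector's identity and listen address move out of the config file and onto the command line (and, in the oximeter zone, sled-agent now fills in matching `config/id` and `config/address` SMF properties, as the services.rs hunk further down shows). Illustrative invocations, with a placeholder config path and the example UUID that this patch removes from config.toml:

    oximeter openapi
    oximeter run /path/to/config.toml --id 1da65e5b-210c-4859-a7d7-200c1e659972 --address '[::1]:12223'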
#[derive(Parser)] #[clap(name = "oximeter", about = "See README.adoc for more information")] -struct Args { - #[clap( - short = 'O', - long = "openapi", - help = "Print the external OpenAPI Spec document and exit", - action - )] - openapi: bool, - - /// Path to TOML file with configuration for the server - #[clap(name = "CONFIG_FILE", action)] - config_file: PathBuf, +enum Args { + /// Print the external OpenAPI Spec document and exit + Openapi, + + /// Start an Oximeter server + Run { + /// Path to TOML file with configuration for the server + #[clap(name = "CONFIG_FILE", action)] + config_file: PathBuf, + + #[clap(short, long, action)] + id: Uuid, + + #[clap(short, long, action)] + address: SocketAddrV6, + }, } #[tokio::main] @@ -47,15 +53,17 @@ async fn main() { async fn do_run() -> Result<(), CmdError> { let args = Args::parse(); - let config = Config::from_file(args.config_file).unwrap(); - if args.openapi { - run_openapi().map_err(CmdError::Failure) - } else { - Oximeter::new(&config) - .await - .unwrap() - .serve_forever() - .await - .map_err(|e| CmdError::Failure(e.to_string())) + match args { + Args::Openapi => run_openapi().map_err(CmdError::Failure), + Args::Run { config_file, id, address } => { + let config = Config::from_file(config_file).unwrap(); + let args = OximeterArguments { id, address }; + Oximeter::new(&config, &args) + .await + .unwrap() + .serve_forever() + .await + .map_err(|e| CmdError::Failure(e.to_string())) + } } } diff --git a/oximeter/collector/src/lib.rs b/oximeter/collector/src/lib.rs index 4e2f6ca4fda..64a2af4c96a 100644 --- a/oximeter/collector/src/lib.rs +++ b/oximeter/collector/src/lib.rs @@ -11,6 +11,11 @@ use dropshot::{ HttpResponseUpdatedNoContent, HttpServer, HttpServerStarter, RequestContext, TypedBody, }; +use internal_dns_client::{ + multiclient::{ResolveError, Resolver}, + names::{ServiceName, SRV}, +}; +use omicron_common::address::{CLICKHOUSE_PORT, NEXUS_INTERNAL_PORT}; use omicron_common::api::internal::nexus::ProducerEndpoint; use omicron_common::backoff; use oximeter::types::{ProducerResults, ProducerResultsItem}; @@ -18,7 +23,7 @@ use oximeter_db::{Client, DbWrite}; use serde::{Deserialize, Serialize}; use slog::{debug, error, info, o, trace, warn, Drain, Logger}; use std::collections::{btree_map::Entry, BTreeMap}; -use std::net::SocketAddr; +use std::net::{SocketAddr, SocketAddrV6}; use std::path::Path; use std::sync::Arc; use std::time::Duration; @@ -37,6 +42,9 @@ pub enum Error { #[error(transparent)] Database(#[from] oximeter_db::Error), + + #[error(transparent)] + ResolveError(#[from] ResolveError), } // Messages for controlling a collection task @@ -231,8 +239,11 @@ async fn results_sink( /// Configuration for interacting with the metric database. #[derive(Debug, Clone, Copy, Deserialize, Serialize)] pub struct DbConfig { - /// Address of the ClickHouse server - pub address: SocketAddr, + /// Optional address of the ClickHouse server. + /// + /// If "None", will be inferred from DNS. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub address: Option, /// Batch size of samples at which to insert pub batch_size: usize, @@ -259,6 +270,7 @@ impl OximeterAgent { pub async fn with_id( id: Uuid, db_config: DbConfig, + resolver: &Resolver, log: &Logger, ) -> Result { let (result_sender, result_receiver) = mpsc::channel(8); @@ -267,7 +279,17 @@ impl OximeterAgent { // Construct the ClickHouse client first, propagate an error if we can't reach the // database. 
- let client = Client::new(db_config.address, &log); + let db_address = if let Some(address) = db_config.address { + address + } else { + SocketAddr::new( + resolver + .lookup_ip(SRV::Service(ServiceName::Clickhouse)) + .await?, + CLICKHOUSE_PORT, + ) + }; + let client = Client::new(db_address, &log); client.init_db().await?; // Spawn the task for aggregating and inserting all metrics @@ -334,18 +356,15 @@ impl OximeterAgent { /// Configuration used to initialize an oximeter server #[derive(Clone, Debug, Deserialize, Serialize)] pub struct Config { - /// An unique ID for this oximeter server - pub id: Uuid, - /// The address used to connect to Nexus. - pub nexus_address: SocketAddr, + /// + /// If "None", will be inferred from DNS. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub nexus_address: Option, /// Configuration for working with ClickHouse pub db: DbConfig, - /// The internal Dropshot HTTP server configuration - pub dropshot: ConfigDropshot, - /// Logging configuration pub log: ConfigLogging, } @@ -360,6 +379,11 @@ impl Config { } } +pub struct OximeterArguments { + pub id: Uuid, + pub address: SocketAddrV6, +} + /// A server used to collect metrics from components in the control plane. pub struct Oximeter { _agent: Arc, @@ -371,7 +395,10 @@ impl Oximeter { /// /// This starts an HTTP server used to communicate with other agents in Omicron, especially /// Nexus. It also registers itself as a new `oximeter` instance with Nexus. - pub async fn new(config: &Config) -> Result { + pub async fn new( + config: &Config, + args: &OximeterArguments, + ) -> Result { let (drain, registration) = slog_dtrace::with_drain( config .log @@ -388,10 +415,13 @@ impl Oximeter { } info!(log, "starting oximeter server"); + let resolver = Resolver::new_from_ip(*args.address.ip())?; + let make_agent = || async { debug!(log, "creating ClickHouse client"); Ok(Arc::new( - OximeterAgent::with_id(config.id, config.db, &log).await?, + OximeterAgent::with_id(args.id, config.db, &resolver, &log) + .await?, )) }; let log_client_failure = |error, delay| { @@ -411,7 +441,10 @@ impl Oximeter { let dropshot_log = log.new(o!("component" => "dropshot")); let server = HttpServerStarter::new( - &config.dropshot, + &ConfigDropshot { + bind_address: SocketAddr::V6(args.address), + ..Default::default() + }, oximeter_api(), Arc::clone(&agent), &dropshot_log, @@ -423,20 +456,33 @@ impl Oximeter { let client = reqwest::Client::new(); let notify_nexus = || async { debug!(log, "contacting nexus"); - client - .post(format!( - "http://{}/metrics/collectors", - config.nexus_address + let nexus_address = if let Some(address) = config.nexus_address { + address + } else { + SocketAddr::V6(SocketAddrV6::new( + resolver + .lookup_ipv6(SRV::Service(ServiceName::Nexus)) + .await + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?, + NEXUS_INTERNAL_PORT, + 0, + 0, )) + }; + + client + .post(format!("http://{}/metrics/collectors", nexus_address,)) .json(&nexus_client::types::OximeterInfo { address: server.local_addr().to_string(), collector_id: agent.id, }) .send() .await - .map_err(backoff::BackoffError::transient)? + .map_err(|e| backoff::BackoffError::transient(e.to_string()))? 
.error_for_status() - .map_err(backoff::BackoffError::transient) + .map_err(|e| backoff::BackoffError::transient(e.to_string())) }; let log_notification_failure = |error, delay| { warn!( diff --git a/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr b/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr index 1398febf119..dfb062bca75 100644 --- a/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr +++ b/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr @@ -1,7 +1,13 @@ -error: The following required arguments were not provided: - +oximeter +See README.adoc for more information USAGE: - oximeter [OPTIONS] + oximeter -For more information try --help +OPTIONS: + -h, --help Print help information + +SUBCOMMANDS: + help Print this message or the help of the given subcommand(s) + openapi Print the external OpenAPI Spec document and exit + run Start an Oximeter server diff --git a/oximeter/collector/tests/test_commands.rs b/oximeter/collector/tests/test_commands.rs index 7b910a5be4a..d3d66be0580 100644 --- a/oximeter/collector/tests/test_commands.rs +++ b/oximeter/collector/tests/test_commands.rs @@ -50,7 +50,7 @@ fn test_oximeter_openapi() { // But we do know where it is at compile time, so we load it then. let config = include_str!("../../collector/config.toml"); let config_path = write_config(config); - let exec = Exec::cmd(path_to_oximeter()).arg(&config_path).arg("--openapi"); + let exec = Exec::cmd(path_to_oximeter()).arg("openapi"); let (exit_status, stdout_text, stderr_text) = run_command(exec); fs::remove_file(&config_path).expect("failed to remove temporary file"); assert_exit_code(exit_status, EXIT_SUCCESS, &stderr_text); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index dde2ef47937..ea989c3eab0 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -12,17 +12,15 @@ use crate::illumos::zone::AddressRequest; use crate::params::{ServiceEnsureBody, ServiceRequest, ServiceType}; use crate::zone::Zones; use dropshot::ConfigDropshot; -use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; +use omicron_common::address::{Ipv6Subnet, OXIMETER_PORT, RACK_PREFIX}; use omicron_common::nexus_config::{ self, DeploymentConfig as NexusDeploymentConfig, }; -use omicron_common::postgres_config::PostgresConfigWithUrl; use slog::Logger; use std::collections::HashSet; use std::iter::FromIterator; use std::net::{IpAddr, Ipv6Addr, SocketAddr}; use std::path::{Path, PathBuf}; -use std::str::FromStr; use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; use uuid::Uuid; @@ -204,11 +202,11 @@ impl ServiceManager { existing_zones: &mut Vec, services: &Vec, ) -> Result<(), Error> { - info!(self.log, "Ensuring services are initialized: {:?}", services); // TODO(https://github.com/oxidecomputer/omicron/issues/726): // As long as we ensure the requests don't overlap, we could // parallelize this request. for service in services { + info!(self.log, "Ensuring service is initialized: {:?}", service); // Before we bother allocating anything for this request, check if // this service has already been created. let expected_zone_name = @@ -334,12 +332,7 @@ impl ServiceManager { subnet: Ipv6Subnet::::new( self.underlay_address, ), - // TODO: Switch to inferring this URL by DNS. 
- database: nexus_config::Database::FromUrl { - url: PostgresConfigWithUrl::from_str( - "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" - ).unwrap() - } + database: nexus_config::Database::FromDns, }; // Copy the partial config file to the expected location. @@ -434,8 +427,50 @@ impl ServiceManager { ServiceType::Oximeter => { info!(self.log, "Setting up oximeter service"); - // TODO: Implement with dynamic parameters, when address is - // dynamically assigned. + let address = service.addresses[0]; + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!("config/id={}", service.id), + ]) + .map_err(|err| Error::ZoneCommand { + intent: "set server ID".to_string(), + err, + })?; + + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!( + "config/address=[{}]:{}", + address, OXIMETER_PORT, + ), + ]) + .map_err(|err| Error::ZoneCommand { + intent: "set server address".to_string(), + err, + })?; + + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &default_smf_name, + "refresh", + ]) + .map_err(|err| Error::ZoneCommand { + intent: format!( + "Refresh SMF manifest {}", + default_smf_name + ), + err, + })?; } } @@ -494,7 +529,7 @@ impl ServiceManager { // that removal implicitly. warn!( self.log, - "Cannot request services on this sled, differing configurations: {:?}", + "Cannot request services on this sled, differing configurations: {:#?}", known_set.symmetric_difference(&requested_set) ); return Err(Error::ServicesAlreadyConfigured); diff --git a/smf/oximeter/config.toml b/smf/oximeter/config.toml index 4a0095fdd00..ca14fe6ec8b 100644 --- a/smf/oximeter/config.toml +++ b/smf/oximeter/config.toml @@ -1,11 +1,6 @@ # Example configuration file for running an oximeter collector server -id = "1da65e5b-210c-4859-a7d7-200c1e659972" -# Internal address of nexus -nexus_address = "[fd00:1122:3344:0101::3]:12221" - [db] -address = "[fd00:1122:3344:0101::5]:8123" batch_size = 1000 batch_interval = 5 # In seconds @@ -14,6 +9,3 @@ level = "debug" mode = "file" path = "/dev/stdout" if_exists = "append" - -[dropshot] -bind_address = "[fd00:1122:3344:0101::4]:12223" diff --git a/smf/oximeter/manifest.xml b/smf/oximeter/manifest.xml index 47e3cb254f1..d16efd90d99 100644 --- a/smf/oximeter/manifest.xml +++ b/smf/oximeter/manifest.xml @@ -18,10 +18,15 @@ + + + + + From 5d330bc2af4beaf01d44202bc3b1b9be6040af50 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 17:05:04 -0400 Subject: [PATCH 17/27] fix import --- nexus/test-utils/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 0df25b48fcb..1f9967da95b 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -18,7 +18,7 @@ use oximeter_collector::Oximeter; use oximeter_producer::Server as ProducerServer; use slog::o; use slog::Logger; -use std::net::{IpAddr, Ipv6Addr, SocketAddr}; +use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use std::path::Path; use std::time::Duration; use uuid::Uuid; From 130ffa056220d02e97c9fec8844ec158864ff4fa Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 17:13:32 -0400 Subject: [PATCH 18/27] okay not THAT dynamic just yet --- sled-agent/src/services.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index ea989c3eab0..be11bfb2a6d 100644 --- 
a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -16,11 +16,13 @@ use omicron_common::address::{Ipv6Subnet, OXIMETER_PORT, RACK_PREFIX}; use omicron_common::nexus_config::{ self, DeploymentConfig as NexusDeploymentConfig, }; +use omicron_common::postgres_config::PostgresConfigWithUrl; use slog::Logger; use std::collections::HashSet; use std::iter::FromIterator; use std::net::{IpAddr, Ipv6Addr, SocketAddr}; use std::path::{Path, PathBuf}; +use std::str::FromStr; use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; use uuid::Uuid; @@ -332,7 +334,12 @@ impl ServiceManager { subnet: Ipv6Subnet::::new( self.underlay_address, ), - database: nexus_config::Database::FromDns, + // TODO: Switch to inferring this URL by DNS. + database: nexus_config::Database::FromUrl { + url: PostgresConfigWithUrl::from_str( + "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" + ).unwrap(), + } }; // Copy the partial config file to the expected location. From 7ceb8fafe99003090054dc61244e5ef6ddc7decc Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 20:57:34 -0400 Subject: [PATCH 19/27] [nexus] Populate rack during initialization --- nexus/src/app/mod.rs | 16 ++--- nexus/src/app/rack.rs | 56 +++++++---------- nexus/src/app/update.rs | 25 +++++--- nexus/src/db/datastore.rs | 14 +++++ nexus/src/external_api/http_entrypoints.rs | 10 +-- nexus/src/populate.rs | 71 ++++++++++++++++++---- 6 files changed, 127 insertions(+), 65 deletions(-) diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 06f7264a124..76da20d6d19 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -10,6 +10,7 @@ use crate::config; use crate::context::OpContext; use crate::db; use crate::populate::populate_start; +use crate::populate::PopulateArgs; use crate::populate::PopulateStatus; use crate::saga_interface::SagaContext; use anyhow::anyhow; @@ -54,15 +55,12 @@ pub struct Nexus { /// uuid for this nexus instance. 
id: Uuid, - /// uuid for this rack (TODO should also be in persistent storage) + /// uuid for this rack rack_id: Uuid, /// general server log log: Logger, - /// cached rack identity metadata - api_rack_identity: db::model::RackIdentity, - /// persistent storage for resources in the control plane db_datastore: Arc, @@ -139,14 +137,18 @@ impl Nexus { authn::Context::internal_db_init(), Arc::clone(&db_datastore), ); - let populate_status = - populate_start(populate_ctx, Arc::clone(&db_datastore)); + + let populate_args = PopulateArgs::new(rack_id); + let populate_status = populate_start( + populate_ctx, + Arc::clone(&db_datastore), + populate_args, + ); let nexus = Nexus { id: config.deployment.id, rack_id, log: log.new(o!()), - api_rack_identity: db::model::RackIdentity::new(rack_id), db_datastore: Arc::clone(&db_datastore), authz: Arc::clone(&authz), sec_client: Arc::clone(&sec_client), diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index a9a10a616aa..dcc7ce92dbc 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -7,40 +7,21 @@ use crate::authz; use crate::context::OpContext; use crate::db; +use crate::db::lookup::LookupPath; use crate::internal_api::params::ServicePutRequest; -use futures::future::ready; -use futures::StreamExt; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; -use omicron_common::api::external::ListResult; +use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; -use omicron_common::api::external::LookupType; -use omicron_common::api::external::ResourceType; use uuid::Uuid; impl super::Nexus { - pub(crate) fn as_rack(&self) -> db::model::Rack { - db::model::Rack { - identity: self.api_rack_identity.clone(), - initialized: true, - tuf_base_url: None, - } - } - pub async fn racks_list( &self, opctx: &OpContext, pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResult { - opctx.authorize(authz::Action::Read, &authz::FLEET).await?; - - if let Some(marker) = pagparams.marker { - if *marker >= self.rack_id { - return Ok(futures::stream::empty().boxed()); - } - } - - Ok(futures::stream::once(ready(Ok(self.as_rack()))).boxed()) + ) -> ListResultVec { + self.db_datastore.rack_list(&opctx, pagparams).await } pub async fn rack_lookup( @@ -48,18 +29,25 @@ impl super::Nexus { opctx: &OpContext, rack_id: &Uuid, ) -> LookupResult { - let authz_rack = authz::Rack::new( - authz::FLEET, - *rack_id, - LookupType::ById(*rack_id), - ); - opctx.authorize(authz::Action::Read, &authz_rack).await?; + let (.., db_rack) = LookupPath::new(opctx, &self.db_datastore) + .rack_id(*rack_id) + .fetch() + .await?; + Ok(db_rack) + } - if *rack_id == self.rack_id { - Ok(self.as_rack()) - } else { - Err(Error::not_found_by_id(ResourceType::Rack, rack_id)) - } + /// Ensures that a rack exists in the DB. + /// + /// If the rack already exists, this function is a no-op. + pub async fn rack_insert( + &self, + opctx: &OpContext, + rack_id: Uuid, + ) -> Result<(), Error> { + self.datastore() + .rack_insert(opctx, &db::model::Rack::new(rack_id)) + .await?; + Ok(()) } /// Marks the rack as initialized with a set of services. 
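`rack_insert` above is a thin wrapper over `DataStore::rack_insert`, which is documented as a no-op when the rack row already exists. A stand-alone sketch of how such an idempotent insert typically looks, written against a made-up `rack` schema in diesel 2.x-style syntax and assuming diesel's postgres and uuid features; the real datastore method also handles authorization, the async connection pool, and Omicron's error mapping, and its exact query is not shown in this diff.

use diesel::pg::PgConnection;
use diesel::prelude::*;
use uuid::Uuid;

diesel::table! {
    rack (id) {
        id -> Uuid,
        initialized -> Bool,
    }
}

#[derive(Insertable)]
#[diesel(table_name = rack)]
struct NewRack {
    id: Uuid,
    initialized: bool,
}

// Repeated calls with the same id succeed and leave the existing row alone.
fn rack_insert_idempotent(
    conn: &mut PgConnection,
    rack_id: Uuid,
) -> diesel::QueryResult<usize> {
    use self::rack::dsl;
    diesel::insert_into(dsl::rack)
        .values(&NewRack { id: rack_id, initialized: false })
        .on_conflict(dsl::id)
        .do_nothing()
        .execute(conn)
}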
diff --git a/nexus/src/app/update.rs b/nexus/src/app/update.rs index 0d6721ec439..2d87a44a84f 100644 --- a/nexus/src/app/update.rs +++ b/nexus/src/app/update.rs @@ -24,11 +24,15 @@ use tokio::io::AsyncWriteExt; static BASE_ARTIFACT_DIR: &str = "/var/tmp/oxide_artifacts"; impl super::Nexus { - fn tuf_base_url(&self) -> Option { - self.updates_config.as_ref().map(|c| { - let rack = self.as_rack(); + async fn tuf_base_url( + &self, + opctx: &OpContext, + ) -> Result, Error> { + let rack = self.rack_lookup(opctx, &self.rack_id).await?; + + Ok(self.updates_config.as_ref().map(|c| { rack.tuf_base_url.unwrap_or_else(|| c.default_base_url.clone()) - }) + })) } pub async fn updates_refresh_metadata( @@ -42,10 +46,11 @@ impl super::Nexus { message: "updates system not configured".into(), } })?; - let base_url = - self.tuf_base_url().ok_or_else(|| Error::InvalidRequest { + let base_url = self.tuf_base_url(opctx).await?.ok_or_else(|| { + Error::InvalidRequest { message: "updates system not configured".into(), - })?; + } + })?; let trusted_root = tokio::fs::read(&updates_config.trusted_root) .await .map_err(|e| Error::InternalError { @@ -129,8 +134,10 @@ impl super::Nexus { artifact: UpdateArtifact, ) -> Result, Error> { let mut base_url = - self.tuf_base_url().ok_or_else(|| Error::InvalidRequest { - message: "updates system not configured".into(), + self.tuf_base_url(opctx).await?.ok_or_else(|| { + Error::InvalidRequest { + message: "updates system not configured".into(), + } })?; if !base_url.ends_with('/') { base_url.push('/'); diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 6814b6276ac..b1c984f2218 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -147,6 +147,20 @@ impl DataStore { Ok(self.pool.pool()) } + pub async fn rack_list( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::rack::dsl; + paginated(dsl::rack, dsl::id, pagparams) + .select(Rack::as_select()) + .load_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + /// Stores a new rack in the database. /// /// This function is a no-op if the rack already exists. diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index b47e6ecf12c..cab200e376f 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -2940,11 +2940,13 @@ async fn hardware_racks_get( let query = query_params.into_inner(); let handler = async { let opctx = OpContext::for_external_api(&rqctx).await?; - let rack_stream = nexus + let racks = nexus .racks_list(&opctx, &data_page_params_for(&rqctx, &query)?) - .await?; - let view_list = to_list::(rack_stream).await; - Ok(HttpResponseOk(ScanById::results_page(&query, view_list)?)) + .await? + .into_iter() + .map(|r| r.into()) + .collect(); + Ok(HttpResponseOk(ScanById::results_page(&query, racks)?)) }; apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } diff --git a/nexus/src/populate.rs b/nexus/src/populate.rs index 9f6bcdcad20..85223aef2b1 100644 --- a/nexus/src/populate.rs +++ b/nexus/src/populate.rs @@ -43,13 +43,14 @@ //! each populator behaves as expected in the above ways. 
use crate::context::OpContext; -use crate::db::DataStore; +use crate::db::{self, DataStore}; use futures::future::BoxFuture; use futures::FutureExt; use lazy_static::lazy_static; use omicron_common::api::external::Error; use omicron_common::backoff; use std::sync::Arc; +use uuid::Uuid; #[derive(Clone, Debug)] pub enum PopulateStatus { @@ -58,14 +59,26 @@ pub enum PopulateStatus { Failed(String), } +/// Auxiliary data necessary to populate the database. +pub struct PopulateArgs { + rack_id: Uuid, +} + +impl PopulateArgs { + pub fn new(rack_id: Uuid) -> Self { + Self { rack_id } + } +} + pub fn populate_start( opctx: OpContext, datastore: Arc, + args: PopulateArgs, ) -> tokio::sync::watch::Receiver { let (tx, rx) = tokio::sync::watch::channel(PopulateStatus::NotDone); tokio::spawn(async move { - let result = populate(&opctx, &datastore).await; + let result = populate(&opctx, &datastore, &args).await; if let Err(error) = tx.send(match result { Ok(()) => PopulateStatus::Done, Err(message) => PopulateStatus::Failed(message), @@ -80,17 +93,19 @@ pub fn populate_start( async fn populate( opctx: &OpContext, datastore: &DataStore, + args: &PopulateArgs, ) -> Result<(), String> { for p in *ALL_POPULATORS { let db_result = backoff::retry_notify( backoff::internal_service_policy(), || async { - p.populate(opctx, datastore).await.map_err(|error| match &error - { - Error::ServiceUnavailable { .. } => { - backoff::BackoffError::transient(error) + p.populate(opctx, datastore, args).await.map_err(|error| { + match &error { + Error::ServiceUnavailable { .. } => { + backoff::BackoffError::transient(error) + } + _ => backoff::BackoffError::Permanent(error), } - _ => backoff::BackoffError::Permanent(error), }) }, |error, delay| { @@ -130,6 +145,7 @@ trait Populator: std::fmt::Debug + Send + Sync { &self, opctx: &'a OpContext, datastore: &'a DataStore, + args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b; @@ -143,6 +159,7 @@ impl Populator for PopulateBuiltinUsers { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -159,6 +176,7 @@ impl Populator for PopulateBuiltinRoles { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -175,6 +193,7 @@ impl Populator for PopulateBuiltinRoleAssignments { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -192,6 +211,7 @@ impl Populator for PopulateBuiltinSilos { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -214,6 +234,7 @@ impl Populator for PopulateSiloUsers { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -230,6 +251,7 @@ impl Populator for PopulateSiloUserRoleAssignments { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -241,19 +263,43 @@ impl Populator for PopulateSiloUserRoleAssignments { } } +#[derive(Debug)] +struct PopulateRack; +impl Populator for PopulateRack { + fn populate<'a, 'b>( + &self, + opctx: &'a OpContext, + datastore: &'a DataStore, + args: &'a PopulateArgs, + ) -> BoxFuture<'b, Result<(), Error>> + where + 'a: 'b, + { + async { + datastore + .rack_insert(opctx, &db::model::Rack::new(args.rack_id)) + .await?; + 
Ok(()) + } + .boxed() + } +} + lazy_static! { - static ref ALL_POPULATORS: [&'static dyn Populator; 6] = [ + static ref ALL_POPULATORS: [&'static dyn Populator; 7] = [ &PopulateBuiltinUsers, &PopulateBuiltinRoles, &PopulateBuiltinRoleAssignments, &PopulateBuiltinSilos, &PopulateSiloUsers, &PopulateSiloUserRoleAssignments, + &PopulateRack, ]; } #[cfg(test)] mod test { + use super::PopulateArgs; use super::Populator; use super::ALL_POPULATORS; use crate::authn; @@ -265,6 +311,7 @@ mod test { use omicron_common::api::external::Error; use omicron_test_utils::dev; use std::sync::Arc; + use uuid::Uuid; #[tokio::test] async fn test_populators() { @@ -287,16 +334,18 @@ mod test { ); let log = &logctx.log; + let args = PopulateArgs::new(Uuid::new_v4()); + // Running each populator once under normal conditions should work. info!(&log, "populator {:?}, run 1", p); - p.populate(&opctx, &datastore) + p.populate(&opctx, &datastore, &args) .await .with_context(|| format!("populator {:?} (try 1)", p)) .unwrap(); // It should also work fine to run it again. info!(&log, "populator {:?}, run 2 (idempotency check)", p); - p.populate(&opctx, &datastore) + p.populate(&opctx, &datastore, &args) .await .with_context(|| { format!( @@ -331,7 +380,7 @@ mod test { ); info!(&log, "populator {:?}, with database offline", p); - match p.populate(&opctx, &datastore).await { + match p.populate(&opctx, &datastore, &args).await { Err(Error::ServiceUnavailable { .. }) => (), Ok(_) => panic!( "populator {:?}: unexpectedly succeeded with no database", From 5188880e32c7186a51ac7d69d0ba6d87af59ae70 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 21:36:27 -0400 Subject: [PATCH 20/27] [nexus] Add tests for rack endpoints --- nexus/src/app/mod.rs | 4 +++ nexus/tests/integration_tests/mod.rs | 1 + nexus/tests/integration_tests/rack.rs | 41 +++++++++++++++++++++++++++ 3 files changed, 46 insertions(+) create mode 100644 nexus/tests/integration_tests/rack.rs diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 76da20d6d19..e8afa7f3528 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -202,6 +202,10 @@ impl Nexus { &self.tunables } + pub fn rack_id(&self) -> Uuid { + self.rack_id + } + pub async fn wait_for_populate(&self) -> Result<(), anyhow::Error> { let mut my_rx = self.populate_status.clone(); loop { diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index de5de9679bd..6c3e52bd785 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -14,6 +14,7 @@ mod instances; mod organizations; mod oximeter; mod projects; +mod rack; mod role_assignments; mod roles_builtin; mod router_routes; diff --git a/nexus/tests/integration_tests/rack.rs b/nexus/tests/integration_tests/rack.rs new file mode 100644 index 00000000000..dfcbde9740f --- /dev/null +++ b/nexus/tests/integration_tests/rack.rs @@ -0,0 +1,41 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
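Patch 19 threads `PopulateArgs` into a start-and-watch pattern: `populate_start` spawns the populate sequence on a background task and publishes its progress through a `tokio::sync::watch` channel, which is what `wait_for_populate` polls. A condensed, self-contained sketch of that pattern; `Status`, `start_background_work`, and `wait_for_done` are stand-ins for `PopulateStatus`, `populate_start`, and `wait_for_populate`.

use tokio::sync::watch;

#[derive(Clone, Debug)]
enum Status {
    NotDone,
    Done,
    Failed(String),
}

fn start_background_work() -> watch::Receiver<Status> {
    let (tx, rx) = watch::channel(Status::NotDone);
    tokio::spawn(async move {
        // Stand-in for running the populators in order.
        let result: Result<(), String> = Ok(());
        let _ = tx.send(match result {
            Ok(()) => Status::Done,
            Err(message) => Status::Failed(message),
        });
    });
    rx
}

async fn wait_for_done(mut rx: watch::Receiver<Status>) -> Result<(), String> {
    loop {
        match &*rx.borrow() {
            Status::Done => return Ok(()),
            Status::Failed(message) => return Err(message.clone()),
            Status::NotDone => (),
        }
        // Wakes when a new status is published; errors if the sender is gone.
        if rx.changed().await.is_err() {
            return Err("populate task exited without reporting".to_string());
        }
    }
}

In the patch, the receiver is stored on `Nexus` as `populate_status`, and `wait_for_populate` loops over a clone of it in the same way.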
+ +use nexus_test_utils::http_testing::AuthnMode; +use nexus_test_utils::http_testing::NexusRequest; +use nexus_test_utils::ControlPlaneTestContext; +use nexus_test_utils_macros::nexus_test; +use omicron_nexus::external_api::views::Rack; + +#[nexus_test] +async fn test_list_own_rack(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + let racks_url = "/hardware/racks"; + let racks: Vec = + NexusRequest::iter_collection_authn(client, racks_url, "", None) + .await + .expect("failed to list racks") + .all_items; + + assert_eq!(1, racks.len()); + assert_eq!(cptestctx.server.apictx.nexus.rack_id(), racks[0].identity.id); +} + +#[nexus_test] +async fn test_get_own_rack(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + let expected_id = cptestctx.server.apictx.nexus.rack_id(); + let rack_url = format!("/hardware/racks/{}", expected_id); + let rack = NexusRequest::object_get(client, &rack_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to get rack") + .parsed_body::() + .unwrap(); + + assert_eq!(expected_id, rack.identity.id); +} From 4df23c2031efc4cdf09d9739f823203bafa15117 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 21 Jun 2022 10:08:50 -0400 Subject: [PATCH 21/27] jgallagher feedback --- internal-dns/src/bin/dns-server.rs | 3 ++- internal-dns/src/dns_server.rs | 4 ++-- internal-dns/src/lib.rs | 2 +- internal-dns/tests/basic_test.rs | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/internal-dns/src/bin/dns-server.rs b/internal-dns/src/bin/dns-server.rs index 12eafcc3599..b8520efdb26 100644 --- a/internal-dns/src/bin/dns-server.rs +++ b/internal-dns/src/bin/dns-server.rs @@ -64,7 +64,8 @@ async fn main() -> Result<(), anyhow::Error> { internal_dns::dns_server::run(log, db, dns_config).await? 
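Besides the `start_dropshot_server` rename, patch 21 changes how the DNS server task is torn down: the explicit `Server::close()` becomes an `impl Drop` that aborts the spawned tokio task, so owners can simply drop the handle. A self-contained sketch of that RAII-style cleanup, with a placeholder task standing in for the DNS server loop.

use tokio::task::JoinHandle;

struct Server {
    handle: JoinHandle<()>,
}

impl Server {
    fn start() -> Server {
        let handle = tokio::spawn(async {
            // Placeholder for the DNS server's accept/serve loop.
            tokio::time::sleep(std::time::Duration::from_secs(3600)).await;
        });
        Server { handle }
    }
}

impl Drop for Server {
    fn drop(&mut self) {
        // Cancels the background task; aborting a finished task is a no-op.
        self.handle.abort();
    }
}

This is what lets the test cleanup switch from `self.dns_server.close()` to `drop(self.dns_server)` in the follow-up patch.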
}; - let dropshot_server = internal_dns::start_server(config, log, db).await?; + let dropshot_server = + internal_dns::start_dropshot_server(config, log, db).await?; dropshot_server .await .map_err(|error_message| anyhow!("server exiting: {}", error_message)) diff --git a/internal-dns/src/dns_server.rs b/internal-dns/src/dns_server.rs index ccebda582f7..51a84899812 100644 --- a/internal-dns/src/dns_server.rs +++ b/internal-dns/src/dns_server.rs @@ -39,8 +39,8 @@ pub struct Server { pub handle: tokio::task::JoinHandle>, } -impl Server { - pub fn close(self) { +impl Drop for Server { + fn drop(&mut self) { self.handle.abort() } } diff --git a/internal-dns/src/lib.rs b/internal-dns/src/lib.rs index 786750c1a8f..7fee156787e 100644 --- a/internal-dns/src/lib.rs +++ b/internal-dns/src/lib.rs @@ -20,7 +20,7 @@ pub struct Config { pub data: dns_data::Config, } -pub async fn start_server( +pub async fn start_dropshot_server( config: Config, log: slog::Logger, db: Arc, diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs index af72ded52cb..d6784bddae0 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -334,7 +334,7 @@ async fn init_client_server( // launch a dropshot server let dropshot_server = - internal_dns::start_server(config, log.clone(), db).await?; + internal_dns::start_dropshot_server(config, log.clone(), db).await?; // wait for server to start tokio::time::sleep(tokio::time::Duration::from_millis(250)).await; From 5556d5f5e5b3df2ae764015eea31935287877694 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 21 Jun 2022 10:12:08 -0400 Subject: [PATCH 22/27] Patch tests --- internal-dns/tests/basic_test.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs index d6784bddae0..d09e27f18c6 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -288,7 +288,7 @@ struct TestContext { impl TestContext { async fn cleanup(self) { - self.dns_server.close(); + drop(self.dns_server); self.dropshot_server.close().await.expect("Failed to clean up server"); self.tmp.close().expect("Failed to clean up tmp directory"); } From 6126e41f6b87ffa39c206ad81f91ee7620e78fc6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 21 Jun 2022 10:12:39 -0400 Subject: [PATCH 23/27] merge --- internal-dns-client/src/multiclient.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index ca8387fca45..58b2cdea012 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -324,7 +324,7 @@ mod test { }; let dropshot_server = - internal_dns::start_server(config, log.clone(), db) + internal_dns::start_dropshot_server(config, log.clone(), db) .await .unwrap(); From d6e3c9de0d23cfe10e59bee4c23e20ee0ae847be Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 22 Jun 2022 12:12:38 -0400 Subject: [PATCH 24/27] background-work -> service-balancer --- nexus/src/app/mod.rs | 8 ++++---- nexus/src/authn/mod.rs | 14 +++++++------- nexus/src/db/datastore.rs | 2 +- nexus/src/db/fixed_data/role_assignment.rs | 2 +- nexus/src/db/fixed_data/user_builtin.rs | 14 +++++++------- nexus/tests/integration_tests/users_builtin.rs | 4 ++-- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 06f7264a124..0f8a1333773 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ 
-222,12 +222,12 @@ impl Nexus { &self.opctx_external_authn } - /// Returns an [`OpContext`] used for background tasks. - pub fn opctx_for_background(&self) -> OpContext { + /// Returns an [`OpContext`] used for balancing services. + pub fn opctx_for_service_balancer(&self) -> OpContext { OpContext::for_background( - self.log.new(o!("component" => "BackgroundWork")), + self.log.new(o!("component" => "ServiceBalancer")), Arc::clone(&self.authz), - authn::Context::internal_db_background(), + authn::Context::internal_service_balancer(), Arc::clone(&self.db_datastore), ) } diff --git a/nexus/src/authn/mod.rs b/nexus/src/authn/mod.rs index c9399bdb131..f939743e769 100644 --- a/nexus/src/authn/mod.rs +++ b/nexus/src/authn/mod.rs @@ -30,12 +30,12 @@ pub mod silos; pub use crate::db::fixed_data::silo_user::USER_TEST_PRIVILEGED; pub use crate::db::fixed_data::silo_user::USER_TEST_UNPRIVILEGED; -pub use crate::db::fixed_data::user_builtin::USER_BACKGROUND_WORK; pub use crate::db::fixed_data::user_builtin::USER_DB_INIT; pub use crate::db::fixed_data::user_builtin::USER_EXTERNAL_AUTHN; pub use crate::db::fixed_data::user_builtin::USER_INTERNAL_API; pub use crate::db::fixed_data::user_builtin::USER_INTERNAL_READ; pub use crate::db::fixed_data::user_builtin::USER_SAGA_RECOVERY; +pub use crate::db::fixed_data::user_builtin::USER_SERVICE_BALANCER; use crate::db::model::ConsoleSession; use crate::authz; @@ -171,9 +171,9 @@ impl Context { Context::context_for_builtin_user(USER_DB_INIT.id) } - /// Returns an authenticated context for Nexus-driven db work. - pub fn internal_db_background() -> Context { - Context::context_for_builtin_user(USER_BACKGROUND_WORK.id) + /// Returns an authenticated context for Nexus-driven service balancing. + pub fn internal_service_balancer() -> Context { + Context::context_for_builtin_user(USER_SERVICE_BALANCER.id) } fn context_for_builtin_user(user_builtin_id: Uuid) -> Context { @@ -219,11 +219,11 @@ impl Context { #[cfg(test)] mod test { use super::Context; - use super::USER_BACKGROUND_WORK; use super::USER_DB_INIT; use super::USER_INTERNAL_API; use super::USER_INTERNAL_READ; use super::USER_SAGA_RECOVERY; + use super::USER_SERVICE_BALANCER; use super::USER_TEST_PRIVILEGED; use super::USER_TEST_UNPRIVILEGED; use crate::db::fixed_data::user_builtin::USER_EXTERNAL_AUTHN; @@ -258,9 +258,9 @@ mod test { let actor = authn.actor().unwrap(); assert_eq!(actor.actor_id(), USER_DB_INIT.id); - let authn = Context::internal_db_background(); + let authn = Context::internal_service_balancer(); let actor = authn.actor().unwrap(); - assert_eq!(actor.actor_id(), USER_BACKGROUND_WORK.id); + assert_eq!(actor.actor_id(), USER_SERVICE_BALANCER.id); let authn = Context::internal_saga_recovery(); let actor = authn.actor().unwrap(); diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 6814b6276ac..c705c10d24f 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -2990,7 +2990,7 @@ impl DataStore { let builtin_users = [ // Note: "db_init" is also a builtin user, but that one by necessity // is created with the database. - &*authn::USER_BACKGROUND_WORK, + &*authn::USER_SERVICE_BALANCER, &*authn::USER_INTERNAL_API, &*authn::USER_INTERNAL_READ, &*authn::USER_EXTERNAL_AUTHN, diff --git a/nexus/src/db/fixed_data/role_assignment.rs b/nexus/src/db/fixed_data/role_assignment.rs index 540b57abe50..f6bbb951b6d 100644 --- a/nexus/src/db/fixed_data/role_assignment.rs +++ b/nexus/src/db/fixed_data/role_assignment.rs @@ -26,7 +26,7 @@ lazy_static! 
{ ), RoleAssignment::new( IdentityType::UserBuiltin, - user_builtin::USER_BACKGROUND_WORK.id, + user_builtin::USER_SERVICE_BALANCER.id, role_builtin::FLEET_ADMIN.resource_type, *FLEET_ID, role_builtin::FLEET_ADMIN.role_name, diff --git a/nexus/src/db/fixed_data/user_builtin.rs b/nexus/src/db/fixed_data/user_builtin.rs index 238a8f5405a..87f33fa3558 100644 --- a/nexus/src/db/fixed_data/user_builtin.rs +++ b/nexus/src/db/fixed_data/user_builtin.rs @@ -39,13 +39,13 @@ lazy_static! { "used for seeding initial database data", ); - /// Internal user for performing operations driven by Nexus, rather - /// than any API request. - pub static ref USER_BACKGROUND_WORK: UserBuiltinConfig = + /// Internal user for performing operations to manage the + /// provisioning of services across the fleet. + pub static ref USER_SERVICE_BALANCER: UserBuiltinConfig = UserBuiltinConfig::new_static( "001de000-05e4-4000-8000-00000000bac3", - "background-work", - "used for Nexus-driven database operations", + "service-balancer", + "used for Nexus-driven service balancing", ); /// Internal user used by Nexus when handling internal API requests @@ -86,16 +86,16 @@ lazy_static! { #[cfg(test)] mod test { use super::super::assert_valid_uuid; - use super::USER_BACKGROUND_WORK; use super::USER_DB_INIT; use super::USER_EXTERNAL_AUTHN; use super::USER_INTERNAL_API; use super::USER_INTERNAL_READ; use super::USER_SAGA_RECOVERY; + use super::USER_SERVICE_BALANCER; #[test] fn test_builtin_user_ids_are_valid() { - assert_valid_uuid(&USER_BACKGROUND_WORK.id); + assert_valid_uuid(&USER_SERVICE_BALANCER.id); assert_valid_uuid(&USER_DB_INIT.id); assert_valid_uuid(&USER_INTERNAL_API.id); assert_valid_uuid(&USER_EXTERNAL_AUTHN.id); diff --git a/nexus/tests/integration_tests/users_builtin.rs b/nexus/tests/integration_tests/users_builtin.rs index 0df3fbaf04b..ee4da338fcc 100644 --- a/nexus/tests/integration_tests/users_builtin.rs +++ b/nexus/tests/integration_tests/users_builtin.rs @@ -28,8 +28,8 @@ async fn test_users_builtin(cptestctx: &ControlPlaneTestContext) { let u = users.remove(&authn::USER_DB_INIT.name.to_string()).unwrap(); assert_eq!(u.identity.id, authn::USER_DB_INIT.id); let u = - users.remove(&authn::USER_BACKGROUND_WORK.name.to_string()).unwrap(); - assert_eq!(u.identity.id, authn::USER_BACKGROUND_WORK.id); + users.remove(&authn::USER_SERVICE_BALANCER.name.to_string()).unwrap(); + assert_eq!(u.identity.id, authn::USER_SERVICE_BALANCER.id); let u = users.remove(&authn::USER_INTERNAL_API.name.to_string()).unwrap(); assert_eq!(u.identity.id, authn::USER_INTERNAL_API.id); let u = users.remove(&authn::USER_INTERNAL_READ.name.to_string()).unwrap(); From 470da8b19fcbc6abcf4b58b43fc8a60b12b2211c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 23 Jun 2022 21:43:06 -0400 Subject: [PATCH 25/27] review feedback --- internal-dns-client/src/multiclient.rs | 189 +++++++++---------------- internal-dns-client/src/names.rs | 6 +- 2 files changed, 70 insertions(+), 125 deletions(-) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 58b2cdea012..2fc9089e334 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -8,6 +8,7 @@ use omicron_common::address::{ Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, DNS_SERVER_PORT, }; use slog::{info, Logger}; +use std::collections::HashMap; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use trust_dns_resolver::config::{ NameServerConfig, Protocol, ResolverConfig, ResolverOpts, @@ -16,6 +17,8 
@@ use trust_dns_resolver::TokioAsyncResolver; pub type DnsError = crate::Error; +pub type AAAARecord = (crate::names::AAAA, SocketAddrV6); + /// Describes how to find the DNS servers. /// /// In production code, this is nearly always [`Ipv6Subnet`], @@ -50,13 +53,6 @@ impl DnsAddressLookup for Ipv6Subnet { } } -/// Describes a service which may be inserted into DNS records. -pub trait Service { - fn aaaa(&self) -> crate::names::AAAA; - fn srv(&self) -> crate::names::SRV; - fn address(&self) -> SocketAddrV6; -} - /// A connection used to update multiple DNS servers. pub struct Updater { log: Logger, @@ -83,37 +79,15 @@ impl Updater { /// Inserts all service records into the DNS server. /// - /// This method is most efficient when records are sorted by SRV key. + /// Each SRV record should have one or more AAAA records. pub async fn insert_dns_records( &self, - records: &Vec, + records: &HashMap>, ) -> Result<(), DnsError> { - let mut records = records.iter().peekable(); - - while let Some(record) = records.next() { - let srv = record.srv(); + for (srv, aaaa) in records.iter() { info!(self.log, "Inserting DNS record: {:?}", srv); - match &srv { - &crate::names::SRV::Service(_) => { - let mut aaaa = vec![(record.aaaa(), record.address())]; - while let Some(record) = records.peek() { - if record.srv() == srv { - let record = records.next().unwrap(); - aaaa.push((record.aaaa(), record.address())); - } else { - break; - } - } - - self.insert_dns_records_internal(aaaa, srv).await?; - } - &crate::names::SRV::Backend(_, _) => { - let aaaa = vec![(record.aaaa(), record.address())]; - self.insert_dns_records_internal(aaaa, record.srv()) - .await?; - } - }; + self.insert_dns_records_internal(aaaa, srv).await?; } Ok(()) } @@ -123,8 +97,8 @@ impl Updater { // - An SRV record, pointing to each of the AAAA records. async fn insert_dns_records_internal( &self, - aaaa: Vec<(crate::names::AAAA, SocketAddrV6)>, - srv_key: crate::names::SRV, + aaaa: &Vec, + srv_key: &crate::names::SRV, ) -> Result<(), DnsError> { let mut records = Vec::with_capacity(aaaa.len() + 1); @@ -409,33 +383,6 @@ mod test { logctx.cleanup_successful(); } - #[derive(Clone)] - struct TestServiceRecord { - aaaa: AAAA, - srv: SRV, - addr: SocketAddrV6, - } - - impl TestServiceRecord { - fn new(aaaa: AAAA, srv: SRV, addr: SocketAddrV6) -> Self { - Self { aaaa, srv, addr } - } - } - - impl Service for TestServiceRecord { - fn aaaa(&self) -> AAAA { - self.aaaa.clone() - } - - fn srv(&self) -> SRV { - self.srv.clone() - } - - fn address(&self) -> SocketAddrV6 { - self.addr - } - } - // Insert and retreive a single DNS record. 
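The refactor above drops the `Service` trait in favor of passing `insert_dns_records` a map from each SRV name to its AAAA records, so a service backed by several zones becomes one key with several entries. A self-contained sketch of how callers build that shape by grouping per-zone records; `Srv` and `Aaaa` are stand-ins for `internal_dns_client::names::{SRV, AAAA}`, and the real map is `HashMap<SRV, Vec<(AAAA, SocketAddrV6)>>`.

use std::collections::HashMap;
use std::net::{Ipv6Addr, SocketAddrV6};

#[derive(Clone, Debug, Hash, Eq, PartialEq)]
struct Srv(String);
#[derive(Clone, Debug)]
struct Aaaa(String);

fn group_records(
    zones: &[(Srv, Aaaa, SocketAddrV6)],
) -> HashMap<Srv, Vec<(Aaaa, SocketAddrV6)>> {
    let mut records: HashMap<Srv, Vec<(Aaaa, SocketAddrV6)>> = HashMap::new();
    for (srv, aaaa, addr) in zones {
        // Multiple zones backing the same service share one SRV key.
        records
            .entry(srv.clone())
            .or_insert_with(Vec::new)
            .push((aaaa.clone(), *addr));
    }
    records
}

fn main() {
    let addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 32221, 0, 0);
    let zones = vec![(
        Srv("cockroach".to_string()),
        Aaaa("zone-a".to_string()),
        addr,
    )];
    let records = group_records(&zones);
    assert_eq!(records[&Srv("cockroach".to_string())].len(), 1);
}

The RSS changes in patch 26 build their `records` maps with exactly this entry-or-insert loop before handing them to the DNS updater.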
#[tokio::test] async fn insert_and_lookup_one_record() { @@ -452,23 +399,28 @@ mod test { .expect("Error creating localhost resolver"); let updater = Updater::new(&address_getter, logctx.log.clone()); - let record = TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), + let records = HashMap::from([( SRV::Service(ServiceName::Cockroach), - SocketAddrV6::new( - Ipv6Addr::from_str("ff::01").unwrap(), - 12345, - 0, - 0, - ), - ); - updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + vec![( + AAAA::Zone(Uuid::new_v4()), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 12345, + 0, + 0, + ), + )], + )]); + updater.insert_dns_records(&records).await.unwrap(); let ip = resolver .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) .await .expect("Should have been able to look up IP address"); - assert_eq!(&ip, record.addr.ip()); + assert_eq!( + &ip, + records[&SRV::Service(ServiceName::Cockroach)][0].1.ip() + ); logctx.cleanup_successful(); } @@ -522,36 +474,31 @@ mod test { 0, ); - let records = vec![ + let srv_crdb = SRV::Service(ServiceName::Cockroach); + let srv_clickhouse = SRV::Service(ServiceName::Clickhouse); + let srv_backend = SRV::Backend(BackendName::Crucible, Uuid::new_v4()); + + let records = HashMap::from([ // Three Cockroach services - TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), - SRV::Service(ServiceName::Cockroach), - cockroach_addrs[0], - ), - TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), - SRV::Service(ServiceName::Cockroach), - cockroach_addrs[1], - ), - TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), - SRV::Service(ServiceName::Cockroach), - cockroach_addrs[2], + ( + srv_crdb.clone(), + vec![ + (AAAA::Zone(Uuid::new_v4()), cockroach_addrs[0]), + (AAAA::Zone(Uuid::new_v4()), cockroach_addrs[1]), + (AAAA::Zone(Uuid::new_v4()), cockroach_addrs[2]), + ], ), // One Clickhouse service - TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), - SRV::Service(ServiceName::Clickhouse), - clickhouse_addr, + ( + srv_clickhouse.clone(), + vec![(AAAA::Zone(Uuid::new_v4()), clickhouse_addr)], ), // One Backend service - TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), - SRV::Backend(BackendName::Crucible, Uuid::new_v4()), - crucible_addr, + ( + srv_backend.clone(), + vec![(AAAA::Zone(Uuid::new_v4()), crucible_addr)], ), - ]; + ]); updater.insert_dns_records(&records).await.unwrap(); // Look up Cockroach @@ -570,7 +517,7 @@ mod test { // Look up Backend Service let ip = resolver - .lookup_ipv6(records[4].srv.clone()) + .lookup_ipv6(srv_backend) .await .expect("Should have been able to look up IP address"); assert_eq!(&ip, crucible_addr.ip()); @@ -578,15 +525,10 @@ mod test { // If we remove the AAAA records for two of the CRDB services, // only one will remain. updater - .dns_records_delete(&vec![DnsRecordKey { - name: records[0].aaaa.to_string(), - }]) - .await - .expect("Should have been able to delete record"); - updater - .dns_records_delete(&vec![DnsRecordKey { - name: records[1].aaaa.to_string(), - }]) + .dns_records_delete(&vec![ + DnsRecordKey { name: records[&srv_crdb][0].0.to_string() }, + DnsRecordKey { name: records[&srv_crdb][1].0.to_string() }, + ]) .await .expect("Should have been able to delete record"); let ip = resolver @@ -614,37 +556,40 @@ mod test { let updater = Updater::new(&address_getter, logctx.log.clone()); // Insert a record, observe that it exists. 
- let mut record = TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), - SRV::Service(ServiceName::Cockroach), - SocketAddrV6::new( - Ipv6Addr::from_str("ff::01").unwrap(), - 12345, - 0, - 0, - ), - ); - updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + let srv_crdb = SRV::Service(ServiceName::Cockroach); + let mut records = HashMap::from([( + srv_crdb.clone(), + vec![( + AAAA::Zone(Uuid::new_v4()), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 12345, + 0, + 0, + ), + )], + )]); + updater.insert_dns_records(&records).await.unwrap(); let ip = resolver .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) .await .expect("Should have been able to look up IP address"); - assert_eq!(&ip, record.addr.ip()); + assert_eq!(&ip, records[&srv_crdb][0].1.ip()); // If we insert the same record with a new address, it should be // updated. - record.addr = SocketAddrV6::new( + records.get_mut(&srv_crdb).unwrap()[0].1 = SocketAddrV6::new( Ipv6Addr::from_str("ee::02").unwrap(), 54321, 0, 0, ); - updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + updater.insert_dns_records(&records).await.unwrap(); let ip = resolver .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) .await .expect("Should have been able to look up IP address"); - assert_eq!(&ip, record.addr.ip()); + assert_eq!(&ip, records[&srv_crdb][0].1.ip()); logctx.cleanup_successful(); } diff --git a/internal-dns-client/src/names.rs b/internal-dns-client/src/names.rs index dbcc0d9f01c..1b633f915e1 100644 --- a/internal-dns-client/src/names.rs +++ b/internal-dns-client/src/names.rs @@ -10,7 +10,7 @@ use uuid::Uuid; pub(crate) const DNS_ZONE: &str = "control-plane.oxide.internal"; /// Names for services where backends are interchangeable. -#[derive(Clone, Debug, PartialEq, PartialOrd)] +#[derive(Clone, Debug, Hash, Eq, PartialEq, PartialOrd)] pub enum ServiceName { Clickhouse, Cockroach, @@ -32,7 +32,7 @@ impl fmt::Display for ServiceName { } /// Names for services where backends are not interchangeable. -#[derive(Clone, Debug, PartialEq, PartialOrd)] +#[derive(Clone, Debug, Hash, Eq, PartialEq, PartialOrd)] pub enum BackendName { Crucible, SledAgent, @@ -47,7 +47,7 @@ impl fmt::Display for BackendName { } } -#[derive(Clone, Debug, PartialEq, PartialOrd)] +#[derive(Clone, Debug, Hash, Eq, PartialEq, PartialOrd)] pub enum SRV { /// A service identified and accessed by name, such as "nexus", "CRDB", etc. 
/// From b5916e00d7834b792ce78452546a1899df8f1853 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 23 Jun 2022 22:16:57 -0400 Subject: [PATCH 26/27] merge --- sled-agent/src/params.rs | 16 ++++++++-------- sled-agent/src/rack_setup/service.rs | 19 +++++++++++++++++-- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index dbb0eac57a1..4752caad940 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -251,12 +251,12 @@ pub struct DatasetEnsureBody { pub address: SocketAddrV6, } -impl internal_dns_client::multiclient::Service for DatasetEnsureBody { - fn aaaa(&self) -> AAAA { +impl DatasetEnsureBody { + pub fn aaaa(&self) -> AAAA { AAAA::Zone(self.id) } - fn srv(&self) -> SRV { + pub fn srv(&self) -> SRV { match self.dataset_kind { DatasetKind::Crucible => { SRV::Backend(BackendName::Crucible, self.id) @@ -268,7 +268,7 @@ impl internal_dns_client::multiclient::Service for DatasetEnsureBody { } } - fn address(&self) -> SocketAddrV6 { + pub fn address(&self) -> SocketAddrV6 { self.address } } @@ -342,12 +342,12 @@ pub struct ServiceRequest { pub service_type: ServiceType, } -impl internal_dns_client::multiclient::Service for ServiceRequest { - fn aaaa(&self) -> AAAA { +impl ServiceRequest { + pub fn aaaa(&self) -> AAAA { AAAA::Zone(self.id) } - fn srv(&self) -> SRV { + pub fn srv(&self) -> SRV { match self.service_type { ServiceType::InternalDns { .. } => { SRV::Service(ServiceName::InternalDNS) @@ -357,7 +357,7 @@ impl internal_dns_client::multiclient::Service for ServiceRequest { } } - fn address(&self) -> SocketAddrV6 { + pub fn address(&self) -> SocketAddrV6 { match self.service_type { ServiceType::InternalDns { server_address, .. } => server_address, ServiceType::Nexus { internal_address, .. 
} => internal_address, diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index a4f7032b385..96aa12dfacf 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -604,10 +604,17 @@ impl ServiceInner { ) .await?; + let mut records = HashMap::new(); + for dataset in &allocation.services_request.datasets { + records + .entry(dataset.srv()) + .or_insert_with(Vec::new) + .push((dataset.aaaa(), dataset.address())); + } self.dns_servers .get() .expect("DNS servers must be initialized first") - .insert_dns_records(&allocation.services_request.datasets) + .insert_dns_records(&records) .await?; Ok(()) }, @@ -638,10 +645,18 @@ impl ServiceInner { .collect::>(); self.initialize_services(sled_address, &all_services).await?; + + let mut records = HashMap::new(); + for service in &all_services { + records + .entry(service.srv()) + .or_insert_with(Vec::new) + .push((service.aaaa(), service.address())); + } self.dns_servers .get() .expect("DNS servers must be initialized first") - .insert_dns_records(&all_services) + .insert_dns_records(&records) .await?; Ok(()) }, From 364efb39be87a6822174a98de0ab8b7497aa0d06 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 23 Jun 2022 23:14:59 -0400 Subject: [PATCH 27/27] rack ID to test interfaces --- nexus/src/app/mod.rs | 4 ---- nexus/src/app/test_interfaces.rs | 7 +++++++ nexus/tests/integration_tests/rack.rs | 1 + 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 921fce51790..bf19528dd61 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -202,10 +202,6 @@ impl Nexus { &self.tunables } - pub fn rack_id(&self) -> Uuid { - self.rack_id - } - pub async fn wait_for_populate(&self) -> Result<(), anyhow::Error> { let mut my_rx = self.populate_status.clone(); loop { diff --git a/nexus/src/app/test_interfaces.rs b/nexus/src/app/test_interfaces.rs index a15f46096a8..40faaae5e1c 100644 --- a/nexus/src/app/test_interfaces.rs +++ b/nexus/src/app/test_interfaces.rs @@ -14,6 +14,9 @@ use uuid::Uuid; /// Exposes additional [`Nexus`] interfaces for use by the test suite #[async_trait] pub trait TestInterfaces { + /// Access the Rack ID of the currently executing Nexus. + fn rack_id(&self) -> Uuid; + /// Returns the SledAgentClient for an Instance from its id. We may also /// want to split this up into instance_lookup_by_id() and instance_sled(), /// but after all it's a test suite special to begin with. @@ -39,6 +42,10 @@ pub trait TestInterfaces { #[async_trait] impl TestInterfaces for super::Nexus { + fn rack_id(&self) -> Uuid { + self.rack_id + } + async fn instance_sled_by_id( &self, id: &Uuid, diff --git a/nexus/tests/integration_tests/rack.rs b/nexus/tests/integration_tests/rack.rs index dfcbde9740f..5a6e28ab70a 100644 --- a/nexus/tests/integration_tests/rack.rs +++ b/nexus/tests/integration_tests/rack.rs @@ -7,6 +7,7 @@ use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_nexus::external_api::views::Rack; +use omicron_nexus::TestInterfaces; #[nexus_test] async fn test_list_own_rack(cptestctx: &ControlPlaneTestContext) {
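Patch 27 takes `rack_id()` off the public `Nexus` API and re-exposes it through the test-only `TestInterfaces` trait, which is why the rack tests now import `omicron_nexus::TestInterfaces`. A minimal sketch of that pattern, reduced to the essentials; the real trait is async and also exposes sled-agent lookups for instances.

use uuid::Uuid;

pub struct Nexus {
    rack_id: Uuid,
}

/// Test-suite-only accessors; keeping them on a separate trait keeps the
/// production `Nexus` surface small while tests opt in with a `use`.
pub trait TestInterfaces {
    fn rack_id(&self) -> Uuid;
}

impl TestInterfaces for Nexus {
    fn rack_id(&self) -> Uuid {
        self.rack_id
    }
}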