From b78ff981f2d98ce572925381d38c5e5813f4191f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 15:34:24 -0400 Subject: [PATCH 01/88] [nexus] Split Nexus configuration (package vs runtime) --- Cargo.lock | 1 + common/Cargo.toml | 1 + common/src/lib.rs | 3 +- common/src/nexus_config.rs | 128 ++++++++ common/src/postgres_config.rs | 95 ++++++ nexus/benches/setup_benchmark.rs | 2 +- nexus/examples/config.toml | 41 +-- nexus/src/app/mod.rs | 10 +- nexus/src/config.rs | 295 ++++++++---------- nexus/src/context.rs | 31 +- nexus/src/db/config.rs | 2 +- nexus/src/lib.rs | 24 +- nexus/test-utils/src/lib.rs | 10 +- nexus/tests/config.test.toml | 55 ++-- nexus/tests/integration_tests/authn_http.rs | 4 +- nexus/tests/integration_tests/commands.rs | 3 +- nexus/tests/integration_tests/console_api.rs | 2 +- nexus/tests/integration_tests/updates.rs | 4 +- openapi/sled-agent.json | 80 ++++- sled-agent/src/params.rs | 50 ++- sled-agent/src/rack_setup/config.rs | 4 +- sled-agent/src/rack_setup/service.rs | 46 ++- sled-agent/src/services.rs | 272 ++++++++++++---- sled-agent/src/sled_agent.rs | 4 +- .../{config.toml => config-partial.toml} | 17 +- smf/nexus/manifest.xml | 8 + smf/sled-agent/config-rss.toml | 13 + smf/sled-agent/manifest.xml | 4 + test-utils/src/dev/db.rs | 2 +- 29 files changed, 853 insertions(+), 358 deletions(-) create mode 100644 common/src/nexus_config.rs create mode 100644 common/src/postgres_config.rs rename smf/nexus/{config.toml => config-partial.toml} (53%) diff --git a/Cargo.lock b/Cargo.lock index 268e855769e..480645db5a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2798,6 +2798,7 @@ dependencies = [ "thiserror", "tokio", "tokio-postgres", + "toml", "uuid", ] diff --git a/common/Cargo.toml b/common/Cargo.toml index aa3b8943800..cd47bef1169 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -30,6 +30,7 @@ structopt = "0.3" thiserror = "1.0" tokio = { version = "1.18", features = [ "full" ] } tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] } +toml = "0.5.9" uuid = { version = "1.1.0", features = [ "serde", "v4" ] } parse-display = "0.5.4" progenitor = { git = "https://github.com/oxidecomputer/progenitor" } diff --git a/common/src/lib.rs b/common/src/lib.rs index 2a933283425..d90ecdb7333 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -24,7 +24,8 @@ pub mod address; pub mod api; pub mod backoff; pub mod cmd; -pub mod config; +pub mod nexus_config; +pub mod postgres_config; #[macro_export] macro_rules! generate_logging_api { diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs new file mode 100644 index 00000000000..f1325ae336d --- /dev/null +++ b/common/src/nexus_config.rs @@ -0,0 +1,128 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Configuration parameters to Nexus that are usually only known +//! at runtime. 
+ +use super::address::{Ipv6Subnet, RACK_PREFIX}; +use super::postgres_config::PostgresConfigWithUrl; +use dropshot::ConfigDropshot; +use serde::{Deserialize, Serialize}; +use serde_with::serde_as; +use serde_with::DisplayFromStr; +use std::fmt; +use std::path::{Path, PathBuf}; +use uuid::Uuid; + +#[derive(Debug)] +pub struct LoadError { + pub path: PathBuf, + pub kind: LoadErrorKind, +} + +#[derive(Debug)] +pub struct InvalidTunable { + pub tunable: String, + pub message: String, +} + +impl std::fmt::Display for InvalidTunable { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "invalid \"{}\": \"{}\"", self.tunable, self.message) + } +} +impl std::error::Error for InvalidTunable {} + +#[derive(Debug)] +pub enum LoadErrorKind { + Io(std::io::Error), + Parse(toml::de::Error), + InvalidTunable(InvalidTunable), +} + +impl From<(PathBuf, std::io::Error)> for LoadError { + fn from((path, err): (PathBuf, std::io::Error)) -> Self { + LoadError { path, kind: LoadErrorKind::Io(err) } + } +} + +impl From<(PathBuf, toml::de::Error)> for LoadError { + fn from((path, err): (PathBuf, toml::de::Error)) -> Self { + LoadError { path, kind: LoadErrorKind::Parse(err) } + } +} + +impl std::error::Error for LoadError {} + +impl fmt::Display for LoadError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match &self.kind { + LoadErrorKind::Io(e) => { + write!(f, "read \"{}\": {}", self.path.display(), e) + } + LoadErrorKind::Parse(e) => { + write!(f, "parse \"{}\": {}", self.path.display(), e) + } + LoadErrorKind::InvalidTunable(inner) => { + write!( + f, + "invalid tunable \"{}\": {}", + self.path.display(), + inner, + ) + } + } + } +} + +impl std::cmp::PartialEq for LoadError { + fn eq(&self, other: &std::io::Error) -> bool { + if let LoadErrorKind::Io(e) = &self.kind { + e.kind() == other.kind() + } else { + false + } + } +} + +#[serde_as] +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +#[serde(tag = "type", rename_all = "snake_case")] +#[allow(clippy::large_enum_variant)] +pub enum Database { + FromDns, + FromUrl { + #[serde_as(as = "DisplayFromStr")] + url: PostgresConfigWithUrl, + }, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct RuntimeConfig { + /// Uuid of the Nexus instance + pub id: Uuid, + /// Dropshot configuration for external API server + pub dropshot_external: ConfigDropshot, + /// Dropshot configuration for internal API server + pub dropshot_internal: ConfigDropshot, + /// Portion of the IP space to be managed by the Rack. + pub subnet: Ipv6Subnet, + /// DB configuration. + pub database: Database, +} + +impl RuntimeConfig { + /// Load a `RuntimeConfig` from the given TOML file + /// + /// This config object can then be used to create a new `Nexus`. + /// The format is described in the README. + pub fn from_file>(path: P) -> Result { + let path = path.as_ref(); + let file_contents = std::fs::read_to_string(path) + .map_err(|e| (path.to_path_buf(), e))?; + let config_parsed: Self = toml::from_str(&file_contents) + .map_err(|e| (path.to_path_buf(), e))?; + Ok(config_parsed) + } +} diff --git a/common/src/postgres_config.rs b/common/src/postgres_config.rs new file mode 100644 index 00000000000..2509ae4fca2 --- /dev/null +++ b/common/src/postgres_config.rs @@ -0,0 +1,95 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Common objects used for configuration + +use std::fmt; +use std::ops::Deref; +use std::str::FromStr; + +/// Describes a URL for connecting to a PostgreSQL server +// The config pattern that we're using requires that types in the config impl +// Serialize. If tokio_postgres::config::Config impl'd Serialize or even +// Display, we'd just use that directly instead of this type. But it doesn't. +// We could implement a serialize function ourselves, but URLs support many +// different properties, and this could be brittle and easy to get wrong. +// Instead, this type just wraps tokio_postgres::config::Config and keeps the +// original String around. (The downside is that a consumer _generating_ a +// nexus::db::Config needs to generate a URL that matches the +// tokio_postgres::config::Config that they construct here, but this is not +// currently an important use case.) +// +// To ensure that the URL and config are kept in sync, we currently only support +// constructing one of these via `FromStr` and the fields are not public. +#[derive(Clone, Debug, PartialEq)] +pub struct PostgresConfigWithUrl { + url_raw: String, + config: tokio_postgres::config::Config, +} + +impl PostgresConfigWithUrl { + pub fn url(&self) -> String { + self.url_raw.clone() + } +} + +impl FromStr for PostgresConfigWithUrl { + type Err = tokio_postgres::Error; + + fn from_str(s: &str) -> Result { + Ok(PostgresConfigWithUrl { url_raw: s.to_owned(), config: s.parse()? }) + } +} + +impl fmt::Display for PostgresConfigWithUrl { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(&self.url_raw) + } +} + +impl Deref for PostgresConfigWithUrl { + type Target = tokio_postgres::config::Config; + + fn deref(&self) -> &Self::Target { + &self.config + } +} + +#[cfg(test)] +mod test { + use super::PostgresConfigWithUrl; + + #[test] + fn test_bad_url() { + // There is surprisingly little that we can rely on the + // tokio_postgres::config::Config parser to include in the error + // message. + let error = "foo".parse::().unwrap_err(); + assert!(error.to_string().contains("unexpected EOF")); + "http://127.0.0.1:1234".parse::().unwrap_err(); + let error = "postgresql://example.com?sslmode=not-a-real-ssl-mode" + .parse::() + .unwrap_err(); + assert!(error + .to_string() + .contains("invalid value for option `sslmode`")); + } + + #[test] + fn test_example_url() { + let config = "postgresql://notauser@10.2.3.4:1789?sslmode=disable" + .parse::() + .unwrap(); + assert_eq!(config.get_user(), Some("notauser")); + assert_eq!( + config.get_ssl_mode(), + tokio_postgres::config::SslMode::Disable + ); + assert_eq!( + config.get_hosts(), + &[tokio_postgres::config::Host::Tcp("10.2.3.4".to_string())] + ); + assert_eq!(config.get_ports(), &[1789]); + } +} diff --git a/nexus/benches/setup_benchmark.rs b/nexus/benches/setup_benchmark.rs index c4c27bd2a97..24584670ce5 100644 --- a/nexus/benches/setup_benchmark.rs +++ b/nexus/benches/setup_benchmark.rs @@ -19,7 +19,7 @@ async fn do_full_setup() { // Wraps exclusively the CockroachDB portion of setup/teardown. 
async fn do_crdb_setup() { let cfg = nexus_test_utils::load_test_config(); - let logctx = LogContext::new("crdb_setup", &cfg.log); + let logctx = LogContext::new("crdb_setup", &cfg.pkg.log); let mut db = test_setup_database(&logctx.log).await; db.cleanup().await.unwrap(); } diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 7900813cae0..22889ab1be9 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -2,9 +2,6 @@ # Oxide API: example configuration file # -# Identifier for this instance of Nexus -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" - [console] # Directory for static assets. Absolute path or relative to CWD. static_dir = "nexus/static" # TODO: figure out value @@ -20,21 +17,6 @@ session_absolute_timeout_minutes = 480 # TODO(https://github.com/oxidecomputer/omicron/issues/372): Remove "spoof". schemes_external = ["spoof", "session_cookie"] -[database] -# URL for connecting to the database -url = "postgresql://root@127.0.0.1:32221/omicron?sslmode=disable" - -[dropshot_external] -# IP address and TCP port on which to listen for the external API -bind_address = "127.0.0.1:12220" -# Allow larger request bodies (1MiB) to accomodate firewall endpoints (one -# rule is ~500 bytes) -request_body_max_bytes = 1048576 - -[dropshot_internal] -# IP address and TCP port on which to listen for the internal API -bind_address = "127.0.0.1:12221" - [log] # Show log messages of this level and more severe level = "info" @@ -51,6 +33,29 @@ mode = "stderr-terminal" [timeseries_db] address = "[::1]:8123" +[runtime] +# Identifier for this instance of Nexus +id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" + +[runtime.dropshot_external] +# IP address and TCP port on which to listen for the external API +bind_address = "127.0.0.1:12220" +# Allow larger request bodies (1MiB) to accomodate firewall endpoints (one +# rule is ~500 bytes) +request_body_max_bytes = 1048576 + +[runtime.dropshot_internal] +# IP address and TCP port on which to listen for the internal API +bind_address = "127.0.0.1:12221" + +[runtime.subnet] +net = "fd00:1122:3344:0100::/56" + +[runtime.database] +# URL for connecting to the database +type = "from_url" +url = "postgresql://root@127.0.0.1:32221/omicron?sslmode=disable" + # Tunable configuration parameters, for testing or experimentation [tunables] diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index ce20065fa1f..1c3620de7e7 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -112,7 +112,7 @@ impl Nexus { authz: Arc, ) -> Arc { let pool = Arc::new(pool); - let my_sec_id = db::SecId::from(config.id); + let my_sec_id = db::SecId::from(config.runtime.id); let db_datastore = Arc::new(db::DataStore::new(Arc::clone(&pool))); let sec_store = Arc::new(db::CockroachDbSecStore::new( my_sec_id, @@ -127,7 +127,7 @@ impl Nexus { sec_store, )); let timeseries_client = - oximeter_db::Client::new(config.timeseries_db.address, &log); + oximeter_db::Client::new(config.pkg.timeseries_db.address, &log); // TODO-cleanup We may want a first-class subsystem for managing startup // background tasks. 
It could use a Future for each one, a status enum @@ -143,7 +143,7 @@ impl Nexus { populate_start(populate_ctx, Arc::clone(&db_datastore)); let nexus = Nexus { - id: config.id, + id: config.runtime.id, rack_id, log: log.new(o!()), api_rack_identity: db::model::RackIdentity::new(rack_id), @@ -153,8 +153,8 @@ impl Nexus { recovery_task: std::sync::Mutex::new(None), populate_status, timeseries_client, - updates_config: config.updates.clone(), - tunables: config.tunables.clone(), + updates_config: config.pkg.updates.clone(), + tunables: config.pkg.tunables.clone(), opctx_alloc: OpContext::for_background( log.new(o!("component" => "InstanceAllocator")), Arc::clone(&authz), diff --git a/nexus/src/config.rs b/nexus/src/config.rs index 11b2c8d861e..d5bf6a2a2f9 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -5,15 +5,13 @@ //! Interfaces for parsing configuration files and working with a nexus server //! configuration -use crate::db; use anyhow::anyhow; -use dropshot::ConfigDropshot; use dropshot::ConfigLogging; +use omicron_common::nexus_config::{InvalidTunable, LoadError, RuntimeConfig}; use serde::Deserialize; use serde::Serialize; use serde_with::DeserializeFromStr; use serde_with::SerializeDisplay; -use std::fmt; use std::net::SocketAddr; use std::path::{Path, PathBuf}; @@ -124,22 +122,15 @@ impl Default for Tunables { /// Configuration for a nexus server #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] -pub struct Config { - /// Dropshot configuration for external API server - pub dropshot_external: ConfigDropshot, - /// Dropshot configuration for internal API server - pub dropshot_internal: ConfigDropshot, - /// Identifier for this instance of Nexus - pub id: uuid::Uuid, +pub struct PackageConfig { /// Console-related tunables pub console: ConsoleConfig, /// Server-wide logging configuration. pub log: ConfigLogging, - /// Database parameters - pub database: db::Config, /// Authentication-related configuration pub authn: AuthnConfig, /// Timeseries database configuration. + // TODO: Should this be removed? Nexus needs to initialize it. pub timeseries_db: TimeseriesDbConfig, /// Updates-related configuration. Updates APIs return 400 Bad Request when this is /// unconfigured. @@ -150,74 +141,28 @@ pub struct Config { pub tunables: Tunables, } -#[derive(Debug)] -pub struct InvalidTunable { - tunable: String, - message: String, -} - -impl std::fmt::Display for InvalidTunable { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "invalid \"{}\": \"{}\"", self.tunable, self.message) - } -} - -impl std::error::Error for InvalidTunable {} - -#[derive(Debug)] -pub struct LoadError { - path: PathBuf, - kind: LoadErrorKind, -} -#[derive(Debug)] -pub enum LoadErrorKind { - Io(std::io::Error), - Parse(toml::de::Error), - InvalidTunable(InvalidTunable), -} - -impl From<(PathBuf, std::io::Error)> for LoadError { - fn from((path, err): (PathBuf, std::io::Error)) -> Self { - LoadError { path, kind: LoadErrorKind::Io(err) } - } -} - -impl From<(PathBuf, toml::de::Error)> for LoadError { - fn from((path, err): (PathBuf, toml::de::Error)) -> Self { - LoadError { path, kind: LoadErrorKind::Parse(err) } - } -} - -impl std::error::Error for LoadError {} +#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] +pub struct Config { + /// Configuration parameters known at compile-time. 
+ #[serde(flatten)] + pub pkg: PackageConfig, -impl fmt::Display for LoadError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match &self.kind { - LoadErrorKind::Io(e) => { - write!(f, "read \"{}\": {}", self.path.display(), e) - } - LoadErrorKind::Parse(e) => { - write!(f, "parse \"{}\": {}", self.path.display(), e) - } - LoadErrorKind::InvalidTunable(inner) => { - write!( - f, - "invalid tunable \"{}\": {}", - self.path.display(), - inner, - ) - } - } - } + /// A variety of configuration parameters only known at runtime. + pub runtime: RuntimeConfig, } -impl std::cmp::PartialEq for LoadError { - fn eq(&self, other: &std::io::Error) -> bool { - if let LoadErrorKind::Io(e) = &self.kind { - e.kind() == other.kind() - } else { - false - } +impl Config { + /// Load a `PackageConfig` from the given TOML file + /// + /// This config object can then be used to create a new `Nexus`. + /// The format is described in the README. + pub fn from_file>(path: P) -> Result { + let path = path.as_ref(); + let file_contents = std::fs::read_to_string(path) + .map_err(|e| (path.to_path_buf(), e))?; + let config_parsed: Self = toml::from_str(&file_contents) + .map_err(|e| (path.to_path_buf(), e))?; + Ok(config_parsed) } } @@ -255,36 +200,24 @@ impl std::fmt::Display for SchemeName { } } -impl Config { - /// Load a `Config` from the given TOML file - /// - /// This config object can then be used to create a new `Nexus`. - /// The format is described in the README. - pub fn from_file>(path: P) -> Result { - let path = path.as_ref(); - let file_contents = std::fs::read_to_string(path) - .map_err(|e| (path.to_path_buf(), e))?; - let config_parsed: Config = toml::from_str(&file_contents) - .map_err(|e| (path.to_path_buf(), e))?; - Ok(config_parsed) - } -} - #[cfg(test)] mod test { use super::Tunables; use super::{ - AuthnConfig, Config, ConsoleConfig, LoadError, LoadErrorKind, + AuthnConfig, Config, ConsoleConfig, LoadError, PackageConfig, SchemeName, TimeseriesDbConfig, UpdatesConfig, }; - use crate::db; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; use dropshot::ConfigLoggingIfExists; use dropshot::ConfigLoggingLevel; use libc; + use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; + use omicron_common::nexus_config::{ + Database, LoadErrorKind, RuntimeConfig, + }; use std::fs; - use std::net::SocketAddr; + use std::net::{Ipv6Addr, SocketAddr}; use std::path::Path; use std::path::PathBuf; @@ -355,7 +288,7 @@ mod test { let error = read_config("empty", "").expect_err("expected failure"); if let LoadErrorKind::Parse(error) = &error.kind { assert_eq!(error.line_col(), None); - assert_eq!(error.to_string(), "missing field `dropshot_external`"); + assert_eq!(error.to_string(), "missing field `runtime`"); } else { panic!( "Got an unexpected error, expected Parse but got {:?}", @@ -373,7 +306,6 @@ mod test { let config = read_config( "valid", r##" - id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" [console] static_dir = "tests/static" cache_control_max_age_minutes = 10 @@ -381,14 +313,6 @@ mod test { session_absolute_timeout_minutes = 480 [authn] schemes_external = [] - [dropshot_external] - bind_address = "10.1.2.3:4567" - request_body_max_bytes = 1024 - [dropshot_internal] - bind_address = "10.1.2.3:4568" - request_body_max_bytes = 1024 - [database] - url = "postgresql://127.0.0.1?sslmode=disable" [log] mode = "file" level = "debug" @@ -401,6 +325,18 @@ mod test { default_base_url = "http://example.invalid/" [tunables] max_vpc_ipv4_subnet_prefix = 27 + [runtime] + id = 
"28b90dc4-c22a-65ba-f49a-f051fe01208f" + [runtime.dropshot_external] + bind_address = "10.1.2.3:4567" + request_body_max_bytes = 1024 + [runtime.dropshot_internal] + bind_address = "10.1.2.3:4568" + request_body_max_bytes = 1024 + [runtime.subnet] + net = "::/56" + [runtime.database] + type = "from_dns" "##, ) .unwrap(); @@ -408,51 +344,51 @@ mod test { assert_eq!( config, Config { - id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), - console: ConsoleConfig { - static_dir: "tests/static".parse().unwrap(), - cache_control_max_age_minutes: 10, - session_idle_timeout_minutes: 60, - session_absolute_timeout_minutes: 480 - }, - authn: AuthnConfig { schemes_external: Vec::new() }, - dropshot_external: ConfigDropshot { - bind_address: "10.1.2.3:4567" - .parse::() - .unwrap(), - ..Default::default() - }, - dropshot_internal: ConfigDropshot { - bind_address: "10.1.2.3:4568" - .parse::() - .unwrap(), - ..Default::default() - }, - log: ConfigLogging::File { - level: ConfigLoggingLevel::Debug, - if_exists: ConfigLoggingIfExists::Fail, - path: "/nonexistent/path".to_string() + runtime: RuntimeConfig { + id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), + dropshot_external: ConfigDropshot { + bind_address: "10.1.2.3:4567" + .parse::() + .unwrap(), + ..Default::default() + }, + dropshot_internal: ConfigDropshot { + bind_address: "10.1.2.3:4568" + .parse::() + .unwrap(), + ..Default::default() + }, + subnet: Ipv6Subnet::::new(Ipv6Addr::LOCALHOST), + database: Database::FromDns, }, - database: db::Config { - url: "postgresql://127.0.0.1?sslmode=disable" - .parse() - .unwrap() + pkg: PackageConfig { + console: ConsoleConfig { + static_dir: "tests/static".parse().unwrap(), + cache_control_max_age_minutes: 10, + session_idle_timeout_minutes: 60, + session_absolute_timeout_minutes: 480 + }, + authn: AuthnConfig { schemes_external: Vec::new() }, + log: ConfigLogging::File { + level: ConfigLoggingLevel::Debug, + if_exists: ConfigLoggingIfExists::Fail, + path: "/nonexistent/path".to_string() + }, + timeseries_db: TimeseriesDbConfig { + address: "[::1]:8123".parse().unwrap() + }, + updates: Some(UpdatesConfig { + trusted_root: PathBuf::from("/path/to/root.json"), + default_base_url: "http://example.invalid/".into(), + }), + tunables: Tunables { max_vpc_ipv4_subnet_prefix: 27 }, }, - timeseries_db: TimeseriesDbConfig { - address: "[::1]:8123".parse().unwrap() - }, - updates: Some(UpdatesConfig { - trusted_root: PathBuf::from("/path/to/root.json"), - default_base_url: "http://example.invalid/".into(), - }), - tunables: Tunables { max_vpc_ipv4_subnet_prefix: 27 }, } ); let config = read_config( "valid", r##" - id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" [console] static_dir = "tests/static" cache_control_max_age_minutes = 10 @@ -460,14 +396,6 @@ mod test { session_absolute_timeout_minutes = 480 [authn] schemes_external = [ "spoof", "session_cookie" ] - [dropshot_external] - bind_address = "10.1.2.3:4567" - request_body_max_bytes = 1024 - [dropshot_internal] - bind_address = "10.1.2.3:4568" - request_body_max_bytes = 1024 - [database] - url = "postgresql://127.0.0.1?sslmode=disable" [log] mode = "file" level = "debug" @@ -475,12 +403,24 @@ mod test { if_exists = "fail" [timeseries_db] address = "[::1]:8123" + [runtime] + id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + [runtime.dropshot_external] + bind_address = "10.1.2.3:4567" + request_body_max_bytes = 1024 + [runtime.dropshot_internal] + bind_address = "10.1.2.3:4568" + request_body_max_bytes = 1024 + [runtime.subnet] + net = "::/56" + 
[runtime.database] + type = "from_dns" "##, ) .unwrap(); assert_eq!( - config.authn.schemes_external, + config.pkg.authn.schemes_external, vec![SchemeName::Spoof, SchemeName::SessionCookie], ); } @@ -490,7 +430,6 @@ mod test { let error = read_config( "bad authn.schemes_external", r##" - id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" [console] static_dir = "tests/static" cache_control_max_age_minutes = 10 @@ -498,14 +437,6 @@ mod test { session_absolute_timeout_minutes = 480 [authn] schemes_external = ["trust-me"] - [dropshot_external] - bind_address = "10.1.2.3:4567" - request_body_max_bytes = 1024 - [dropshot_internal] - bind_address = "10.1.2.3:4568" - request_body_max_bytes = 1024 - [database] - url = "postgresql://127.0.0.1?sslmode=disable" [log] mode = "file" level = "debug" @@ -513,14 +444,29 @@ mod test { if_exists = "fail" [timeseries_db] address = "[::1]:8123" + [runtime] + id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + [runtime.dropshot_external] + bind_address = "10.1.2.3:4567" + request_body_max_bytes = 1024 + [runtime.dropshot_internal] + bind_address = "10.1.2.3:4568" + request_body_max_bytes = 1024 + [runtime.subnet] + net = "::/56" + [runtime.database] + type = "from_dns" "##, ) .expect_err("expected failure"); if let LoadErrorKind::Parse(error) = &error.kind { - assert!(error.to_string().starts_with( - "unsupported authn scheme: \"trust-me\" \ - for key `authn.schemes_external`" - )); + assert!( + error + .to_string() + .starts_with("unsupported authn scheme: \"trust-me\""), + "error = {}", + error.to_string() + ); } else { panic!( "Got an unexpected error, expected Parse but got {:?}", @@ -534,7 +480,6 @@ mod test { let error = read_config( "invalid_ipv4_prefix_tunable", r##" - id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" [console] static_dir = "tests/static" cache_control_max_age_minutes = 10 @@ -542,14 +487,6 @@ mod test { session_absolute_timeout_minutes = 480 [authn] schemes_external = [] - [dropshot_external] - bind_address = "10.1.2.3:4567" - request_body_max_bytes = 1024 - [dropshot_internal] - bind_address = "10.1.2.3:4568" - request_body_max_bytes = 1024 - [database] - url = "postgresql://127.0.0.1?sslmode=disable" [log] mode = "file" level = "debug" @@ -562,6 +499,18 @@ mod test { default_base_url = "http://example.invalid/" [tunables] max_vpc_ipv4_subnet_prefix = 100 + [runtime] + id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + [runtime.dropshot_external] + bind_address = "10.1.2.3:4567" + request_body_max_bytes = 1024 + [runtime.dropshot_internal] + bind_address = "10.1.2.3:4568" + request_body_max_bytes = 1024 + [runtime.subnet] + net = "::/56" + [runtime.database] + type = "from_dns" "##, ) .expect_err("Expected failure"); diff --git a/nexus/src/context.rs b/nexus/src/context.rs index f0d9e6b13a0..a08f22304df 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -19,6 +19,7 @@ use authn::external::spoof::HttpAuthnSpoof; use authn::external::HttpAuthnScheme; use chrono::{DateTime, Duration, Utc}; use omicron_common::api::external::Error; +use omicron_common::nexus_config; use oximeter::types::ProducerRegistry; use oximeter_instruments::http::{HttpService, LatencyTracker}; use slog::Logger; @@ -67,13 +68,13 @@ pub struct ConsoleConfig { impl ServerContext { /// Create a new context with the given rack id and log. This creates the /// underlying nexus as well. 
- pub fn new( + pub async fn new( rack_id: Uuid, log: Logger, - pool: db::Pool, config: &config::Config, ) -> Result, String> { let nexus_schemes = config + .pkg .authn .schemes_external .iter() @@ -90,7 +91,8 @@ impl ServerContext { let internal_authn = Arc::new(authn::Context::internal_api()); let authz = Arc::new(authz::Authz::new(&log)); let create_tracker = |name: &str| { - let target = HttpService { name: name.to_string(), id: config.id }; + let target = + HttpService { name: name.to_string(), id: config.runtime.id }; const START_LATENCY_DECADE: i8 = -6; const END_LATENCY_DECADE: i8 = 3; LatencyTracker::with_latency_decades( @@ -102,7 +104,7 @@ impl ServerContext { }; let internal_latencies = create_tracker("nexus-internal"); let external_latencies = create_tracker("nexus-external"); - let producer_registry = ProducerRegistry::with_id(config.id); + let producer_registry = ProducerRegistry::with_id(config.runtime.id); producer_registry .register_producer(internal_latencies.clone()) .unwrap(); @@ -113,11 +115,11 @@ impl ServerContext { // Support both absolute and relative paths. If configured dir is // absolute, use it directly. If not, assume it's relative to the // current working directory. - let static_dir = if config.console.static_dir.is_absolute() { - Some(config.console.static_dir.to_owned()) + let static_dir = if config.pkg.console.static_dir.is_absolute() { + Some(config.pkg.console.static_dir.to_owned()) } else { env::current_dir() - .map(|root| root.join(&config.console.static_dir)) + .map(|root| root.join(&config.pkg.console.static_dir)) .ok() }; @@ -132,6 +134,15 @@ impl ServerContext { // like console index.html. leaving that out for now so we don't break // nexus in dev for everyone + // Set up DB pool + let url = match &config.runtime.database { + nexus_config::Database::FromUrl { url } => url.clone(), + nexus_config::Database::FromDns => { + todo!("Not yet implemented"); + } + }; + let pool = db::Pool::new(&db::Config { url }); + Ok(Arc::new(ServerContext { nexus: Nexus::new_with_id( rack_id, @@ -149,14 +160,14 @@ impl ServerContext { producer_registry, console_config: ConsoleConfig { session_idle_timeout: Duration::minutes( - config.console.session_idle_timeout_minutes.into(), + config.pkg.console.session_idle_timeout_minutes.into(), ), session_absolute_timeout: Duration::minutes( - config.console.session_absolute_timeout_minutes.into(), + config.pkg.console.session_absolute_timeout_minutes.into(), ), static_dir, cache_control_max_age: Duration::minutes( - config.console.cache_control_max_age_minutes.into(), + config.pkg.console.cache_control_max_age_minutes.into(), ), }, })) diff --git a/nexus/src/db/config.rs b/nexus/src/db/config.rs index b4066ce3cbe..afe51bca66d 100644 --- a/nexus/src/db/config.rs +++ b/nexus/src/db/config.rs @@ -4,7 +4,7 @@ //! 
Nexus database configuration -use omicron_common::config::PostgresConfigWithUrl; +use omicron_common::postgres_config::PostgresConfigWithUrl; use serde::Deserialize; use serde::Serialize; use serde_with::serde_as; diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index e56503c3c09..61abe04b1ba 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -29,7 +29,7 @@ pub mod updates; // public for testing pub use app::test_interfaces::TestInterfaces; pub use app::Nexus; -pub use config::Config; +pub use config::{Config, PackageConfig}; pub use context::ServerContext; pub use crucible_agent_client; use external_api::http_entrypoints::external_api; @@ -85,15 +85,15 @@ impl Server { rack_id: Uuid, log: &Logger, ) -> Result { - let log = log.new(o!("name" => config.id.to_string())); + let log = log.new(o!("name" => config.runtime.id.to_string())); info!(log, "setting up nexus server"); let ctxlog = log.new(o!("component" => "ServerContext")); - let pool = db::Pool::new(&config.database); - let apictx = ServerContext::new(rack_id, ctxlog, pool, &config)?; + + let apictx = ServerContext::new(rack_id, ctxlog, &config).await?; let http_server_starter_external = dropshot::HttpServerStarter::new( - &config.dropshot_external, + &config.runtime.dropshot_external, external_api(), Arc::clone(&apictx), &log.new(o!("component" => "dropshot_external")), @@ -101,7 +101,7 @@ impl Server { .map_err(|error| format!("initializing external server: {}", error))?; let http_server_starter_internal = dropshot::HttpServerStarter::new( - &config.dropshot_internal, + &config.runtime.dropshot_internal, internal_api(), Arc::clone(&apictx), &log.new(o!("component" => "dropshot_internal")), @@ -153,12 +153,12 @@ impl Server { /// Run an instance of the [Server]. pub async fn run_server(config: &Config) -> Result<(), String> { use slog::Drain; - let (drain, registration) = slog_dtrace::with_drain( - config - .log - .to_logger("nexus") - .map_err(|message| format!("initializing logger: {}", message))?, - ); + let (drain, registration) = + slog_dtrace::with_drain( + config.pkg.log.to_logger("nexus").map_err(|message| { + format!("initializing logger: {}", message) + })?, + ); let log = slog::Logger::root(drain.fuse(), slog::o!()); if let slog_dtrace::ProbeRegistration::Failed(e) = registration { let msg = format!("failed to register DTrace probes: {}", e); diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index a53ad85d585..e4eb744e2fa 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -11,6 +11,7 @@ use dropshot::ConfigLogging; use dropshot::ConfigLoggingLevel; use omicron_common::api::external::IdentityMetadata; use omicron_common::api::internal::nexus::ProducerEndpoint; +use omicron_common::nexus_config; use omicron_sled_agent::sim; use omicron_test_utils::dev; use oximeter_collector::Oximeter; @@ -75,7 +76,7 @@ pub fn load_test_config() -> omicron_nexus::Config { let config_file_path = Path::new("tests/config.test.toml"); let mut config = omicron_nexus::Config::from_file(config_file_path) .expect("failed to load config.test.toml"); - config.id = Uuid::new_v4(); + config.runtime.id = Uuid::new_v4(); config } @@ -88,7 +89,7 @@ pub async fn test_setup_with_config( test_name: &str, config: &mut omicron_nexus::Config, ) -> ControlPlaneTestContext { - let logctx = LogContext::new(test_name, &config.log); + let logctx = LogContext::new(test_name, &config.pkg.log); let rack_id = Uuid::parse_str(RACK_UUID).unwrap(); let log = &logctx.log; @@ -99,8 +100,9 @@ pub async fn 
test_setup_with_config( let clickhouse = dev::clickhouse::ClickHouseInstance::new(0).await.unwrap(); // Store actual address/port information for the databases after they start. - config.database.url = database.pg_config().clone(); - config.timeseries_db.address.set_port(clickhouse.port()); + config.runtime.database = + nexus_config::Database::FromUrl { url: database.pg_config().clone() }; + config.pkg.timeseries_db.address.set_port(clickhouse.port()); let server = omicron_nexus::Server::start(&config, rack_id, &logctx.log) .await diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 9b8f1f42731..2fc4ddba192 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -2,10 +2,6 @@ # Oxide API: configuration file for test suite # -# Identifier for this instance of Nexus. -# NOTE: The test suite always overrides this. -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" - [console] # Directory for static assets. Absolute path or relative to CWD. static_dir = "tests/static" @@ -17,27 +13,6 @@ session_absolute_timeout_minutes = 480 [authn] schemes_external = [ "spoof", "session_cookie" ] -# -# NOTE: for the test suite, the database URL will be replaced with one -# appropriate for the database that's started by the test runner. -# -[database] -url = "postgresql://root@127.0.0.1:0/omicron?sslmode=disable" - -# -# NOTE: for the test suite, the port MUST be 0 (in order to bind to any -# available port) because the test suite will be running many servers -# concurrently. -# -[dropshot_external] -bind_address = "127.0.0.1:0" -request_body_max_bytes = 1048576 - -# port must be 0. see above -[dropshot_internal] -bind_address = "127.0.0.1:0" -request_body_max_bytes = 1048576 - # # NOTE: for the test suite, if mode = "file", the file path MUST be the sentinel # string "UNUSED". The actual path will be generated by the test suite for each @@ -59,3 +34,33 @@ address = "[::1]:0" [tunables] # Allow small subnets, so we can test IP address exhaustion easily / quickly max_vpc_ipv4_subnet_prefix = 29 + +[runtime] +# Identifier for this instance of Nexus. +# NOTE: The test suite always overrides this. +id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" + +# +# NOTE: for the test suite, the port MUST be 0 (in order to bind to any +# available port) because the test suite will be running many servers +# concurrently. +# +[runtime.dropshot_external] +bind_address = "127.0.0.1:0" +request_body_max_bytes = 1048576 + +# port must be 0. see above +[runtime.dropshot_internal] +bind_address = "127.0.0.1:0" +request_body_max_bytes = 1048576 + +[runtime.subnet] +net = "fd00:1122:3344:0100::/56" + +# +# NOTE: for the test suite, the database URL will be replaced with one +# appropriate for the database that's started by the test runner. 
+# +[runtime.database] +type = "from_url" +url = "postgresql://root@127.0.0.1:0/omicron?sslmode=disable" diff --git a/nexus/tests/integration_tests/authn_http.rs b/nexus/tests/integration_tests/authn_http.rs index 7125a52ea90..e0234da1b97 100644 --- a/nexus/tests/integration_tests/authn_http.rs +++ b/nexus/tests/integration_tests/authn_http.rs @@ -277,7 +277,7 @@ async fn start_whoami_server( sessions: HashMap, ) -> TestContext { let config = nexus_test_utils::load_test_config(); - let logctx = LogContext::new(test_name, &config.log); + let logctx = LogContext::new(test_name, &config.pkg.log); let whoami_api = { let mut whoami_api = ApiDescription::new(); @@ -299,7 +299,7 @@ async fn start_whoami_server( TestContext::new( whoami_api, server_state, - &config.dropshot_external, + &config.runtime.dropshot_external, Some(logctx), log, ) diff --git a/nexus/tests/integration_tests/commands.rs b/nexus/tests/integration_tests/commands.rs index 7d3855d5a6c..ac770c137e3 100644 --- a/nexus/tests/integration_tests/commands.rs +++ b/nexus/tests/integration_tests/commands.rs @@ -76,8 +76,7 @@ fn test_nexus_invalid_config() { assert_eq!( stderr_text, format!( - "nexus: parse \"{}\": missing field \ - `dropshot_external`\n", + "nexus: parse \"{}\": missing field `runtime`\n", config_path.display() ), ); diff --git a/nexus/tests/integration_tests/console_api.rs b/nexus/tests/integration_tests/console_api.rs index e84c65c0fe1..779e94470eb 100644 --- a/nexus/tests/integration_tests/console_api.rs +++ b/nexus/tests/integration_tests/console_api.rs @@ -196,7 +196,7 @@ async fn test_assets(cptestctx: &ControlPlaneTestContext) { #[tokio::test] async fn test_absolute_static_dir() { let mut config = load_test_config(); - config.console.static_dir = current_dir().unwrap().join("tests/static"); + config.pkg.console.static_dir = current_dir().unwrap().join("tests/static"); let cptestctx = test_setup_with_config("test_absolute_static_dir", &mut config).await; let testctx = &cptestctx.external_client; diff --git a/nexus/tests/integration_tests/updates.rs b/nexus/tests/integration_tests/updates.rs index 1bfa25d0a2c..c09ca0b7fea 100644 --- a/nexus/tests/integration_tests/updates.rs +++ b/nexus/tests/integration_tests/updates.rs @@ -62,7 +62,7 @@ async fn test_update_end_to_end() { let mut api = ApiDescription::new(); api.register(static_content).unwrap(); let context = FileServerContext { base: tuf_repo.path().to_owned() }; - let logctx = LogContext::new("test_update_end_to_end", &config.log); + let logctx = LogContext::new("test_update_end_to_end", &config.pkg.log); let server = HttpServerStarter::new(&dropshot_config, api, context, &logctx.log) .unwrap() @@ -70,7 +70,7 @@ async fn test_update_end_to_end() { let local_addr = server.local_addr(); // stand up the test environment - config.updates = Some(UpdatesConfig { + config.pkg.updates = Some(UpdatesConfig { trusted_root: tuf_repo.path().join("metadata").join("1.root.json"), default_base_url: format!("http://{}/", local_addr), }); diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 273082a7500..839e8ba9a76 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -247,6 +247,10 @@ "dataset_kind": { "$ref": "#/components/schemas/DatasetKind" }, + "id": { + "type": "string", + "format": "uuid" + }, "zpool_id": { "type": "string", "format": "uuid" @@ -255,6 +259,7 @@ "required": [ "address", "dataset_kind", + "id", "zpool_id" ] }, @@ -959,6 +964,7 @@ ] }, "ServiceRequest": { + "description": "Describes a request to create a service. 
This information should be sufficient for a Sled Agent to start a zone containing the requested service.", "type": "object", "properties": { "addresses": { @@ -976,13 +982,85 @@ "format": "ipv6" } }, + "id": { + "type": "string", + "format": "uuid" + }, "name": { "type": "string" + }, + "service_type": { + "$ref": "#/components/schemas/ServiceType" } }, "required": [ "addresses", - "name" + "id", + "name", + "service_type" + ] + }, + "ServiceType": { + "description": "Describes service-specific parameters.", + "oneOf": [ + { + "type": "object", + "properties": { + "external_address": { + "type": "string" + }, + "internal_address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "nexus" + ] + } + }, + "required": [ + "external_address", + "internal_address", + "type" + ] + }, + { + "type": "object", + "properties": { + "dns_address": { + "type": "string" + }, + "server_address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "internal_dns" + ] + } + }, + "required": [ + "dns_address", + "server_address", + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "oximeter" + ] + } + }, + "required": [ + "type" + ] + } ] }, "Slot": { diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 1c713a69067..d003bbe785e 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -9,9 +9,7 @@ use omicron_common::api::internal::nexus::{ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::fmt::{Debug, Display, Formatter, Result as FormatResult}; -use std::net::IpAddr; -use std::net::Ipv6Addr; -use std::net::{SocketAddr, SocketAddrV6}; +use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use uuid::Uuid; /// Information required to construct a virtual network interface for a guest @@ -165,7 +163,7 @@ pub struct InstanceRuntimeStateRequested { pub enum DatasetKind { CockroachDb { /// The addresses of all nodes within the cluster. - all_addresses: Vec, + all_addresses: Vec, }, Crucible, Clickhouse, @@ -213,6 +211,8 @@ impl std::fmt::Display for DatasetKind { /// instantiated when the dataset is detected. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] pub struct DatasetEnsureBody { + // The UUID of the dataset, as well as the service using it directly. + pub id: Uuid, // The name (and UUID) of the Zpool which we are inserting into. pub zpool_id: Uuid, // The type of the filesystem. @@ -235,14 +235,52 @@ impl From for sled_agent_client::types::DatasetEnsureBody { zpool_id: p.zpool_id, dataset_kind: p.dataset_kind.into(), address: p.address.to_string(), + id: p.id, } } } +/// Describes service-specific parameters. 
+#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ServiceType { + Nexus { internal_address: SocketAddrV6, external_address: SocketAddrV6 }, + InternalDns { server_address: SocketAddrV6, dns_address: SocketAddrV6 }, + Oximeter, +} + +impl From for sled_agent_client::types::ServiceType { + fn from(s: ServiceType) -> Self { + use sled_agent_client::types::ServiceType as AutoSt; + use ServiceType as St; + + match s { + St::Nexus { internal_address, external_address } => AutoSt::Nexus { + internal_address: internal_address.to_string(), + external_address: external_address.to_string(), + }, + St::InternalDns { server_address, dns_address } => { + AutoSt::InternalDns { + server_address: server_address.to_string(), + dns_address: dns_address.to_string(), + } + } + St::Oximeter => AutoSt::Oximeter, + } + } +} + +/// Describes a request to create a service. This information +/// should be sufficient for a Sled Agent to start a zone +/// containing the requested service. #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, )] pub struct ServiceRequest { + // The UUID of the service to be initialized. + pub id: Uuid, // The name of the service to be created. pub name: String, // The addresses on which the service should listen for requests. @@ -256,14 +294,18 @@ pub struct ServiceRequest { // is necessary to allow inter-zone traffic routing. #[serde(default)] pub gz_addresses: Vec, + // Any other service-specific parameters. + pub service_type: ServiceType, } impl From for sled_agent_client::types::ServiceRequest { fn from(s: ServiceRequest) -> Self { Self { + id: s.id, name: s.name, addresses: s.addresses, gz_addresses: s.gz_addresses, + service_type: s.service_type.into(), } } } diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 26f3ce8a321..d9f8324535d 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -29,12 +29,12 @@ pub struct SetupServiceConfig { pub rack_subnet: Ipv6Addr, #[serde(default, rename = "request")] - pub requests: Vec, + pub requests: Vec, } /// A request to initialize a sled. #[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] -pub struct SledRequest { +pub struct HardcodedSledRequest { /// Datasets to be created. #[serde(default, rename = "dataset")] pub datasets: Vec, diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 0fef7054d26..6c65383d5e8 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -4,13 +4,15 @@ //! 
Rack Setup Service implementation -use super::config::{SetupServiceConfig as Config, SledRequest}; -use crate::bootstrap::config::BOOTSTRAP_AGENT_PORT; -use crate::bootstrap::discovery::PeerMonitorObserver; -use crate::bootstrap::params::SledAgentRequest; -use crate::bootstrap::rss_handle::BootstrapAgentHandle; -use crate::params::ServiceRequest; -use omicron_common::address::{get_sled_address, ReservedRackSubnet}; +use super::config::{HardcodedSledRequest, SetupServiceConfig as Config}; +use crate::bootstrap::{ + config::BOOTSTRAP_AGENT_PORT, discovery::PeerMonitorObserver, + params::SledAgentRequest, rss_handle::BootstrapAgentHandle, +}; +use crate::params::{ServiceRequest, ServiceType}; +use omicron_common::address::{ + get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, +}; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, }; @@ -21,6 +23,7 @@ use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::path::PathBuf; use thiserror::Error; use tokio::sync::Mutex; +use uuid::Uuid; /// Describes errors which may occur while operating the setup service. #[derive(Error, Debug)] @@ -55,7 +58,7 @@ pub enum SetupServiceError { #[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] struct SledAllocation { initialization_request: SledAgentRequest, - services_request: SledRequest, + services_request: HardcodedSledRequest, } /// The interface to the Rack Setup Service. @@ -192,7 +195,7 @@ impl ServiceInner { async fn initialize_services( &self, sled_address: SocketAddr, - services: &Vec, + services: &Vec, ) -> Result<(), SetupServiceError> { let dur = std::time::Duration::from_secs(60); let client = reqwest::ClientBuilder::new() @@ -284,18 +287,31 @@ impl ServiceInner { if idx < config.requests.len() { config.requests[idx].clone() } else { - SledRequest::default() + HardcodedSledRequest::default() } }; - // The first enumerated addresses get assigned the additional + // The first enumerated sleds get assigned the additional // responsibility of being internal DNS servers. if idx < dns_subnets.len() { let dns_subnet = &dns_subnets[idx]; + let dns_addr = dns_subnet.dns_address().ip(); request.dns_services.push(ServiceRequest { + id: Uuid::new_v4(), name: "internal-dns".to_string(), - addresses: vec![dns_subnet.dns_address().ip()], + addresses: vec![dns_addr], gz_addresses: vec![dns_subnet.gz_address().ip()], + service_type: ServiceType::InternalDns { + server_address: SocketAddrV6::new( + dns_addr, + DNS_SERVER_PORT, + 0, + 0, + ), + dns_address: SocketAddrV6::new( + dns_addr, DNS_PORT, 0, 0, + ), + }, }); } @@ -331,8 +347,10 @@ impl ServiceInner { } // Once we've constructed a plan, write it down to durable storage. 
- let serialized_plan = toml::Value::try_from(&plan) - .expect("Cannot serialize configuration"); + let serialized_plan = + toml::Value::try_from(&plan).unwrap_or_else(|e| { + panic!("Cannot serialize configuration: {:#?}: {}", plan, e) + }); let plan_str = toml::to_string(&serialized_plan) .expect("Cannot turn config to string"); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 72444a79b17..aaa1960fb3e 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -7,17 +7,32 @@ use crate::illumos::dladm::{Etherstub, EtherstubVnic}; use crate::illumos::running_zone::{InstalledZone, RunningZone}; use crate::illumos::vnic::VnicAllocator; +use crate::illumos::zfs::ZONE_ZFS_DATASET_MOUNTPOINT; use crate::illumos::zone::AddressRequest; -use crate::params::{ServiceEnsureBody, ServiceRequest}; +use crate::params::{ServiceEnsureBody, ServiceRequest, ServiceType}; use crate::zone::Zones; -use omicron_common::address::{DNS_PORT, DNS_SERVER_PORT}; +use dropshot::ConfigDropshot; +use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; +use omicron_common::nexus_config::{self, RuntimeConfig as NexusRuntimeConfig}; +use omicron_common::postgres_config::PostgresConfigWithUrl; use slog::Logger; use std::collections::HashSet; use std::iter::FromIterator; -use std::net::{IpAddr, Ipv6Addr}; +use std::net::{IpAddr, Ipv6Addr, SocketAddr}; use std::path::{Path, PathBuf}; +use std::str::FromStr; +use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; +// The filename of ServiceManager's internal storage. +const SERVICE_CONFIG_FILENAME: &str = "service.toml"; +// The filename of a half-completed config, in need of parameters supplied at +// runtime. +const PARTIAL_CONFIG_FILENAME: &str = "config-partial.toml"; +// The filename of a completed config, merging the partial config with +// additional appended parameters known at runtime. +const COMPLETE_CONFIG_FILENAME: &str = "config.toml"; + #[derive(thiserror::Error, Debug)] pub enum Error { #[error("Cannot serialize TOML to file {path}: {err}")] @@ -69,13 +84,40 @@ impl From for omicron_common::api::external::Error { /// The default path to service configuration, if one is not /// explicitly provided. pub fn default_services_config_path() -> PathBuf { - Path::new(omicron_common::OMICRON_CONFIG_PATH).join("services.toml") + Path::new(omicron_common::OMICRON_CONFIG_PATH).join(SERVICE_CONFIG_FILENAME) +} + +/// Configuration parameters which modify the [`ServiceManager`]'s behavior. +/// +/// These are typically used to make testing easier; production usage +/// should generally prefer to use the defaults. +pub struct Config { + /// The path for the ServiceManager to store information about + /// all running services. + pub all_svcs_config_path: PathBuf, + /// A function which returns the path the directory holding the + /// service's configuration file. + pub get_svc_config_dir: Box PathBuf + Send + Sync>, +} + +impl Default for Config { + fn default() -> Self { + Self { + all_svcs_config_path: default_services_config_path(), + get_svc_config_dir: Box::new(|zone_name: &str, svc_name: &str| { + PathBuf::from(ZONE_ZFS_DATASET_MOUNTPOINT) + .join(PathBuf::from(zone_name)) + .join("root") + .join(format!("var/svc/manifest/site/{}", svc_name)) + }), + } + } } /// Manages miscellaneous Sled-local services. 
pub struct ServiceManager { log: Logger, - config_path: Option, + config: Config, zones: Mutex>, vnic_allocator: VnicAllocator, underlay_vnic: EtherstubVnic, @@ -98,12 +140,12 @@ impl ServiceManager { etherstub: Etherstub, underlay_vnic: EtherstubVnic, underlay_address: Ipv6Addr, - config_path: Option, + config: Config, ) -> Result { debug!(log, "Creating new ServiceManager"); let mgr = Self { log: log.new(o!("component" => "ServiceManager")), - config_path, + config, zones: Mutex::new(vec![]), vnic_allocator: VnicAllocator::new("Service", etherstub), underlay_vnic, @@ -143,11 +185,7 @@ impl ServiceManager { // Returns either the path to the explicitly provided config path, or // chooses the default one. fn services_config_path(&self) -> PathBuf { - if let Some(path) = &self.config_path { - path.clone() - } else { - default_services_config_path() - } + self.config.all_svcs_config_path.clone() } // Populates `existing_zones` according to the requests in `services`. @@ -268,16 +306,70 @@ impl ServiceManager { let smf_name = format!("svc:/system/illumos/{}", service.name); let default_smf_name = format!("{}:default", smf_name); - match service.name.as_str() { - "internal-dns" => { - info!(self.log, "Setting up internal-dns service"); - let address = - service.addresses.get(0).ok_or_else(|| { - Error::BadServiceRequest { - service: service.name.clone(), - message: "Not enough addresses".to_string(), - } + match service.service_type { + ServiceType::Nexus { internal_address, external_address } => { + info!(self.log, "Setting up Nexus service"); + + // Nexus takes a separate config file for parameters which + // cannot be known at packaging time. + let runtime_config = NexusRuntimeConfig { + id: service.id, + dropshot_external: ConfigDropshot { + bind_address: SocketAddr::V6(external_address), + ..Default::default() + }, + dropshot_internal: ConfigDropshot { + bind_address: SocketAddr::V6(internal_address), + ..Default::default() + }, + subnet: Ipv6Subnet::::new( + self.underlay_address, + ), + // TODO: Switch to inferring this URL by DNS. + database: nexus_config::Database::FromUrl { + url: PostgresConfigWithUrl::from_str( + "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" + ).unwrap() + } + }; + + // Copy the partial config file to the expected location. + let config_dir = (self.config.get_svc_config_dir)( + running_zone.name(), + &service.name, + ); + let partial_config_path = + config_dir.join(PARTIAL_CONFIG_FILENAME); + let config_path = config_dir.join(COMPLETE_CONFIG_FILENAME); + tokio::fs::copy(partial_config_path, &config_path) + .await + .map_err(|err| Error::Io { + path: config_path.clone(), + err, })?; + + // Serialize the configuration and append it into the file. 
+ let serialized_cfg = toml::Value::try_from(&runtime_config) + .expect("Cannot serialize config"); + let mut map = toml::map::Map::new(); + map.insert("runtime".to_string(), serialized_cfg); + let config_str = toml::to_string(&map).map_err(|err| { + Error::TomlSerialize { path: config_path.clone(), err } + })?; + let mut file = tokio::fs::OpenOptions::new() + .append(true) + .open(&config_path) + .await + .map_err(|err| Error::Io { + path: config_path.clone(), + err, + })?; + file.write_all(config_str.as_bytes()).await.map_err( + |err| Error::Io { path: config_path.clone(), err }, + )?; + } + ServiceType::InternalDns { server_address, dns_address } => { + info!(self.log, "Setting up internal-dns service"); running_zone .run_cmd(&[ crate::illumos::zone::SVCCFG, @@ -286,14 +378,12 @@ impl ServiceManager { "setprop", &format!( "config/server_address=[{}]:{}", - address, DNS_SERVER_PORT + server_address.ip(), + server_address.port(), ), ]) .map_err(|err| Error::ZoneCommand { - intent: format!( - "Setting DNS server address [{}]:{}", - address, DNS_SERVER_PORT - ), + intent: "set server address".to_string(), err, })?; @@ -305,14 +395,12 @@ impl ServiceManager { "setprop", &format!( "config/dns_address=[{}]:{}", - address, DNS_PORT + dns_address.ip(), + dns_address.port(), ), ]) .map_err(|err| Error::ZoneCommand { - intent: format!( - "Setting DNS address [{}]:{}", - address, DNS_SERVER_PORT - ), + intent: "Set DNS address".to_string(), err, })?; @@ -327,17 +415,17 @@ impl ServiceManager { ]) .map_err(|err| Error::ZoneCommand { intent: format!( - "Refreshing DNS service config for {}", + "Refresh SMF manifest {}", default_smf_name ), err, })?; } - _ => { - info!( - self.log, - "Service name {} did not match", service.name - ); + ServiceType::Oximeter => { + info!(self.log, "Setting up oximeter service"); + + // TODO: Implement with dynamic parameters, when address is + // dynamically assigned. } } @@ -438,7 +526,9 @@ mod test { svc, zone::MockZones, }; + use std::net::{Ipv6Addr, SocketAddrV6}; use std::os::unix::process::ExitStatusExt; + use uuid::Uuid; const SVC_NAME: &str = "my_svc"; const EXPECTED_ZONE_NAME: &str = "oxz_my_svc"; @@ -488,14 +578,29 @@ mod test { } // Prepare to call "ensure" for a new service, then actually call "ensure". - async fn ensure_new_service(mgr: &ServiceManager) { + async fn ensure_new_service(mgr: &ServiceManager, id: Uuid) { let _expectations = expect_new_service(); mgr.ensure(ServiceEnsureBody { services: vec![ServiceRequest { + id, name: SVC_NAME.to_string(), addresses: vec![], gz_addresses: vec![], + service_type: ServiceType::Nexus { + internal_address: SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + ), + external_address: SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + ), + }, }], }) .await @@ -504,12 +609,27 @@ mod test { // Prepare to call "ensure" for a service which already exists. We should // return the service without actually installing a new zone. 
- async fn ensure_existing_service(mgr: &ServiceManager) { + async fn ensure_existing_service(mgr: &ServiceManager, id: Uuid) { mgr.ensure(ServiceEnsureBody { services: vec![ServiceRequest { + id, name: SVC_NAME.to_string(), addresses: vec![], gz_addresses: vec![], + service_type: ServiceType::Nexus { + internal_address: SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + ), + external_address: SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + ), + }, }], }) .await @@ -533,26 +653,56 @@ mod test { drop(mgr); } + struct TestConfig { + config_dir: tempfile::TempDir, + } + + impl TestConfig { + async fn new() -> Self { + let config_dir = tempfile::TempDir::new().unwrap(); + tokio::fs::File::create( + config_dir.path().join(PARTIAL_CONFIG_FILENAME), + ) + .await + .unwrap(); + Self { config_dir } + } + + fn make_config(&self) -> Config { + let all_svcs_config_path = + self.config_dir.path().join(SERVICE_CONFIG_FILENAME); + let svc_config_dir = self.config_dir.path().to_path_buf(); + Config { + all_svcs_config_path, + get_svc_config_dir: Box::new( + move |_zone_name: &str, _svc_name: &str| { + svc_config_dir.clone() + }, + ), + } + } + } + #[tokio::test] #[serial_test::serial] async fn test_ensure_service() { let logctx = omicron_test_utils::dev::test_setup_log("test_ensure_service"); let log = logctx.log.clone(); + let test_config = TestConfig::new().await; - let config_dir = tempfile::TempDir::new().unwrap(); - let config = config_dir.path().join("services.toml"); let mgr = ServiceManager::new( log, Etherstub(ETHERSTUB_NAME.to_string()), EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, - Some(config), + test_config.make_config(), ) .await .unwrap(); - ensure_new_service(&mgr).await; + let id = Uuid::new_v4(); + ensure_new_service(&mgr, id).await; drop_service_manager(mgr); logctx.cleanup_successful(); @@ -565,21 +715,21 @@ mod test { "test_ensure_service_which_already_exists", ); let log = logctx.log.clone(); + let test_config = TestConfig::new().await; - let config_dir = tempfile::TempDir::new().unwrap(); - let config = config_dir.path().join("services.toml"); let mgr = ServiceManager::new( log, Etherstub(ETHERSTUB_NAME.to_string()), EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, - Some(config), + test_config.make_config(), ) .await .unwrap(); - ensure_new_service(&mgr).await; - ensure_existing_service(&mgr).await; + let id = Uuid::new_v4(); + ensure_new_service(&mgr, id).await; + ensure_existing_service(&mgr, id).await; drop_service_manager(mgr); logctx.cleanup_successful(); @@ -591,9 +741,7 @@ mod test { let logctx = omicron_test_utils::dev::test_setup_log( "test_services_are_recreated_on_reboot", ); - - let config_dir = tempfile::TempDir::new().unwrap(); - let config = config_dir.path().join("services.toml"); + let test_config = TestConfig::new().await; // First, spin up a ServiceManager, create a new service, and tear it // down. 
@@ -602,11 +750,13 @@ mod test { Etherstub(ETHERSTUB_NAME.to_string()), EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, - Some(config.clone()), + test_config.make_config(), ) .await .unwrap(); - ensure_new_service(&mgr).await; + + let id = Uuid::new_v4(); + ensure_new_service(&mgr, id).await; drop_service_manager(mgr); // Before we re-create the service manager - notably, using the same @@ -617,7 +767,7 @@ mod test { Etherstub(ETHERSTUB_NAME.to_string()), EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, - Some(config.clone()), + test_config.make_config(), ) .await .unwrap(); @@ -632,9 +782,7 @@ mod test { let logctx = omicron_test_utils::dev::test_setup_log( "test_services_do_not_persist_without_config", ); - - let config_dir = tempfile::TempDir::new().unwrap(); - let config = config_dir.path().join("services.toml"); + let test_config = TestConfig::new().await; // First, spin up a ServiceManager, create a new service, and tear it // down. @@ -643,16 +791,18 @@ mod test { Etherstub(ETHERSTUB_NAME.to_string()), EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, - Some(config.clone()), + test_config.make_config(), ) .await .unwrap(); - ensure_new_service(&mgr).await; + let id = Uuid::new_v4(); + ensure_new_service(&mgr, id).await; drop_service_manager(mgr); // Next, delete the config. This means the service we just created will // not be remembered on the next initialization. - std::fs::remove_file(&config).unwrap(); + let config = test_config.make_config(); + std::fs::remove_file(&config.all_svcs_config_path).unwrap(); // Observe that the old service is not re-initialized. let mgr = ServiceManager::new( @@ -660,7 +810,7 @@ mod test { Etherstub(ETHERSTUB_NAME.to_string()), EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, - Some(config.clone()), + config, ) .await .unwrap(); diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index c0c2ff649c8..5f8f1e500ab 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -16,7 +16,7 @@ use crate::params::{ DatasetKind, DiskStateRequested, InstanceHardware, InstanceMigrateParams, InstanceRuntimeStateRequested, ServiceEnsureBody, }; -use crate::services::ServiceManager; +use crate::services::{self, ServiceManager}; use crate::storage_manager::StorageManager; use omicron_common::api::{ internal::nexus::DiskRuntimeState, internal::nexus::InstanceRuntimeState, @@ -245,7 +245,7 @@ impl SledAgent { etherstub.clone(), etherstub_vnic.clone(), *sled_address.ip(), - None, + services::Config::default(), ) .await?; diff --git a/smf/nexus/config.toml b/smf/nexus/config-partial.toml similarity index 53% rename from smf/nexus/config.toml rename to smf/nexus/config-partial.toml index d73d7a90cfc..b77ffc3137f 100644 --- a/smf/nexus/config.toml +++ b/smf/nexus/config-partial.toml @@ -1,10 +1,7 @@ # -# Oxide API: example configuration file +# Oxide API: partial configuration file # -# Identifier for this instance of Nexus -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" - [console] # Directory for static assets. Absolute path or relative to CWD. static_dir = "/var/nexus/static" @@ -16,18 +13,6 @@ session_absolute_timeout_minutes = 480 # TODO(https://github.com/oxidecomputer/omicron/issues/372): Remove "spoof". 
schemes_external = ["spoof", "session_cookie"] -[database] -# URL for connecting to the database -url = "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" - -[dropshot_external] -# IP address and TCP port on which to listen for the external API -bind_address = "[fd00:1122:3344:0101::3]:12220" - -[dropshot_internal] -# IP address and TCP port on which to listen for the internal API -bind_address = "[fd00:1122:3344:0101::3]:12221" - [log] # Show log messages of this level and more severe level = "info" diff --git a/smf/nexus/manifest.xml b/smf/nexus/manifest.xml index 0b8da2ff62f..3ff92b2fbac 100644 --- a/smf/nexus/manifest.xml +++ b/smf/nexus/manifest.xml @@ -11,6 +11,14 @@ type='service'> + + + + + + diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index d8113cf4d1b..698d5b112fc 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -11,21 +11,25 @@ rack_subnet = "fd00:1122:3344:0100::" # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate crucible datasets. [[request.dataset]] +id = "09a9a25f-2602-4e2f-9630-31af9c492c3e" zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" address = "[fd00:1122:3344:0101::6]:32345" dataset_kind.type = "crucible" [[request.dataset]] +id = "2713b37a-3043-4ed5-aaff-f38200e45cfb" zpool_id = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" address = "[fd00:1122:3344:0101::7]:32345" dataset_kind.type = "crucible" [[request.dataset]] +id = "ffd16cad-e5d5-495e-9c59-4312a3857d91" zpool_id = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" address = "[fd00:1122:3344:0101::8]:32345" dataset_kind.type = "crucible" [[request.dataset]] +id = "4d08fc19-3d5f-4f6b-9c48-925f8eac7255" zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" address = "[fd00:1122:3344:0101::2]:32221" dataset_kind.type = "cockroach_db" @@ -34,18 +38,27 @@ dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate clickhouse datasets. [[request.dataset]] +id = "a3505b41-a592-420b-84f2-3d76bf0e0a81" zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" address = "[fd00:1122:3344:0101::5]:8123" dataset_kind.type = "clickhouse" [[request.service]] +id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" name = "nexus" addresses = [ "fd00:1122:3344:0101::3" ] gz_addresses = [] +[request.service.service_type] +type = "nexus" +internal_address = "[fd00:1122:3344:0101::3]:12221" +external_address = "[fd00:1122:3344:0101::3]:12220" # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate Oximeter services. 
[[request.service]] +id = "1da65e5b-210c-4859-a7d7-200c1e659972" name = "oximeter" addresses = [ "fd00:1122:3344:0101::4" ] gz_addresses = [] +[request.service.service_type] +type = "oximeter" diff --git a/smf/sled-agent/manifest.xml b/smf/sled-agent/manifest.xml index 378b77776c8..96f029d96e0 100644 --- a/smf/sled-agent/manifest.xml +++ b/smf/sled-agent/manifest.xml @@ -28,6 +28,10 @@ type='service'> + + + diff --git a/test-utils/src/dev/db.rs b/test-utils/src/dev/db.rs index 5449bfc4139..b7112ae1a37 100644 --- a/test-utils/src/dev/db.rs +++ b/test-utils/src/dev/db.rs @@ -8,7 +8,7 @@ use crate::dev::poll; use anyhow::anyhow; use anyhow::bail; use anyhow::Context; -use omicron_common::config::PostgresConfigWithUrl; +use omicron_common::postgres_config::PostgresConfigWithUrl; use std::ffi::{OsStr, OsString}; use std::fmt; use std::ops::Deref; From fccc15cc8df05acffdafa791ed5c1d4d965e13e3 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 16:45:34 -0400 Subject: [PATCH 02/88] Ensure postgres config was just a rename --- common/src/config.rs | 95 -------------------------------------------- 1 file changed, 95 deletions(-) delete mode 100644 common/src/config.rs diff --git a/common/src/config.rs b/common/src/config.rs deleted file mode 100644 index 2509ae4fca2..00000000000 --- a/common/src/config.rs +++ /dev/null @@ -1,95 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Common objects used for configuration - -use std::fmt; -use std::ops::Deref; -use std::str::FromStr; - -/// Describes a URL for connecting to a PostgreSQL server -// The config pattern that we're using requires that types in the config impl -// Serialize. If tokio_postgres::config::Config impl'd Serialize or even -// Display, we'd just use that directly instead of this type. But it doesn't. -// We could implement a serialize function ourselves, but URLs support many -// different properties, and this could be brittle and easy to get wrong. -// Instead, this type just wraps tokio_postgres::config::Config and keeps the -// original String around. (The downside is that a consumer _generating_ a -// nexus::db::Config needs to generate a URL that matches the -// tokio_postgres::config::Config that they construct here, but this is not -// currently an important use case.) -// -// To ensure that the URL and config are kept in sync, we currently only support -// constructing one of these via `FromStr` and the fields are not public. -#[derive(Clone, Debug, PartialEq)] -pub struct PostgresConfigWithUrl { - url_raw: String, - config: tokio_postgres::config::Config, -} - -impl PostgresConfigWithUrl { - pub fn url(&self) -> String { - self.url_raw.clone() - } -} - -impl FromStr for PostgresConfigWithUrl { - type Err = tokio_postgres::Error; - - fn from_str(s: &str) -> Result { - Ok(PostgresConfigWithUrl { url_raw: s.to_owned(), config: s.parse()? 
}) - } -} - -impl fmt::Display for PostgresConfigWithUrl { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(&self.url_raw) - } -} - -impl Deref for PostgresConfigWithUrl { - type Target = tokio_postgres::config::Config; - - fn deref(&self) -> &Self::Target { - &self.config - } -} - -#[cfg(test)] -mod test { - use super::PostgresConfigWithUrl; - - #[test] - fn test_bad_url() { - // There is surprisingly little that we can rely on the - // tokio_postgres::config::Config parser to include in the error - // message. - let error = "foo".parse::().unwrap_err(); - assert!(error.to_string().contains("unexpected EOF")); - "http://127.0.0.1:1234".parse::().unwrap_err(); - let error = "postgresql://example.com?sslmode=not-a-real-ssl-mode" - .parse::() - .unwrap_err(); - assert!(error - .to_string() - .contains("invalid value for option `sslmode`")); - } - - #[test] - fn test_example_url() { - let config = "postgresql://notauser@10.2.3.4:1789?sslmode=disable" - .parse::() - .unwrap(); - assert_eq!(config.get_user(), Some("notauser")); - assert_eq!( - config.get_ssl_mode(), - tokio_postgres::config::SslMode::Disable - ); - assert_eq!( - config.get_hosts(), - &[tokio_postgres::config::Host::Tcp("10.2.3.4".to_string())] - ); - assert_eq!(config.get_ports(), &[1789]); - } -} From a077bd41879b3551ce25d2f59377262ec1cd1ef6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 17:38:38 -0400 Subject: [PATCH 03/88] review feedback --- nexus/src/config.rs | 2 +- nexus/src/context.rs | 2 +- nexus/src/lib.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nexus/src/config.rs b/nexus/src/config.rs index d5bf6a2a2f9..a6034a7eea3 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -152,7 +152,7 @@ pub struct Config { } impl Config { - /// Load a `PackageConfig` from the given TOML file + /// Load a `Config` from the given TOML file /// /// This config object can then be used to create a new `Nexus`. /// The format is described in the README. diff --git a/nexus/src/context.rs b/nexus/src/context.rs index a08f22304df..2ad6a93553a 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -68,7 +68,7 @@ pub struct ConsoleConfig { impl ServerContext { /// Create a new context with the given rack id and log. This creates the /// underlying nexus as well. 
- pub async fn new( + pub fn new( rack_id: Uuid, log: Logger, config: &config::Config, diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 61abe04b1ba..c13fc3de3c8 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -90,7 +90,7 @@ impl Server { let ctxlog = log.new(o!("component" => "ServerContext")); - let apictx = ServerContext::new(rack_id, ctxlog, &config).await?; + let apictx = ServerContext::new(rack_id, ctxlog, &config)?; let http_server_starter_external = dropshot::HttpServerStarter::new( &config.runtime.dropshot_external, From d16eda2832fba9c5e46c68431c3e400a6039ea17 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 16:44:41 -0400 Subject: [PATCH 04/88] DNS client --- Cargo.lock | 4 + internal-dns-client/Cargo.toml | 6 +- internal-dns-client/src/lib.rs | 3 + internal-dns-client/src/multiclient.rs | 145 +++++++++++++++++++++++++ internal-dns-client/src/names.rs | 55 ++++++++++ 5 files changed, 212 insertions(+), 1 deletion(-) create mode 100644 internal-dns-client/src/multiclient.rs create mode 100644 internal-dns-client/src/names.rs diff --git a/Cargo.lock b/Cargo.lock index fed2770e843..850c0ec0adc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2342,12 +2342,16 @@ dependencies = [ name = "internal-dns-client" version = "0.1.0" dependencies = [ + "omicron-common", "progenitor", "reqwest", "serde", "serde_json", "slog", "structopt", + "trust-dns-proto", + "trust-dns-resolver", + "uuid", ] [[package]] diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml index 22e28c91bc9..0ac6ecba610 100644 --- a/internal-dns-client/Cargo.toml +++ b/internal-dns-client/Cargo.toml @@ -5,9 +5,13 @@ edition = "2021" license = "MPL-2.0" [dependencies] +omicron-common = { path = "../common" } progenitor = { git = "https://github.com/oxidecomputer/progenitor" } +reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } serde = { version = "1.0", features = [ "derive" ] } serde_json = "1.0" slog = { version = "2.5.0", features = [ "max_level_trace", "release_max_level_debug" ] } structopt = "0.3" -reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } +trust-dns-proto = "0.21" +trust-dns-resolver = "0.21" +uuid = { version = "1.1.0", features = [ "v4", "serde" ] } diff --git a/internal-dns-client/src/lib.rs b/internal-dns-client/src/lib.rs index 49daa3d58ae..f7ce56f8521 100644 --- a/internal-dns-client/src/lib.rs +++ b/internal-dns-client/src/lib.rs @@ -16,3 +16,6 @@ progenitor::generate_api!( slog::debug!(log, "client response"; "result" => ?result); }), ); + +pub mod multiclient; +pub mod names; diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs new file mode 100644 index 00000000000..e01fb5a2139 --- /dev/null +++ b/internal-dns-client/src/multiclient.rs @@ -0,0 +1,145 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use crate::types::{DnsKv, DnsRecord, DnsRecordKey, Srv}; +use omicron_common::address::{ + Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, DNS_SERVER_PORT, +}; +use omicron_common::backoff::{ + internal_service_policy, retry_notify, BackoffError, +}; +use slog::{info, warn, Logger}; +use std::net::{SocketAddr, SocketAddrV6}; +use trust_dns_resolver::config::{ + NameServerConfig, Protocol, ResolverConfig, ResolverOpts, +}; +use trust_dns_resolver::TokioAsyncResolver; + +type DnsError = crate::Error; + +/// A connection used to update multiple DNS servers. +pub struct Updater { + clients: Vec, +} + +impl Updater { + pub fn new(subnet: Ipv6Subnet, log: Logger) -> Self { + let clients = ReservedRackSubnet::new(subnet) + .get_dns_subnets() + .into_iter() + .map(|dns_subnet| { + let addr = dns_subnet.dns_address().ip(); + info!(log, "Adding DNS server: {}", addr); + crate::Client::new( + &format!("http://[{}]:{}", addr, DNS_SERVER_PORT), + log.clone(), + ) + }) + .collect::>(); + + Self { clients } + } + + /// Utility function to insert: + /// - A set of uniquely-named AAAA records, each corresponding to an address + /// - An SRV record, pointing to each of the AAAA records. + pub async fn insert_dns_records( + &self, + log: &Logger, + aaaa: Vec<(crate::names::AAAA, SocketAddrV6)>, + srv_key: crate::names::SRV, + ) -> Result<(), DnsError> { + let mut records = Vec::with_capacity(aaaa.len() + 1); + + // Add one DnsKv per AAAA, each with a single record. + records.extend(aaaa.iter().map(|(name, addr)| DnsKv { + key: DnsRecordKey { name: name.to_string() }, + records: vec![DnsRecord::Aaaa(*addr.ip())], + })); + + // Add the DnsKv for the SRV, with a record for each AAAA. + records.push(DnsKv { + key: DnsRecordKey { name: srv_key.to_string() }, + records: aaaa + .iter() + .map(|(name, addr)| { + DnsRecord::Srv(Srv { + prio: 0, + weight: 0, + port: addr.port(), + target: name.to_string(), + }) + }) + .collect::>(), + }); + + let set_record = || async { + self.dns_records_set(&records) + .await + .map_err(BackoffError::transient)?; + Ok::<(), BackoffError>(()) + }; + let log_failure = |error, _| { + warn!(log, "Failed to set DNS records"; "error" => ?error); + }; + + retry_notify(internal_service_policy(), set_record, log_failure) + .await?; + Ok(()) + } + + /// Sets a records on all DNS servers. + /// + /// Returns an error if setting the record fails on any server. + pub async fn dns_records_set<'a>( + &'a self, + body: &'a Vec, + ) -> Result<(), DnsError> { + // TODO: Could be sent concurrently. + for client in &self.clients { + client.dns_records_set(body).await?; + } + + Ok(()) + } + + /// Deletes records in all DNS servers. + /// + /// Returns an error if deleting the record fails on any server. + pub async fn dns_records_delete<'a>( + &'a self, + body: &'a Vec, + ) -> Result<(), DnsError> { + // TODO: Could be sent concurrently + for client in &self.clients { + client.dns_records_delete(body).await?; + } + Ok(()) + } +} + +/// Creates a resolver using all internal DNS name servers. 
+pub fn create_resolver( + subnet: Ipv6Subnet, +) -> Result { + let mut rc = ResolverConfig::new(); + let dns_ips = ReservedRackSubnet::new(subnet) + .get_dns_subnets() + .into_iter() + .map(|subnet| subnet.dns_address().ip()) + .collect::>(); + + for dns_ip in dns_ips { + rc.add_name_server(NameServerConfig { + socket_addr: SocketAddr::V6(SocketAddrV6::new( + dns_ip, DNS_PORT, 0, 0, + )), + protocol: Protocol::Udp, + tls_dns_name: None, + trust_nx_responses: false, + bind_addr: None, + }); + } + TokioAsyncResolver::tokio(rc, ResolverOpts::default()) +} diff --git a/internal-dns-client/src/names.rs b/internal-dns-client/src/names.rs new file mode 100644 index 00000000000..6384ec9e503 --- /dev/null +++ b/internal-dns-client/src/names.rs @@ -0,0 +1,55 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::fmt; +use uuid::Uuid; + +const DNS_ZONE: &str = "control-plane.oxide.internal"; + +pub enum SRV { + /// A service identified and accessed by name, such as "nexus", "CRDB", etc. + /// + /// This is used in cases where services are interchangeable. + Service(String), + + /// A service identified by name and a unique identifier. + /// + /// This is used in cases where services are not interchangeable, such as + /// for the Sled agent. + Backend(String, Uuid), +} + +impl fmt::Display for SRV { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + SRV::Service(name) => { + write!(f, "_{}._tcp.{}", name, DNS_ZONE) + } + SRV::Backend(name, id) => { + write!(f, "_{}._tcp.{}.{}", name, id, DNS_ZONE) + } + } + } +} + +pub enum AAAA { + /// Identifies an AAAA record for a sled. + Sled(Uuid), + + /// Identifies an AAAA record for a zone within a sled. + Zone(Uuid), +} + +impl fmt::Display for AAAA { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + AAAA::Sled(id) => { + write!(f, "{}.sled.{}", id, DNS_ZONE) + } + AAAA::Zone(id) => { + write!(f, "{}.host.{}", id, DNS_ZONE) + } + } + } +} From 8db30b70b965e6eb3de54d2ae8172109225aff37 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 17:00:25 -0400 Subject: [PATCH 05/88] Add concurrency --- Cargo.lock | 1 + internal-dns-client/Cargo.toml | 1 + internal-dns-client/src/multiclient.rs | 28 ++++++++++++++++++-------- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 850c0ec0adc..ea7797ccd16 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2342,6 +2342,7 @@ dependencies = [ name = "internal-dns-client" version = "0.1.0" dependencies = [ + "futures", "omicron-common", "progenitor", "reqwest", diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml index 0ac6ecba610..f2611721ae7 100644 --- a/internal-dns-client/Cargo.toml +++ b/internal-dns-client/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" license = "MPL-2.0" [dependencies] +futures = "0.3.21" omicron-common = { path = "../common" } progenitor = { git = "https://github.com/oxidecomputer/progenitor" } reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index e01fb5a2139..3d8d912bf7f 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -3,6 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
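To make the naming scheme in names.rs above concrete, the variants render as follows. This is a quick illustrative check derived from the Display impls shown, not part of the diff:

// Illustration only: expected renderings of the SRV/AAAA names defined above.
use internal_dns_client::names::{AAAA, SRV};
use uuid::Uuid;

let id = Uuid::nil();
assert_eq!(
    SRV::Service("nexus".to_string()).to_string(),
    "_nexus._tcp.control-plane.oxide.internal"
);
assert_eq!(
    SRV::Backend("sled-agent".to_string(), id).to_string(),
    format!("_sled-agent._tcp.{}.control-plane.oxide.internal", id)
);
assert_eq!(
    AAAA::Zone(id).to_string(),
    format!("{}.host.control-plane.oxide.internal", id)
);
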
use crate::types::{DnsKv, DnsRecord, DnsRecordKey, Srv}; +use futures::stream::{self, StreamExt, TryStreamExt}; use omicron_common::address::{ Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, DNS_SERVER_PORT, }; @@ -96,10 +97,15 @@ impl Updater { &'a self, body: &'a Vec, ) -> Result<(), DnsError> { - // TODO: Could be sent concurrently. - for client in &self.clients { - client.dns_records_set(body).await?; - } + stream::iter(&self.clients) + .map(Ok::<_, DnsError>) + .try_for_each_concurrent( + None, + |client| async move { + client.dns_records_set(body).await?; + Ok(()) + } + ).await?; Ok(()) } @@ -111,10 +117,16 @@ impl Updater { &'a self, body: &'a Vec, ) -> Result<(), DnsError> { - // TODO: Could be sent concurrently - for client in &self.clients { - client.dns_records_delete(body).await?; - } + stream::iter(&self.clients) + .map(Ok::<_, DnsError>) + .try_for_each_concurrent( + None, + |client| async move { + client.dns_records_delete(body).await?; + Ok(()) + } + ).await?; + Ok(()) } } From 3a0c6ba8102541463416aedf345207a2baa34854 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 17:19:22 -0400 Subject: [PATCH 06/88] comment --- internal-dns-client/src/multiclient.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 3d8d912bf7f..47ac76e7710 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -25,6 +25,8 @@ pub struct Updater { } impl Updater { + /// Creates a new "Updater", capable of communicating with all + /// DNS servers within the AZ. pub fn new(subnet: Ipv6Subnet, log: Logger) -> Self { let clients = ReservedRackSubnet::new(subnet) .get_dns_subnets() From 33b3e02b7a926eec67674b6d896d144675da8f2d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 17:50:13 -0400 Subject: [PATCH 07/88] fmt --- Cargo.lock | 2 +- internal-dns-client/src/multiclient.rs | 24 ++++++++++-------------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ea7797ccd16..68e58d9b219 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2352,7 +2352,7 @@ dependencies = [ "structopt", "trust-dns-proto", "trust-dns-resolver", - "uuid", + "uuid 1.1.0", ] [[package]] diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 47ac76e7710..24c8817c274 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -101,13 +101,11 @@ impl Updater { ) -> Result<(), DnsError> { stream::iter(&self.clients) .map(Ok::<_, DnsError>) - .try_for_each_concurrent( - None, - |client| async move { - client.dns_records_set(body).await?; - Ok(()) - } - ).await?; + .try_for_each_concurrent(None, |client| async move { + client.dns_records_set(body).await?; + Ok(()) + }) + .await?; Ok(()) } @@ -121,13 +119,11 @@ impl Updater { ) -> Result<(), DnsError> { stream::iter(&self.clients) .map(Ok::<_, DnsError>) - .try_for_each_concurrent( - None, - |client| async move { - client.dns_records_delete(body).await?; - Ok(()) - } - ).await?; + .try_for_each_concurrent(None, |client| async move { + client.dns_records_delete(body).await?; + Ok(()) + }) + .await?; Ok(()) } From 3eb57dcdec6d8585ce4c40cf2048f5ec2d45a9fd Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 18:39:53 -0400 Subject: [PATCH 08/88] lockfile --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 68e58d9b219..ca290e03a05 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -2352,7 +2352,7 @@ dependencies = [ "structopt", "trust-dns-proto", "trust-dns-resolver", - "uuid 1.1.0", + "uuid 1.1.1", ] [[package]] From 7f1087d3f550030e1ecfd634dd903977e55a0bf8 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 20:43:50 -0400 Subject: [PATCH 09/88] Pull in RSS changes from 'use-dns' branch --- Cargo.lock | 1 + common/src/address.rs | 5 + sled-agent/Cargo.toml | 1 + sled-agent/src/rack_setup/config.rs | 1 + sled-agent/src/rack_setup/service.rs | 132 ++++++++++++++++++++++++--- sled-agent/src/services.rs | 9 +- smf/sled-agent/config-rss.toml | 92 +++++++++---------- 7 files changed, 174 insertions(+), 67 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ca290e03a05..6d004285e09 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3104,6 +3104,7 @@ dependencies = [ "expectorate", "futures", "http", + "internal-dns-client", "ipnetwork", "libc", "macaddr", diff --git a/common/src/address.rs b/common/src/address.rs index 226dc9ea655..b105588b587 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -33,6 +33,11 @@ pub const SLED_AGENT_PORT: u16 = 12345; /// The port propolis-server listens on inside the propolis zone. pub const PROPOLIS_PORT: u16 = 12400; +pub const COCKROACH_PORT: u16 = 32221; +pub const CRUCIBLE_PORT: u16 = 32345; + +pub const NEXUS_EXTERNAL_PORT: u16 = 12220; +pub const NEXUS_INTERNAL_PORT: u16 = 12221; // Anycast is a mechanism in which a single IP address is shared by multiple // devices, and the destination is located based on routing distance. diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 18c9514ba94..a0c157e74b1 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -16,6 +16,7 @@ chrono = { version = "0.4", features = [ "serde" ] } crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "cd74a23ea42ce5e673923a00faf31b0a920191cc" } dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } futures = "0.3.21" +internal-dns-client = { path = "../internal-dns-client" } ipnetwork = "0.18" libc = "0.2.126" macaddr = { version = "1.0.1", features = [ "serde_std" ] } diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index d9f8324535d..6786312a009 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -28,6 +28,7 @@ use std::path::Path; pub struct SetupServiceConfig { pub rack_subnet: Ipv6Addr, + // TODO: REMOVE! #[serde(default, rename = "request")] pub requests: Vec, } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 6c1610e1983..0fd19706908 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -4,14 +4,15 @@ //! 
Rack Setup Service implementation -use super::config::{HardcodedSledRequest, SetupServiceConfig as Config}; +use super::config::SetupServiceConfig as Config; use crate::bootstrap::{ config::BOOTSTRAP_AGENT_PORT, discovery::PeerMonitorObserver, params::SledAgentRequest, rss_handle::BootstrapAgentHandle, }; -use crate::params::{ServiceRequest, ServiceType}; +use crate::params::{DatasetEnsureBody, ServiceRequest, ServiceType}; use omicron_common::address::{ get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, + NEXUS_EXTERNAL_PORT, NEXUS_INTERNAL_PORT, RSS_RESERVED_ADDRESSES, }; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, @@ -22,9 +23,12 @@ use std::collections::{HashMap, HashSet}; use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::path::PathBuf; use thiserror::Error; -use tokio::sync::Mutex; +use tokio::sync::{Mutex, OnceCell}; use uuid::Uuid; +// The number of Nexus instances to create from RSS. +const NEXUS_COUNT: usize = 1; + /// Describes errors which may occur while operating the setup service. #[derive(Error, Debug)] pub enum SetupServiceError { @@ -49,13 +53,32 @@ pub enum SetupServiceError { #[error("Failed to construct an HTTP client: {0}")] HttpClient(reqwest::Error), + + // XXX CLEAN UP + #[error(transparent)] + Dns(#[from] internal_dns_client::Error), +} + +#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] +pub struct SledRequest { + /// Datasets to be created. + #[serde(default, rename = "dataset")] + pub datasets: Vec, + + /// Services to be instantiated. + #[serde(default, rename = "service")] + pub services: Vec, + + /// DNS Services to be instantiated. + #[serde(default, rename = "dns_service")] + pub dns_services: Vec, } // The workload / information allocated to a single sled. #[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] struct SledAllocation { initialization_request: SledAgentRequest, - services_request: HardcodedSledRequest, + services_request: SledRequest, } /// The interface to the Rack Setup Service. @@ -130,15 +153,42 @@ enum PeerExpectation { CreateNewPlan(usize), } +struct AddressBumpAllocator { + last_addr: Ipv6Addr, +} + +// TODO: Testable? +// TODO: Could exist in another file? +impl AddressBumpAllocator { + fn new(sled_addr: Ipv6Addr) -> Self { + Self { last_addr: sled_addr } + } + + fn next(&mut self) -> Option { + let mut segments: [u16; 8] = self.last_addr.segments(); + segments[7] = segments[7].checked_add(1)?; + if segments[7] > RSS_RESERVED_ADDRESSES { + return None; + } + self.last_addr = Ipv6Addr::from(segments); + Some(self.last_addr) + } +} + /// The implementation of the Rack Setup Service. struct ServiceInner { log: Logger, peer_monitor: Mutex, + dns_servers: OnceCell, } impl ServiceInner { fn new(log: Logger, peer_monitor: PeerMonitorObserver) -> Self { - ServiceInner { log, peer_monitor: Mutex::new(peer_monitor) } + ServiceInner { + log, + peer_monitor: Mutex::new(peer_monitor), + dns_servers: OnceCell::new(), + } } async fn initialize_datasets( @@ -277,16 +327,61 @@ impl ServiceInner { let requests_and_sleds = bootstrap_addrs.map(|(idx, bootstrap_addr)| { - // If a sled was explicitly requested from the RSS configuration, - // use that. Otherwise, just give it a "default" (empty) set of - // services. 
- let mut request = { - if idx < config.requests.len() { - config.requests[idx].clone() - } else { - HardcodedSledRequest::default() + let sled_subnet_index = + u8::try_from(idx + 1).expect("Too many peers!"); + let subnet = config.sled_subnet(sled_subnet_index); + let mut addr_alloc = + AddressBumpAllocator::new(*get_sled_address(subnet).ip()); + + let mut request = SledRequest::default(); + + // The first enumerated sleds get assigned the responsibility + // of hosting Nexus. + if idx < NEXUS_COUNT { + let address = addr_alloc.next().expect("Not enough addrs"); + request.services.push(ServiceRequest { + id: Uuid::new_v4(), + name: "nexus".to_string(), + addresses: vec![address], + gz_addresses: vec![], + service_type: ServiceType::Nexus { + internal_address: SocketAddrV6::new( + address, + NEXUS_INTERNAL_PORT, + 0, + 0, + ), + external_address: SocketAddrV6::new( + address, + NEXUS_EXTERNAL_PORT, + 0, + 0, + ), + }, + }) + } + + // The first enumerated sleds host the CRDB datasets, using + // zpools described from the underlying config file. + if idx < config.requests.len() { + for dataset in &config.requests[idx].datasets { + let address = SocketAddrV6::new( + addr_alloc.next().expect("Not enough addrs"), + omicron_common::address::COCKROACH_PORT, + 0, + 0, + ); + request.datasets.push(DatasetEnsureBody { + id: Uuid::new_v4(), + zpool_id: dataset.zpool_id, + dataset_kind: + crate::params::DatasetKind::CockroachDb { + all_addresses: vec![address], + }, + address, + }); } - }; + } // The first enumerated sleds get assigned the additional // responsibility of being internal DNS servers. @@ -517,6 +612,15 @@ impl ServiceInner { .into_iter() .collect::>()?; + let dns_servers = internal_dns_client::multiclient::Updater::new( + config.az_subnet(), + self.log.new(o!("client" => "DNS")), + ); + self.dns_servers + .set(dns_servers) + .map_err(|_| ()) + .expect("Already set DNS servers"); + // Issue the dataset initialization requests to all sleds. futures::future::join_all(plan.iter().map( |(_, allocation)| async move { diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 4edd18a3fa7..946a6a8bc88 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -14,13 +14,11 @@ use crate::zone::Zones; use dropshot::ConfigDropshot; use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; use omicron_common::nexus_config::{self, RuntimeConfig as NexusRuntimeConfig}; -use omicron_common::postgres_config::PostgresConfigWithUrl; use slog::Logger; use std::collections::HashSet; use std::iter::FromIterator; use std::net::{IpAddr, Ipv6Addr, SocketAddr}; use std::path::{Path, PathBuf}; -use std::str::FromStr; use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; @@ -328,11 +326,8 @@ impl ServiceManager { self.underlay_address, ), // TODO: Switch to inferring this URL by DNS. - database: nexus_config::Database::FromUrl { - url: PostgresConfigWithUrl::from_str( - "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" - ).unwrap() - } + // "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" + database: nexus_config::Database::FromDns, }; // Copy the partial config file to the expected location. 
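The `AddressBumpAllocator` added to rack_setup/service.rs above carries a "TODO: Testable?" note; a unit test along these lines, living next to the allocator, would pin down the bump behavior. This is only a sketch, and it assumes `RSS_RESERVED_ADDRESSES` is comfortably larger than the couple of addresses requested here:

#[test]
fn bump_allocator_hands_out_consecutive_addresses() {
    use std::net::Ipv6Addr;

    // Start from a sled address; each call to next() bumps the final segment.
    let sled_addr: Ipv6Addr = "fd00:1122:3344:0101::1".parse().unwrap();
    let mut alloc = AddressBumpAllocator::new(sled_addr);
    assert_eq!(
        alloc.next().unwrap(),
        "fd00:1122:3344:0101::2".parse::<Ipv6Addr>().unwrap()
    );
    assert_eq!(
        alloc.next().unwrap(),
        "fd00:1122:3344:0101::3".parse::<Ipv6Addr>().unwrap()
    );
}
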
diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index 698d5b112fc..18a1a3d8597 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -6,59 +6,59 @@ # |...............| <- This /56 is the Rack Subnet rack_subnet = "fd00:1122:3344:0100::" -[[request]] - +# [[request]] +# # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate crucible datasets. -[[request.dataset]] -id = "09a9a25f-2602-4e2f-9630-31af9c492c3e" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::6]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "2713b37a-3043-4ed5-aaff-f38200e45cfb" -zpool_id = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::7]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "ffd16cad-e5d5-495e-9c59-4312a3857d91" -zpool_id = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::8]:32345" -dataset_kind.type = "crucible" +# [[request.dataset]] +# id = "09a9a25f-2602-4e2f-9630-31af9c492c3e" +# zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" +# address = "[fd00:1122:3344:0101::6]:32345" +# dataset_kind.type = "crucible" +# +# [[request.dataset]] +# id = "2713b37a-3043-4ed5-aaff-f38200e45cfb" +# zpool_id = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" +# address = "[fd00:1122:3344:0101::7]:32345" +# dataset_kind.type = "crucible" +# +# [[request.dataset]] +# id = "ffd16cad-e5d5-495e-9c59-4312a3857d91" +# zpool_id = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" +# address = "[fd00:1122:3344:0101::8]:32345" +# dataset_kind.type = "crucible" -[[request.dataset]] -id = "4d08fc19-3d5f-4f6b-9c48-925f8eac7255" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::2]:32221" -dataset_kind.type = "cockroach_db" -dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] +# [[request.dataset]] +# id = "4d08fc19-3d5f-4f6b-9c48-925f8eac7255" +# zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" +# address = "[fd00:1122:3344:0101::2]:32221" +# dataset_kind.type = "cockroach_db" +# dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate clickhouse datasets. -[[request.dataset]] -id = "a3505b41-a592-420b-84f2-3d76bf0e0a81" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::5]:8123" -dataset_kind.type = "clickhouse" +# [[request.dataset]] +# id = "a3505b41-a592-420b-84f2-3d76bf0e0a81" +# zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" +# address = "[fd00:1122:3344:0101::5]:8123" +# dataset_kind.type = "clickhouse" -[[request.service]] -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" -name = "nexus" -addresses = [ "fd00:1122:3344:0101::3" ] -gz_addresses = [] -[request.service.service_type] -type = "nexus" -internal_address = "[fd00:1122:3344:0101::3]:12221" -external_address = "[fd00:1122:3344:0101::3]:12220" +# [[request.service]] +# id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" +# name = "nexus" +# addresses = [ "fd00:1122:3344:0101::3" ] +# gz_addresses = [] +# [request.service.service_type] +# type = "nexus" +# internal_address = "[fd00:1122:3344:0101::3]:12221" +# external_address = "[fd00:1122:3344:0101::3]:12220" # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate Oximeter services. 
-[[request.service]] -id = "1da65e5b-210c-4859-a7d7-200c1e659972" -name = "oximeter" -addresses = [ "fd00:1122:3344:0101::4" ] -gz_addresses = [] -[request.service.service_type] -type = "oximeter" +# [[request.service]] +# id = "1da65e5b-210c-4859-a7d7-200c1e659972" +# name = "oximeter" +# addresses = [ "fd00:1122:3344:0101::4" ] +# gz_addresses = [] +# [request.service.service_type] +# type = "oximeter" From eca54846ca99f0034ad12aaf114d76c5409e08ed Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 9 Jun 2022 14:17:43 -0400 Subject: [PATCH 10/88] RSS performs config by itself, mostly --- Cargo.lock | 1 + nexus/Cargo.toml | 1 + nexus/src/context.rs | 31 +- nexus/src/lib.rs | 2 +- openapi/sled-agent.json | 39 ++ sled-agent/src/http_entrypoints.rs | 17 + sled-agent/src/params.rs | 7 +- sled-agent/src/rack_setup/config.rs | 22 -- sled-agent/src/rack_setup/service.rs | 534 ++++++++++++++++++--------- sled-agent/src/sled_agent.rs | 11 +- sled-agent/src/storage_manager.rs | 9 + smf/sled-agent/config-rss.toml | 4 +- 12 files changed, 474 insertions(+), 204 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6d004285e09..094e0feb651 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3005,6 +3005,7 @@ dependencies = [ "http", "httptest", "hyper", + "internal-dns-client", "ipnetwork", "lazy_static", "libc", diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 85cbbaa72b9..20eceacc788 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -25,6 +25,7 @@ hex = "0.4.3" http = "0.2.7" hyper = "0.14" db-macros = { path = "src/db/db-macros" } +internal-dns-client = { path = "../internal-dns-client" } ipnetwork = "0.18" lazy_static = "1.4.0" libc = "0.2.126" diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 2ad6a93553a..e0ed637aef3 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -18,8 +18,13 @@ use authn::external::session_cookie::HttpAuthnSessionCookie; use authn::external::spoof::HttpAuthnSpoof; use authn::external::HttpAuthnScheme; use chrono::{DateTime, Duration, Utc}; +use internal_dns_client::names::SRV; +use omicron_common::address::{ + Ipv6Subnet, AZ_PREFIX, COCKROACH_PORT, +}; use omicron_common::api::external::Error; use omicron_common::nexus_config; +use omicron_common::postgres_config::PostgresConfigWithUrl; use oximeter::types::ProducerRegistry; use oximeter_instruments::http::{HttpService, LatencyTracker}; use slog::Logger; @@ -27,6 +32,7 @@ use std::collections::BTreeMap; use std::env; use std::fmt::Debug; use std::path::PathBuf; +use std::str::FromStr; use std::sync::Arc; use std::time::Instant; use std::time::SystemTime; @@ -68,7 +74,7 @@ pub struct ConsoleConfig { impl ServerContext { /// Create a new context with the given rack id and log. This creates the /// underlying nexus as well. - pub fn new( + pub async fn new( rack_id: Uuid, log: Logger, config: &config::Config, @@ -134,11 +140,32 @@ impl ServerContext { // like console index.html. 
leaving that out for now so we don't break // nexus in dev for everyone + // Set up DNS Client + let az_subnet = + Ipv6Subnet::::new(config.runtime.subnet.net().ip()); + info!(log, "Setting up resolver on subnet: {:?}", az_subnet); + let resolver = + internal_dns_client::multiclient::create_resolver(az_subnet) + .map_err(|e| format!("Failed to create DNS resolver: {}", e))?; + // Set up DB pool let url = match &config.runtime.database { nexus_config::Database::FromUrl { url } => url.clone(), nexus_config::Database::FromDns => { - todo!("Not yet implemented"); + info!(log, "Accessing DB url from DNS"); + let response = resolver + .lookup_ip(&SRV::Service("cockroachdb".to_string()).to_string()) + .await + .map_err(|e| format!("Failed to lookup IP: {}", e))?; + let address = response.iter().next().ok_or_else(|| { + "no addresses returned from DNS resolver".to_string() + })?; + info!(log, "DB addreess: {}", address); + PostgresConfigWithUrl::from_str(&format!( + "postgresql://root@[{}]:{}/omicron?sslmode=disable", + address, COCKROACH_PORT + )) + .map_err(|e| format!("Cannot parse Postgres URL: {}", e))? } }; let pool = db::Pool::new(&db::Config { url }); diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index c13fc3de3c8..61abe04b1ba 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -90,7 +90,7 @@ impl Server { let ctxlog = log.new(o!("component" => "ServerContext")); - let apictx = ServerContext::new(rack_id, ctxlog, &config)?; + let apictx = ServerContext::new(rack_id, ctxlog, &config).await?; let http_server_starter_external = dropshot::HttpServerStarter::new( &config.runtime.dropshot_external, diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 839e8ba9a76..0d9daf0ccb5 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -177,6 +177,33 @@ } } } + }, + "/zpools": { + "get": { + "operationId": "zpools_get", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_Zpool", + "type": "array", + "items": { + "$ref": "#/components/schemas/Zpool" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } } }, "components": { @@ -1219,6 +1246,18 @@ ] } ] + }, + "Zpool": { + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "id" + ] } } } diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 9f1d167f85c..72a8c3c3f74 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -6,6 +6,7 @@ use crate::params::{ DatasetEnsureBody, DiskEnsureBody, InstanceEnsureBody, ServiceEnsureBody, + Zpool, }; use dropshot::{ endpoint, ApiDescription, HttpError, HttpResponseOk, @@ -28,6 +29,7 @@ type SledApiDescription = ApiDescription; pub fn api() -> SledApiDescription { fn register_endpoints(api: &mut SledApiDescription) -> Result<(), String> { api.register(services_put)?; + api.register(zpools_get)?; api.register(filesystem_put)?; api.register(instance_put)?; api.register(disk_put)?; @@ -56,6 +58,21 @@ async fn services_put( Ok(HttpResponseUpdatedNoContent()) } +#[endpoint { + method = GET, + path = "/zpools", +}] +async fn zpools_get( + rqctx: Arc>, +) -> Result>, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk( + sa.zpools_get() + .await + .map_err(|e| Error::from(e))? 
+ )) +} + #[endpoint { method = PUT, path = "/filesystem", diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index d003bbe785e..cc2e18a1062 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -156,6 +156,11 @@ pub struct InstanceRuntimeStateRequested { pub migration_params: Option, } +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct Zpool { + pub id: Uuid, +} + /// The type of a dataset, and an auxiliary information necessary /// to successfully launch a zone managing the associated data. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] @@ -198,7 +203,7 @@ impl std::fmt::Display for DatasetKind { use DatasetKind::*; let s = match self { Crucible => "crucible", - CockroachDb { .. } => "cockroach", + CockroachDb { .. } => "cockroachdb", Clickhouse => "clickhouse", }; write!(f, "{}", s) diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 6786312a009..ad53cdb8a04 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -5,7 +5,6 @@ //! Interfaces for working with RSS config. use crate::config::ConfigError; -use crate::params::{DatasetEnsureBody, ServiceRequest}; use omicron_common::address::{ get_64_subnet, Ipv6Subnet, AZ_PREFIX, RACK_PREFIX, SLED_PREFIX, }; @@ -27,26 +26,6 @@ use std::path::Path; #[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] pub struct SetupServiceConfig { pub rack_subnet: Ipv6Addr, - - // TODO: REMOVE! - #[serde(default, rename = "request")] - pub requests: Vec, -} - -/// A request to initialize a sled. -#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] -pub struct HardcodedSledRequest { - /// Datasets to be created. - #[serde(default, rename = "dataset")] - pub datasets: Vec, - - /// Services to be instantiated. - #[serde(default, rename = "service")] - pub services: Vec, - - /// DNS Services to be instantiated. - #[serde(default, rename = "dns_service")] - pub dns_services: Vec, } impl SetupServiceConfig { @@ -82,7 +61,6 @@ mod test { fn test_subnets() { let cfg = SetupServiceConfig { rack_subnet: "fd00:1122:3344:0100::".parse().unwrap(), - requests: vec![], }; assert_eq!( diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 0fd19706908..00f7230a3a0 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -10,6 +10,7 @@ use crate::bootstrap::{ params::SledAgentRequest, rss_handle::BootstrapAgentHandle, }; use crate::params::{DatasetEnsureBody, ServiceRequest, ServiceType}; +use internal_dns_client::names::{AAAA, SRV}; use omicron_common::address::{ get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, NEXUS_EXTERNAL_PORT, NEXUS_INTERNAL_PORT, RSS_RESERVED_ADDRESSES, @@ -18,9 +19,14 @@ use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, }; use serde::{Deserialize, Serialize}; +use sled_agent_client::{ + Client as SledAgentClient, + Error as SledAgentError, + types as SledAgentTypes, +}; use slog::Logger; use std::collections::{HashMap, HashSet}; -use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::net::{Ipv6Addr, SocketAddrV6}; use std::path::PathBuf; use thiserror::Error; use tokio::sync::{Mutex, OnceCell}; @@ -29,6 +35,12 @@ use uuid::Uuid; // The number of Nexus instances to create from RSS. const NEXUS_COUNT: usize = 1; +// The number of CRDB instances to create from RSS. 
+const CRDB_COUNT: usize = 1; + +// The minimum number of sleds to initialize the rack. +const MINIMUM_SLED_COUNT: usize = 1; + /// Describes errors which may occur while operating the setup service. #[derive(Error, Debug)] pub enum SetupServiceError { @@ -39,11 +51,14 @@ pub enum SetupServiceError { err: std::io::Error, }, + #[error("Bad configuration for setting up rack: {0}")] + BadConfig(String), + #[error("Error initializing sled via sled-agent: {0}")] SledInitialization(String), #[error("Error making HTTP request to Sled Agent: {0}")] - SledApi(#[from] sled_agent_client::Error), + SledApi(#[from] SledAgentError), #[error("Cannot deserialize TOML file at {path}: {err}")] Toml { path: PathBuf, err: toml::de::Error }, @@ -125,14 +140,19 @@ impl Service { } } -fn rss_plan_path() -> std::path::PathBuf { +fn rss_sled_plan_path() -> std::path::PathBuf { + std::path::Path::new(omicron_common::OMICRON_CONFIG_PATH) + .join("rss-sled-plan.toml") +} + +fn rss_service_plan_path() -> std::path::PathBuf { std::path::Path::new(omicron_common::OMICRON_CONFIG_PATH) - .join("rss-plan.toml") + .join("rss-service-plan.toml") } fn rss_completed_plan_path() -> std::path::PathBuf { std::path::Path::new(omicron_common::OMICRON_CONFIG_PATH) - .join("rss-plan-completed.toml") + .join("rss-plan-completed.marker") } // Describes the options when awaiting for peers. @@ -191,22 +211,30 @@ impl ServiceInner { } } - async fn initialize_datasets( + async fn initialize_crdb( &self, - sled_address: SocketAddr, + sled_address: SocketAddrV6, datasets: &Vec, ) -> Result<(), SetupServiceError> { - let dur = std::time::Duration::from_secs(60); + if datasets.iter().any(|dataset| { + !matches!( + dataset.dataset_kind, + crate::params::DatasetKind::CockroachDb { .. } + ) + }) { + return Err(SetupServiceError::BadConfig("RSS should only initialize CRDB services".into())); + } + let dur = std::time::Duration::from_secs(60); let client = reqwest::ClientBuilder::new() .connect_timeout(dur) .timeout(dur) .build() .map_err(SetupServiceError::HttpClient)?; - let client = sled_agent_client::Client::new_with_client( + let client = SledAgentClient::new_with_client( &format!("http://{}", sled_address), client, - self.log.new(o!("SledAgentClient" => sled_address)), + self.log.new(o!("SledAgentClient" => sled_address.to_string())), ); info!(self.log, "sending dataset requests..."); @@ -220,8 +248,8 @@ impl ServiceInner { Ok::< (), BackoffError< - sled_agent_client::Error< - sled_agent_client::types::Error, + SledAgentError< + SledAgentTypes::Error, >, >, >(()) @@ -236,12 +264,34 @@ impl ServiceInner { ) .await?; } + + // Initialize DNS records for these datasets. + // + // CRDB is treated as a service, since they are interchangeable. 
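The DNS data built just below, and pushed out through `insert_dns_records` from the earlier multiclient change, looks roughly like this for a single CockroachDB dataset. The dataset id and address are illustrative values reused from earlier examples in this series, not real output:

// Sketch of the records for one CockroachDB dataset (illustrative values).
use internal_dns_client::types::{DnsKv, DnsRecord, DnsRecordKey, Srv};
use std::net::Ipv6Addr;

let dataset_id = "4d08fc19-3d5f-4f6b-9c48-925f8eac7255";
let aaaa_name = format!("{}.host.control-plane.oxide.internal", dataset_id);
let _records = vec![
    // One AAAA record naming the zone that hosts the dataset...
    DnsKv {
        key: DnsRecordKey { name: aaaa_name.clone() },
        records: vec![DnsRecord::Aaaa(
            "fd00:1122:3344:0101::2".parse::<Ipv6Addr>().unwrap(),
        )],
    },
    // ...and one SRV record for the interchangeable "cockroachdb" service,
    // pointing at that AAAA name and the CockroachDB port.
    DnsKv {
        key: DnsRecordKey {
            name: "_cockroachdb._tcp.control-plane.oxide.internal".to_string(),
        },
        records: vec![DnsRecord::Srv(Srv {
            prio: 0,
            weight: 0,
            port: 32221,
            target: aaaa_name,
        })],
    },
];

This is also the shape the context.rs change above relies on: Nexus resolves the same "_cockroachdb._tcp..." service name to locate the database.
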
+ + let aaaa = datasets + .iter() + .map(|dataset| { + ( + AAAA::Zone(dataset.id), + dataset.address, + ) + }) + .collect::>(); + let srv_key = SRV::Service("cockroachdb".into()); + + self.dns_servers + .get() + .expect("DNS servers must be initialized first") + .insert_dns_records(&self.log, aaaa, srv_key) + .await?; + Ok(()) } async fn initialize_services( &self, - sled_address: SocketAddr, + sled_address: SocketAddrV6, services: &Vec, ) -> Result<(), SetupServiceError> { let dur = std::time::Duration::from_secs(60); @@ -250,17 +300,17 @@ impl ServiceInner { .timeout(dur) .build() .map_err(SetupServiceError::HttpClient)?; - let client = sled_agent_client::Client::new_with_client( + let client = SledAgentClient::new_with_client( &format!("http://{}", sled_address), client, - self.log.new(o!("SledAgentClient" => sled_address)), + self.log.new(o!("SledAgentClient" => sled_address.to_string())), ); info!(self.log, "sending service requests..."); let services_put = || async { info!(self.log, "initializing sled services: {:?}", services); client - .services_put(&sled_agent_client::types::ServiceEnsureBody { + .services_put(&SledAgentTypes::ServiceEnsureBody { services: services .iter() .map(|s| s.clone().into()) @@ -271,7 +321,7 @@ impl ServiceInner { Ok::< (), BackoffError< - sled_agent_client::Error, + SledAgentError, >, >(()) }; @@ -283,29 +333,29 @@ impl ServiceInner { Ok(()) } - async fn load_plan( + async fn load_sled_plan( &self, - ) -> Result>, SetupServiceError> + ) -> Result>, SetupServiceError> { // If we already created a plan for this RSS to allocate // subnets/requests to sleds, re-use that existing plan. - let rss_plan_path = rss_plan_path(); - if rss_plan_path.exists() { + let rss_sled_plan_path = rss_sled_plan_path(); + if rss_sled_plan_path.exists() { info!(self.log, "RSS plan already created, loading from file"); - let plan: std::collections::HashMap = + let plan: std::collections::HashMap = toml::from_str( - &tokio::fs::read_to_string(&rss_plan_path).await.map_err( + &tokio::fs::read_to_string(&rss_sled_plan_path).await.map_err( |err| SetupServiceError::Io { message: format!( - "Loading RSS plan {rss_plan_path:?}" + "Loading RSS plan {rss_sled_plan_path:?}" ), err, }, )?, ) .map_err(|err| SetupServiceError::Toml { - path: rss_plan_path, + path: rss_sled_plan_path, err, })?; Ok(Some(plan)) @@ -314,104 +364,14 @@ impl ServiceInner { } } - async fn create_plan( + async fn create_sled_plan( &self, config: &Config, bootstrap_addrs: impl IntoIterator, - ) -> Result, SetupServiceError> { + ) -> Result, SetupServiceError> { let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); - let reserved_rack_subnet = ReservedRackSubnet::new(config.az_subnet()); - let dns_subnets = reserved_rack_subnet.get_dns_subnets(); - - info!(self.log, "dns_subnets: {:#?}", dns_subnets); - - let requests_and_sleds = - bootstrap_addrs.map(|(idx, bootstrap_addr)| { - let sled_subnet_index = - u8::try_from(idx + 1).expect("Too many peers!"); - let subnet = config.sled_subnet(sled_subnet_index); - let mut addr_alloc = - AddressBumpAllocator::new(*get_sled_address(subnet).ip()); - - let mut request = SledRequest::default(); - - // The first enumerated sleds get assigned the responsibility - // of hosting Nexus. 
- if idx < NEXUS_COUNT { - let address = addr_alloc.next().expect("Not enough addrs"); - request.services.push(ServiceRequest { - id: Uuid::new_v4(), - name: "nexus".to_string(), - addresses: vec![address], - gz_addresses: vec![], - service_type: ServiceType::Nexus { - internal_address: SocketAddrV6::new( - address, - NEXUS_INTERNAL_PORT, - 0, - 0, - ), - external_address: SocketAddrV6::new( - address, - NEXUS_EXTERNAL_PORT, - 0, - 0, - ), - }, - }) - } - - // The first enumerated sleds host the CRDB datasets, using - // zpools described from the underlying config file. - if idx < config.requests.len() { - for dataset in &config.requests[idx].datasets { - let address = SocketAddrV6::new( - addr_alloc.next().expect("Not enough addrs"), - omicron_common::address::COCKROACH_PORT, - 0, - 0, - ); - request.datasets.push(DatasetEnsureBody { - id: Uuid::new_v4(), - zpool_id: dataset.zpool_id, - dataset_kind: - crate::params::DatasetKind::CockroachDb { - all_addresses: vec![address], - }, - address, - }); - } - } - - // The first enumerated sleds get assigned the additional - // responsibility of being internal DNS servers. - if idx < dns_subnets.len() { - let dns_subnet = &dns_subnets[idx]; - let dns_addr = dns_subnet.dns_address().ip(); - request.dns_services.push(ServiceRequest { - id: Uuid::new_v4(), - name: "internal-dns".to_string(), - addresses: vec![dns_addr], - gz_addresses: vec![dns_subnet.gz_address().ip()], - service_type: ServiceType::InternalDns { - server_address: SocketAddrV6::new( - dns_addr, - DNS_SERVER_PORT, - 0, - 0, - ), - dns_address: SocketAddrV6::new( - dns_addr, DNS_PORT, 0, 0, - ), - }, - }); - } - (request, (idx, bootstrap_addr)) - }); - - let allocations = requests_and_sleds.map(|(request, sled)| { - let (idx, bootstrap_addr) = sled; + let allocations = bootstrap_addrs.map(|(idx, bootstrap_addr)| { info!( self.log, "Creating plan for the sled at {:?}", bootstrap_addr @@ -424,10 +384,7 @@ impl ServiceInner { ( bootstrap_addr, - SledAllocation { - initialization_request: SledAgentRequest { subnet }, - services_request: request, - }, + SledAgentRequest { subnet }, ) }); @@ -447,14 +404,222 @@ impl ServiceInner { .expect("Cannot turn config to string"); info!(self.log, "Plan serialized as: {}", plan_str); - let path = rss_plan_path(); + let path = rss_sled_plan_path(); tokio::fs::write(&path, plan_str).await.map_err(|err| { SetupServiceError::Io { - message: format!("Storing RSS plan to {path:?}"), + message: format!("Storing RSS sled plan to {path:?}"), err, } })?; - info!(self.log, "Plan written to storage"); + info!(self.log, "Sled plan written to storage"); + + Ok(plan) + } + + // Gets a zpool UUID from the sled. 
+ async fn get_a_zpool_from_sled( + &self, + address: SocketAddrV6, + ) -> Result { + let dur = std::time::Duration::from_secs(60); + let client = reqwest::ClientBuilder::new() + .connect_timeout(dur) + .timeout(dur) + .build() + .map_err(SetupServiceError::HttpClient)?; + let client = SledAgentClient::new_with_client( + &format!("http://{}", address), + client, + self.log.new(o!("SledAgentClient" => address.to_string())), + ); + + let get_zpools = || async { + let zpools: Vec = client + .zpools_get() + .await + .map(|response| { + response.into_inner() + .into_iter() + .map(|zpool| zpool.id) + .collect() + }) + .map_err(|err| { + BackoffError::transient( + SetupServiceError::SledApi(err) + ) + })?; + + if zpools.is_empty() { + return Err(BackoffError::transient( + SetupServiceError::SledInitialization("Awaiting zpools".to_string()) + )); + } + + Ok(zpools) + }; + let log_failure = |error, _| { + warn!(self.log, "failed to get zpools"; "error" => ?error); + }; + let zpools = retry_notify( + internal_service_policy(), + get_zpools, + log_failure, + ) + .await?; + + Ok(zpools[0]) + } + + async fn load_service_plan( + &self, + ) -> Result>, SetupServiceError> + { + // If we already created a plan for this RSS to allocate + // services to sleds, re-use that existing plan. + let rss_service_plan_path = rss_service_plan_path(); + if rss_service_plan_path.exists() { + info!(self.log, "RSS plan already created, loading from file"); + + let plan: std::collections::HashMap = + toml::from_str( + &tokio::fs::read_to_string(&rss_service_plan_path).await.map_err( + |err| SetupServiceError::Io { + message: format!( + "Loading RSS plan {rss_service_plan_path:?}" + ), + err, + }, + )?, + ) + .map_err(|err| SetupServiceError::Toml { + path: rss_service_plan_path, + err, + })?; + Ok(Some(plan)) + } else { + Ok(None) + } + } + async fn create_service_plan( + &self, + config: &Config, + sled_addrs: &Vec, + ) -> Result, SetupServiceError> { + let reserved_rack_subnet = ReservedRackSubnet::new(config.az_subnet()); + let dns_subnets = reserved_rack_subnet.get_dns_subnets(); + + let mut allocations = vec![]; + + for idx in 0..sled_addrs.len() { + let sled_address = sled_addrs[idx]; + let sled_subnet_index = + u8::try_from(idx + 1).expect("Too many peers!"); + let subnet = config.sled_subnet(sled_subnet_index); + let mut addr_alloc = + AddressBumpAllocator::new(*get_sled_address(subnet).ip()); + + let mut request = SledRequest::default(); + + // The first enumerated sleds get assigned the responsibility + // of hosting Nexus. + if idx < NEXUS_COUNT { + let address = addr_alloc.next().expect("Not enough addrs"); + request.services.push(ServiceRequest { + id: Uuid::new_v4(), + name: "nexus".to_string(), + addresses: vec![address], + gz_addresses: vec![], + service_type: ServiceType::Nexus { + internal_address: SocketAddrV6::new( + address, + NEXUS_INTERNAL_PORT, + 0, + 0, + ), + external_address: SocketAddrV6::new( + address, + NEXUS_EXTERNAL_PORT, + 0, + 0, + ), + }, + }) + } + + // The first enumerated sleds host the CRDB datasets, using + // zpools described from the underlying config file. 
+ if idx < CRDB_COUNT { + let zpool_id = self.get_a_zpool_from_sled(sled_address).await?; + + let address = SocketAddrV6::new( + addr_alloc.next().expect("Not enough addrs"), + omicron_common::address::COCKROACH_PORT, + 0, + 0, + ); + request.datasets.push(DatasetEnsureBody { + id: Uuid::new_v4(), + zpool_id, + dataset_kind: + crate::params::DatasetKind::CockroachDb { + all_addresses: vec![address], + }, + address, + }); + } + + // The first enumerated sleds get assigned the additional + // responsibility of being internal DNS servers. + if idx < dns_subnets.len() { + let dns_subnet = &dns_subnets[idx]; + let dns_addr = dns_subnet.dns_address().ip(); + request.dns_services.push(ServiceRequest { + id: Uuid::new_v4(), + name: "internal-dns".to_string(), + addresses: vec![dns_addr], + gz_addresses: vec![dns_subnet.gz_address().ip()], + service_type: ServiceType::InternalDns { + server_address: SocketAddrV6::new( + dns_addr, + DNS_SERVER_PORT, + 0, + 0, + ), + dns_address: SocketAddrV6::new( + dns_addr, DNS_PORT, 0, 0, + ), + }, + }); + } + + allocations.push(( + sled_address, + request + )); + } + + let mut plan = std::collections::HashMap::new(); + for (addr, allocation) in allocations { + plan.insert(addr, allocation); + } + + // Once we've constructed a plan, write it down to durable storage. + let serialized_plan = + toml::Value::try_from(&plan).unwrap_or_else(|e| { + panic!("Cannot serialize configuration: {:#?}: {}", plan, e) + }); + let plan_str = toml::to_string(&serialized_plan) + .expect("Cannot turn config to string"); + + info!(self.log, "Plan serialized as: {}", plan_str); + let path = rss_service_plan_path(); + tokio::fs::write(&path, plan_str).await.map_err(|err| { + SetupServiceError::Io { + message: format!("Storing RSS service plan to {path:?}"), + err, + } + })?; + info!(self.log, "Service plan written to storage"); Ok(plan) } @@ -509,19 +674,28 @@ impl ServiceInner { // This method has a few distinct phases, identified by files in durable // storage: // - // 1. ALLOCATION PLAN CREATION. When the RSS starts up for the first time, - // it creates an allocation plan to provision subnets and services - // to an initial set of sleds. + // 1. SLED ALLOCATION PLAN CREATION. When the RSS starts up for the first + // time, it creates an allocation plan to provision subnets to an initial + // set of sleds. // - // This plan is stored at "rss_plan_path()". + // This plan is stored at "rss_sled_plan_path()". // - // 2. ALLOCATION PLAN EXECUTION. The RSS then carries out this plan, making + // 2. SLED ALLOCATION PLAN EXECUTION. The RSS then carries out this plan, making // requests to the sleds enumerated within the "allocation plan". // - // 3. MARKING SETUP COMPLETE. Once the RSS has successfully initialized the - // rack, the "rss_plan_path()" file is renamed to - // "rss_completed_plan_path()". This indicates that the plan executed - // successfully, and no work remains. + // 3. SERVICE ALLOCATION PLAN CREATION. Now that Sled Agents are executing + // on their respsective subnets, they can be queried to create an + // allocation plan for services. + // + // This plan - for what services go where - is stored at + // "rss_service_plan_path()". + // + // 4. SERVICE ALLOCATION PLAN EXECUTION. RSS requests that the services + // outlined in the aforementioned step are created. + // + // 5. MARKING SETUP COMPLETE. Once the RSS has successfully initialized the + // rack, a marker file is created at "rss_completed_plan_path()". 
This + // indicates that the plan executed successfully, and no work remains. async fn inject_rack_setup_requests( &self, config: &Config, @@ -549,11 +723,11 @@ impl ServiceInner { // Wait for either: // - All the peers to re-load an old plan (if one exists) // - Enough peers to create a new plan (if one does not exist) - let maybe_plan = self.load_plan().await?; - let expectation = if let Some(plan) = &maybe_plan { + let maybe_sled_plan = self.load_sled_plan().await?; + let expectation = if let Some(plan) = &maybe_sled_plan { PeerExpectation::LoadOldPlan(plan.keys().map(|a| *a.ip()).collect()) } else { - PeerExpectation::CreateNewPlan(config.requests.len()) + PeerExpectation::CreateNewPlan(MINIMUM_SLED_COUNT) }; let addrs = self.wait_for_peers(expectation).await?; info!(self.log, "Enough peers exist to enact RSS plan"); @@ -562,24 +736,24 @@ impl ServiceInner { // // NOTE: This is a "point-of-no-return" -- before sending any requests // to neighboring sleds, the plan must be recorded to durable storage. - // This way, if the RSS power-cycles, it can idempotently execute the - // same allocation plan. - let plan = if let Some(plan) = maybe_plan { + // This way, if the RSS power-cycles, it can idempotently provide the + // same subnets to the same sleds. + let plan = if let Some(plan) = maybe_sled_plan { info!(self.log, "Re-using existing allocation plan"); plan } else { info!(self.log, "Creating new allocation plan"); - self.create_plan(config, addrs).await? + self.create_sled_plan(config, addrs).await? }; // Forward the sled initialization requests to our sled-agent. local_bootstrap_agent .initialize_sleds( plan.iter() - .map(|(bootstrap_addr, allocation)| { + .map(|(bootstrap_addr, initialization_request)| { ( *bootstrap_addr, - allocation.initialization_request.clone(), + initialization_request.clone(), ) }) .collect(), @@ -587,22 +761,34 @@ impl ServiceInner { .await .map_err(SetupServiceError::SledInitialization)?; + let sled_addresses: Vec<_> = plan.iter() + .map(|(_, initialization_request)| { + get_sled_address( + initialization_request.subnet, + ) + }) + .collect(); + + // Now that sled agents have been initialized, we can create + // a service allocation plan. + let service_plan = if let Some(plan) = self.load_service_plan().await? { + plan + } else { + self.create_service_plan(&config, &sled_addresses).await? + }; + // Set up internal DNS services. futures::future::join_all( - plan.iter() - .filter(|(_, allocation)| { + service_plan.iter() + .filter(|(_, service_request)| { // Only send requests to sleds that are supposed to be running // DNS services. - !allocation.services_request.dns_services.is_empty() + !service_request.dns_services.is_empty() }) - .map(|(_, allocation)| async move { - let sled_address = SocketAddr::V6(get_sled_address( - allocation.initialization_request.subnet, - )); - + .map(|(sled_address, services_request)| async move { self.initialize_services( - sled_address, - &allocation.services_request.dns_services, + *sled_address, + &services_request.dns_services, ) .await?; Ok(()) @@ -621,15 +807,12 @@ impl ServiceInner { .map_err(|_| ()) .expect("Already set DNS servers"); - // Issue the dataset initialization requests to all sleds. - futures::future::join_all(plan.iter().map( - |(_, allocation)| async move { - let sled_address = SocketAddr::V6(get_sled_address( - allocation.initialization_request.subnet, - )); - self.initialize_datasets( - sled_address, - &allocation.services_request.datasets, + // Issue the crdb initialization requests to all sleds. 
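        // Ordering note: every sled must finish its CockroachDB dataset
        // initialization before any Nexus zone is launched further down,
        // since Nexus expects to find an initialized database on startup.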
+ futures::future::join_all(service_plan.iter().map( + |(sled_address, services_request)| async move { + self.initialize_crdb( + *sled_address, + &services_request.datasets, ) .await?; Ok(()) @@ -646,21 +829,23 @@ impl ServiceInner { // Note that this must happen *after* the dataset initialization, // to ensure that CockroachDB has been initialized before Nexus // starts. - futures::future::join_all(plan.iter().map( - |(_, allocation)| async move { - let sled_address = SocketAddr::V6(get_sled_address( - allocation.initialization_request.subnet, - )); - - let all_services = allocation - .services_request + futures::future::join_all(service_plan.iter().map( + |(sled_address, services_request)| async move { + // With the current implementation of "initialize_services", + // we must provide the set of *all* services that should be + // executing on a sled. + // + // This means re-requesting the DNS service, even if it is + // already running - this is fine, however, as the receiving + // sled agent doesn't modify the already-running service. + let all_services = services_request .services .iter() - .chain(allocation.services_request.dns_services.iter()) + .chain(services_request.dns_services.iter()) .map(|s| s.clone()) .collect::>(); - self.initialize_services(sled_address, &all_services).await?; + self.initialize_services(*sled_address, &all_services).await?; Ok(()) }, )) @@ -672,11 +857,10 @@ impl ServiceInner { // Finally, make sure the configuration is saved so we don't inject // the requests on the next iteration. - let plan_path = rss_plan_path(); - tokio::fs::rename(&plan_path, &rss_completed_plan_path).await.map_err( + tokio::fs::File::create(&rss_completed_plan_path).await.map_err( |err| SetupServiceError::Io { message: format!( - "renaming {plan_path:?} to {rss_completed_plan_path:?}" + "creating {rss_completed_plan_path:?}" ), err, }, diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 5f8f1e500ab..6260191f58b 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -14,7 +14,7 @@ use crate::instance_manager::InstanceManager; use crate::nexus::NexusClient; use crate::params::{ DatasetKind, DiskStateRequested, InstanceHardware, InstanceMigrateParams, - InstanceRuntimeStateRequested, ServiceEnsureBody, + InstanceRuntimeStateRequested, ServiceEnsureBody, Zpool }; use crate::services::{self, ServiceManager}; use crate::storage_manager::StorageManager; @@ -274,6 +274,15 @@ impl SledAgent { Ok(()) } + pub async fn zpools_get( + &self + ) -> Result, Error> { + let zpools = self.storage + .get_zpools() + .await?; + Ok(zpools) + } + /// Ensures that a filesystem type exists within the zpool. 
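    // (On the `zpools_get` endpoint added above: this is what RSS polls,
    // via the generated sled-agent client, while building its service plan.
    // It simply reports the UUIDs of the pools the StorageManager currently
    // knows about; see `StorageManager::get_zpools` further down.)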
pub async fn filesystem_ensure( &self, diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index a02e68baae9..467bb70b3d8 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -957,6 +957,15 @@ impl StorageManager { Ok(()) } + pub async fn get_zpools(&self) -> Result, Error> { + let pools = self.pools.lock().await; + Ok(pools.keys().map(|zpool| { + crate::params::Zpool { + id: zpool.id() + } + }).collect()) + } + pub async fn upsert_filesystem( &self, zpool_id: Uuid, diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index 18a1a3d8597..5640bc69c81 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -6,8 +6,8 @@ # |...............| <- This /56 is the Rack Subnet rack_subnet = "fd00:1122:3344:0100::" -# [[request]] -# +[[request]] + # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate crucible datasets. # [[request.dataset]] From 565862e988c597f17f1b8b27c4d0b6a15d02aa70 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 9 Jun 2022 18:54:01 -0400 Subject: [PATCH 11/88] RSS side of handoff to Nexus mostly complete --- nexus/src/app/rack.rs | 19 +- nexus/src/db/datastore.rs | 71 ++- nexus/src/internal_api/http_entrypoints.rs | 11 +- nexus/src/internal_api/params.rs | 13 + openapi/nexus-internal.json | 48 +- sled-agent/src/bin/sled-agent.rs | 5 +- sled-agent/src/bootstrap/agent.rs | 13 +- sled-agent/src/bootstrap/params.rs | 4 + sled-agent/src/bootstrap/server.rs | 2 +- sled-agent/src/config.rs | 3 - sled-agent/src/rack_setup/mod.rs | 1 + sled-agent/src/rack_setup/plan/mod.rs | 8 + sled-agent/src/rack_setup/plan/service.rs | 319 ++++++++++++ sled-agent/src/rack_setup/plan/sled.rs | 140 ++++++ sled-agent/src/rack_setup/service.rs | 533 +++++++-------------- sled-agent/src/server.rs | 4 +- sled-agent/src/sled_agent.rs | 7 +- sled-agent/src/sp.rs | 7 +- smf/sled-agent/config-rss.toml | 3 +- smf/sled-agent/config.toml | 3 - 20 files changed, 815 insertions(+), 399 deletions(-) create mode 100644 sled-agent/src/rack_setup/plan/mod.rs create mode 100644 sled-agent/src/rack_setup/plan/service.rs create mode 100644 sled-agent/src/rack_setup/plan/sled.rs diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index a9a10a616aa..8b9728c7f77 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -7,7 +7,7 @@ use crate::authz; use crate::context::OpContext; use crate::db; -use crate::internal_api::params::ServicePutRequest; +use crate::internal_api::params::RackInitializationRequest; use futures::future::ready; use futures::StreamExt; use omicron_common::api::external::DataPageParams; @@ -69,12 +69,12 @@ impl super::Nexus { &self, opctx: &OpContext, rack_id: Uuid, - services: Vec, + request: RackInitializationRequest, ) -> Result<(), Error> { opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; // Convert from parameter -> DB type. 
- let services: Vec<_> = services + let services: Vec<_> = request.services .into_iter() .map(|svc| { db::model::Service::new( @@ -86,8 +86,19 @@ impl super::Nexus { }) .collect(); + let datasets: Vec<_> = request.datasets + .into_iter() + .map(|dataset| { + db::model::Dataset::new( + dataset.dataset_id, + dataset.zpool_id, + dataset.request.address, + dataset.request.kind.into(), + ) + }) + .collect(); self.db_datastore - .rack_set_initialized(opctx, rack_id, services) + .rack_set_initialized(opctx, rack_id, services, datasets) .await?; Ok(()) diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 9083b45eca0..a386c098eb6 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -181,13 +181,14 @@ impl DataStore { opctx: &OpContext, rack_id: Uuid, services: Vec, + datasets: Vec, ) -> UpdateResult { use db::schema::rack::dsl as rack_dsl; - use db::schema::service::dsl as service_dsl; #[derive(Debug)] enum RackInitError { ServiceInsert { err: SyncInsertError, sled_id: Uuid, svc_id: Uuid }, + DatasetInsert { err: SyncInsertError, zpool_id: Uuid, dataset_id: Uuid }, RackUpdate(diesel::result::Error), } type TxnError = TransactionError; @@ -209,22 +210,21 @@ impl DataStore { return Ok(rack); } - // Otherwise, insert services and set rack.initialized = true. + // Otherwise, insert services and datasets for svc in services { + use db::schema::service::dsl; let sled_id = svc.sled_id; >::insert_resource( sled_id, - diesel::insert_into(service_dsl::service) + diesel::insert_into(dsl::service) .values(svc.clone()) - .on_conflict(service_dsl::id) + .on_conflict(dsl::id) .do_update() .set(( - service_dsl::time_modified.eq(Utc::now()), - service_dsl::sled_id - .eq(excluded(service_dsl::sled_id)), - service_dsl::ip.eq(excluded(service_dsl::ip)), - service_dsl::kind - .eq(excluded(service_dsl::kind)), + dsl::time_modified.eq(Utc::now()), + dsl::sled_id.eq(excluded(dsl::sled_id)), + dsl::ip.eq(excluded(dsl::ip)), + dsl::kind.eq(excluded(dsl::kind)), )), ) .insert_and_get_result(conn) @@ -236,6 +236,34 @@ impl DataStore { }) })?; } + for dataset in datasets { + use db::schema::dataset::dsl; + let zpool_id = dataset.pool_id; + >::insert_resource( + zpool_id, + diesel::insert_into(dsl::dataset) + .values(dataset.clone()) + .on_conflict(dsl::id) + .do_update() + .set(( + dsl::time_modified.eq(Utc::now()), + dsl::pool_id.eq(excluded(dsl::pool_id)), + dsl::ip.eq(excluded(dsl::ip)), + dsl::port.eq(excluded(dsl::port)), + dsl::kind.eq(excluded(dsl::kind)), + )), + ) + .insert_and_get_result(conn) + .map_err(|err| { + TxnError::CustomError(RackInitError::DatasetInsert { + err, + zpool_id, + dataset_id: dataset.id(), + }) + })?; + } + + // Set the rack to "initialized" once the handoff is complete diesel::update(rack_dsl::rack) .filter(rack_dsl::id.eq(rack_id)) .set(( @@ -250,6 +278,25 @@ impl DataStore { }) .await .map_err(|e| match e { + TxnError::CustomError(RackInitError::DatasetInsert { + err, + zpool_id, + dataset_id, + }) => match err { + SyncInsertError::CollectionNotFound => { + Error::ObjectNotFound { + type_name: ResourceType::Zpool, + lookup_type: LookupType::ById(zpool_id), + } + } + SyncInsertError::DatabaseError(e) => { + public_error_from_diesel_create( + e, + ResourceType::Dataset, + &dataset_id.to_string(), + ) + } + }, TxnError::CustomError(RackInitError::ServiceInsert { err, sled_id, @@ -4433,14 +4480,14 @@ mod test { // Initialize the Rack. 
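        // Idempotency: `rack_set_initialized` returns early once the rack
        // row is marked initialized, and the service/dataset inserts above
        // use `ON CONFLICT ... DO UPDATE`, so repeating the same handoff
        // (as this test does below) is expected to succeed.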
let result = datastore - .rack_set_initialized(&opctx, rack.id(), vec![]) + .rack_set_initialized(&opctx, rack.id(), vec![], vec![]) .await .unwrap(); assert!(result.initialized); // Re-initialize the rack (check for idempotency) let result = datastore - .rack_set_initialized(&opctx, rack.id(), vec![]) + .rack_set_initialized(&opctx, rack.id(), vec![], vec![]) .await .unwrap(); assert!(result.initialized); diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 83c0c3baec8..2ead37db565 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -7,8 +7,9 @@ use crate::context::OpContext; use crate::ServerContext; use super::params::{ - DatasetPutRequest, DatasetPutResponse, OximeterInfo, ServicePutRequest, - SledAgentStartupInfo, ZpoolPutRequest, ZpoolPutResponse, + DatasetPutRequest, DatasetPutResponse, OximeterInfo, + RackInitializationRequest, SledAgentStartupInfo, ZpoolPutRequest, + ZpoolPutResponse, }; use dropshot::endpoint; use dropshot::ApiDescription; @@ -104,15 +105,15 @@ struct RackPathParam { async fn rack_initialization_complete( rqctx: Arc>>, path_params: Path, - info: TypedBody>, + info: TypedBody, ) -> Result { let apictx = rqctx.context(); let nexus = &apictx.nexus; let path = path_params.into_inner(); - let svcs = info.into_inner(); + let request = info.into_inner(); let opctx = OpContext::for_internal_api(&rqctx).await; - nexus.rack_initialize(&opctx, path.rack_id, svcs).await?; + nexus.rack_initialize(&opctx, path.rack_id, request).await?; Ok(HttpResponseUpdatedNoContent()) } diff --git a/nexus/src/internal_api/params.rs b/nexus/src/internal_api/params.rs index 8b83138c2b5..7dda7610573 100644 --- a/nexus/src/internal_api/params.rs +++ b/nexus/src/internal_api/params.rs @@ -149,6 +149,19 @@ pub struct ServicePutRequest { pub kind: ServiceKind, } +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct DatasetCreateRequest { + pub zpool_id: Uuid, + pub dataset_id: Uuid, + pub request: DatasetPutRequest, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct RackInitializationRequest { + pub services: Vec, + pub datasets: Vec, +} + /// Message used to notify Nexus that this oximeter instance is up and running. 
#[derive(Debug, Clone, Copy, JsonSchema, Serialize, Deserialize)] pub struct OximeterInfo { diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 743b4107589..103e0481ca9 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -249,11 +249,7 @@ "content": { "application/json": { "schema": { - "title": "Array_of_ServicePutRequest", - "type": "array", - "items": { - "$ref": "#/components/schemas/ServicePutRequest" - } + "$ref": "#/components/schemas/RackInitializationRequest" } } }, @@ -668,6 +664,27 @@ "value" ] }, + "DatasetCreateRequest": { + "type": "object", + "properties": { + "dataset_id": { + "type": "string", + "format": "uuid" + }, + "request": { + "$ref": "#/components/schemas/DatasetPutRequest" + }, + "zpool_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "dataset_id", + "request", + "zpool_id" + ] + }, "DatasetKind": { "description": "Describes the purpose of the dataset.", "type": "string", @@ -1705,6 +1722,27 @@ } ] }, + "RackInitializationRequest": { + "type": "object", + "properties": { + "datasets": { + "type": "array", + "items": { + "$ref": "#/components/schemas/DatasetCreateRequest" + } + }, + "services": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ServicePutRequest" + } + } + }, + "required": [ + "datasets", + "services" + ] + }, "Sample": { "description": "A concrete type representing a single, timestamped measurement from a timeseries.", "type": "object", diff --git a/sled-agent/src/bin/sled-agent.rs b/sled-agent/src/bin/sled-agent.rs index fba2d0d5de0..ea09da733da 100644 --- a/sled-agent/src/bin/sled-agent.rs +++ b/sled-agent/src/bin/sled-agent.rs @@ -15,6 +15,7 @@ use omicron_sled_agent::{config::Config as SledConfig, server as sled_server}; use sp_sim::config::GimletConfig; use std::path::PathBuf; use structopt::StructOpt; +use uuid::Uuid; #[derive(Debug, StructOpt)] #[structopt( @@ -99,7 +100,9 @@ async fn do_run() -> Result<(), CmdError> { // Configure and run the Bootstrap server. let bootstrap_config = BootstrapConfig { - id: config.id, + // NOTE: The UUID of this bootstrap server is not stable across + // reboots. + id: Uuid::new_v4(), bind_address: bootstrap_address, log: config.log.clone(), rss_config, diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index bc0f84c2bad..e47e7300240 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -138,7 +138,6 @@ impl Agent { ) -> Result { let ba_log = log.new(o!( "component" => "BootstrapAgent", - "server" => sled_config.id.to_string(), )); // We expect this directory to exist - ensure that it does, before any @@ -246,7 +245,14 @@ impl Agent { // Server already exists, return it. 
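            // A note on the checks below: because sled UUIDs are now minted
            // by RSS and carried in the `SledAgentRequest` (rather than read
            // from the sled's own config), a repeated request must match both
            // the UUID and the address the running Sled Agent was started
            // with; any mismatch is reported as an error instead of quietly
            // reusing the existing server.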
info!(&self.log, "Sled Agent already loaded"); - if &server.address().ip() != sled_address.ip() { + if server.id() != request.id { + let err_str = format!( + "Sled Agent already running with UUID {}, but {} was requested", + server.id(), + request.id, + ); + return Err(BootstrapError::SledError(err_str)); + } else if &server.address().ip() != sled_address.ip() { let err_str = format!( "Sled Agent already running on address {}, but {} was requested", server.address().ip(), @@ -261,6 +267,7 @@ impl Agent { let server = SledServer::start( &self.sled_config, self.parent_log.clone(), + request.id, sled_address, ) .await @@ -289,7 +296,7 @@ impl Agent { err, })?; - Ok(SledAgentResponse { id: self.sled_config.id }) + Ok(SledAgentResponse { id: request.id }) } /// Communicates with peers, sharing secrets, until the rack has been diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index 334376f28d3..5a01f4c6e0b 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -9,6 +9,7 @@ use std::borrow::Cow; use omicron_common::address::{Ipv6Subnet, SLED_PREFIX}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use uuid::Uuid; /// Identity signed by local RoT and Oxide certificate chain. #[derive(Serialize, Deserialize, JsonSchema)] @@ -20,6 +21,9 @@ pub struct ShareRequest { /// Configuration information for launching a Sled Agent. #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] pub struct SledAgentRequest { + /// Uuid of the Sled Agent to be created. + pub id: Uuid, + /// Portion of the IP space to be managed by the Sled Agent. pub subnet: Ipv6Subnet, } diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index c5e663b7eab..629adc0ed14 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -53,7 +53,7 @@ impl Server { } info!(log, "detecting (real or simulated) SP"); - let sp = SpHandle::detect(&config.sp_config, &sled_config, &log) + let sp = SpHandle::detect(&config.sp_config, &log) .await .map_err(|err| format!("Failed to detect local SP: {err}"))?; diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index d67dd088e8a..a7b0d9fb1ab 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -11,13 +11,10 @@ use dropshot::ConfigLogging; use serde::Deserialize; use std::net::SocketAddr; use std::path::{Path, PathBuf}; -use uuid::Uuid; /// Configuration for a sled agent #[derive(Clone, Debug, Deserialize)] pub struct Config { - /// Unique id for the sled - pub id: Uuid, /// Address of Nexus instance pub nexus_address: SocketAddr, /// Configuration for the sled agent debug log diff --git a/sled-agent/src/rack_setup/mod.rs b/sled-agent/src/rack_setup/mod.rs index e947ff99ef0..f052b6c3120 100644 --- a/sled-agent/src/rack_setup/mod.rs +++ b/sled-agent/src/rack_setup/mod.rs @@ -6,3 +6,4 @@ pub mod config; pub mod service; +mod plan; diff --git a/sled-agent/src/rack_setup/plan/mod.rs b/sled-agent/src/rack_setup/plan/mod.rs new file mode 100644 index 00000000000..2343a3be2e6 --- /dev/null +++ b/sled-agent/src/rack_setup/plan/mod.rs @@ -0,0 +1,8 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Rack Setup Service plan generation + +pub mod service; +pub mod sled; diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs new file mode 100644 index 00000000000..c5ceb3c1ef0 --- /dev/null +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -0,0 +1,319 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Plan generation for "where should services be initialized". + +use crate::params::{DatasetEnsureBody, ServiceRequest, ServiceType}; +use crate::rack_setup::config::SetupServiceConfig as Config; +use omicron_common::address::{ + get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, + NEXUS_EXTERNAL_PORT, NEXUS_INTERNAL_PORT, RSS_RESERVED_ADDRESSES, +}; +use omicron_common::backoff::{ + internal_service_policy, retry_notify, BackoffError, +}; +use serde::{Deserialize, Serialize}; +use sled_agent_client::{ + Client as SledAgentClient, + Error as SledAgentError, + types as SledAgentTypes, +}; +use slog::Logger; +use std::collections::HashMap; +use std::net::{Ipv6Addr, SocketAddrV6}; +use std::path::{Path, PathBuf}; +use thiserror::Error; +use uuid::Uuid; + +// The number of Nexus instances to create from RSS. +const NEXUS_COUNT: usize = 1; + +// The number of CRDB instances to create from RSS. +const CRDB_COUNT: usize = 1; + +fn rss_service_plan_path() -> PathBuf { + Path::new(omicron_common::OMICRON_CONFIG_PATH) + .join("rss-service-plan.toml") +} + +/// Describes errors which may occur while generating a plan for services. +#[derive(Error, Debug)] +pub enum PlanError { + #[error("I/O error while {message}: {err}")] + Io { + message: String, + #[source] + err: std::io::Error, + }, + + #[error("Cannot deserialize TOML file at {path}: {err}")] + Toml { path: PathBuf, err: toml::de::Error }, + + #[error("Error making HTTP request to Sled Agent: {0}")] + SledApi(#[from] SledAgentError), + + #[error("Error initializing sled via sled-agent: {0}")] + SledInitialization(String), + + #[error("Failed to construct an HTTP client: {0}")] + HttpClient(reqwest::Error), +} + +#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] +pub struct SledRequest { + /// Datasets to be created. + #[serde(default, rename = "dataset")] + pub datasets: Vec, + + /// Services to be instantiated. + #[serde(default, rename = "service")] + pub services: Vec, + + /// DNS Services to be instantiated. + #[serde(default, rename = "dns_service")] + pub dns_services: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct Plan { + pub services: HashMap, +} + +impl Plan { + pub async fn load( + log: &Logger, + ) -> Result, PlanError> + { + // If we already created a plan for this RSS to allocate + // services to sleds, re-use that existing plan. + let rss_service_plan_path = rss_service_plan_path(); + if rss_service_plan_path.exists() { + info!(log, "RSS plan already created, loading from file"); + + let plan: Self = + toml::from_str( + &tokio::fs::read_to_string(&rss_service_plan_path).await.map_err( + |err| PlanError::Io { + message: format!( + "Loading RSS plan {rss_service_plan_path:?}" + ), + err, + }, + )?, + ) + .map_err(|err| PlanError::Toml { + path: rss_service_plan_path, + err, + })?; + Ok(Some(plan)) + } else { + Ok(None) + } + } + + // Gets a zpool UUID from the sled. 
+ async fn get_a_zpool_from_sled( + log: &Logger, + address: SocketAddrV6, + ) -> Result { + let dur = std::time::Duration::from_secs(60); + let client = reqwest::ClientBuilder::new() + .connect_timeout(dur) + .timeout(dur) + .build() + .map_err(PlanError::HttpClient)?; + let client = SledAgentClient::new_with_client( + &format!("http://{}", address), + client, + log.new(o!("SledAgentClient" => address.to_string())), + ); + + let get_zpools = || async { + let zpools: Vec = client + .zpools_get() + .await + .map(|response| { + response.into_inner() + .into_iter() + .map(|zpool| zpool.id) + .collect() + }) + .map_err(|err| { + BackoffError::transient( + PlanError::SledApi(err) + ) + })?; + + if zpools.is_empty() { + return Err(BackoffError::transient( + PlanError::SledInitialization("Awaiting zpools".to_string()) + )); + } + + Ok(zpools) + }; + let log_failure = |error, _| { + warn!(log, "failed to get zpools"; "error" => ?error); + }; + let zpools = retry_notify( + internal_service_policy(), + get_zpools, + log_failure, + ) + .await?; + + Ok(zpools[0]) + } + + pub async fn create( + log: &Logger, + config: &Config, + sled_addrs: &Vec, + ) -> Result { + let reserved_rack_subnet = ReservedRackSubnet::new(config.az_subnet()); + let dns_subnets = reserved_rack_subnet.get_dns_subnets(); + + let mut allocations = vec![]; + + for idx in 0..sled_addrs.len() { + let sled_address = sled_addrs[idx]; + let sled_subnet_index = + u8::try_from(idx + 1).expect("Too many peers!"); + let subnet = config.sled_subnet(sled_subnet_index); + let mut addr_alloc = + AddressBumpAllocator::new(*get_sled_address(subnet).ip()); + + let mut request = SledRequest::default(); + + // The first enumerated sleds get assigned the responsibility + // of hosting Nexus. + if idx < NEXUS_COUNT { + let address = addr_alloc.next().expect("Not enough addrs"); + request.services.push(ServiceRequest { + id: Uuid::new_v4(), + name: "nexus".to_string(), + addresses: vec![address], + gz_addresses: vec![], + service_type: ServiceType::Nexus { + internal_address: SocketAddrV6::new( + address, + NEXUS_INTERNAL_PORT, + 0, + 0, + ), + external_address: SocketAddrV6::new( + address, + NEXUS_EXTERNAL_PORT, + 0, + 0, + ), + }, + }) + } + + // The first enumerated sleds host the CRDB datasets, using + // zpools described from the underlying config file. + if idx < CRDB_COUNT { + let zpool_id = Self::get_a_zpool_from_sled(log, sled_address).await?; + + let address = SocketAddrV6::new( + addr_alloc.next().expect("Not enough addrs"), + omicron_common::address::COCKROACH_PORT, + 0, + 0, + ); + request.datasets.push(DatasetEnsureBody { + id: Uuid::new_v4(), + zpool_id, + dataset_kind: + crate::params::DatasetKind::CockroachDb { + all_addresses: vec![address], + }, + address, + }); + } + + // The first enumerated sleds get assigned the additional + // responsibility of being internal DNS servers. 
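            // Unlike the Nexus and CockroachDB addresses above, which are
            // carved out of the sled's own /64 by `AddressBumpAllocator`,
            // the internal DNS zones draw their addresses from the DNS
            // subnets reserved out of the rack subnet (see
            // `ReservedRackSubnet::get_dns_subnets()` above), plus an
            // address to be plumbed into the global zone (`gz_addresses`).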
+ if idx < dns_subnets.len() { + let dns_subnet = &dns_subnets[idx]; + let dns_addr = dns_subnet.dns_address().ip(); + request.dns_services.push(ServiceRequest { + id: Uuid::new_v4(), + name: "internal-dns".to_string(), + addresses: vec![dns_addr], + gz_addresses: vec![dns_subnet.gz_address().ip()], + service_type: ServiceType::InternalDns { + server_address: SocketAddrV6::new( + dns_addr, + DNS_SERVER_PORT, + 0, + 0, + ), + dns_address: SocketAddrV6::new( + dns_addr, DNS_PORT, 0, 0, + ), + }, + }); + } + + allocations.push(( + sled_address, + request + )); + } + + let mut services = std::collections::HashMap::new(); + for (addr, allocation) in allocations { + services.insert(addr, allocation); + } + + let plan = Self { + services + }; + + // Once we've constructed a plan, write it down to durable storage. + let serialized_plan = + toml::Value::try_from(&plan).unwrap_or_else(|e| { + panic!("Cannot serialize configuration: {:#?}: {}", plan, e) + }); + let plan_str = toml::to_string(&serialized_plan) + .expect("Cannot turn config to string"); + + info!(log, "Plan serialized as: {}", plan_str); + let path = rss_service_plan_path(); + tokio::fs::write(&path, plan_str).await.map_err(|err| { + PlanError::Io { + message: format!("Storing RSS service plan to {path:?}"), + err, + } + })?; + info!(log, "Service plan written to storage"); + + Ok(plan) + } +} + +struct AddressBumpAllocator { + last_addr: Ipv6Addr, +} + +// TODO: Testable? +// TODO: Could exist in another file? +impl AddressBumpAllocator { + fn new(sled_addr: Ipv6Addr) -> Self { + Self { last_addr: sled_addr } + } + + fn next(&mut self) -> Option { + let mut segments: [u16; 8] = self.last_addr.segments(); + segments[7] = segments[7].checked_add(1)?; + if segments[7] > RSS_RESERVED_ADDRESSES { + return None; + } + self.last_addr = Ipv6Addr::from(segments); + Some(self.last_addr) + } +} + diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs new file mode 100644 index 00000000000..7433a31dfd5 --- /dev/null +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -0,0 +1,140 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Plan generation for "how should sleds be initialized". + +use crate::bootstrap::{ + config::BOOTSTRAP_AGENT_PORT, params::SledAgentRequest, +}; +use crate::rack_setup::config::SetupServiceConfig as Config; +use serde::{Deserialize, Serialize}; +use slog::Logger; +use std::collections::HashMap; +use std::net::{Ipv6Addr, SocketAddrV6}; +use std::path::{Path, PathBuf}; +use thiserror::Error; +use uuid::Uuid; + +fn rss_sled_plan_path() -> PathBuf { + Path::new(omicron_common::OMICRON_CONFIG_PATH) + .join("rss-sled-plan.toml") +} + +/// Describes errors which may occur while generating a plan for sleds. +#[derive(Error, Debug)] +pub enum PlanError { + #[error("I/O error while {message}: {err}")] + Io { + message: String, + #[source] + err: std::io::Error, + }, + + #[error("Cannot deserialize TOML file at {path}: {err}")] + Toml { path: PathBuf, err: toml::de::Error }, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct Plan { + pub rack_id: Uuid, + pub sleds: HashMap, + + // TODO: Consider putting the rack subnet here? This may be operator-driven + // in the future, so it should exist in the "plan". 
+ // + // TL;DR: The more we decouple rom "rss-config.toml", the easier it'll be to + // switch to an operator-driven interface. +} + +impl Plan { + pub async fn load( + log: &Logger, + ) -> Result, PlanError> { + // If we already created a plan for this RSS to allocate + // subnets/requests to sleds, re-use that existing plan. + let rss_sled_plan_path = rss_sled_plan_path(); + if rss_sled_plan_path.exists() { + info!(log, "RSS plan already created, loading from file"); + + let plan: Self = + toml::from_str( + &tokio::fs::read_to_string(&rss_sled_plan_path).await.map_err( + |err| PlanError::Io { + message: format!( + "Loading RSS plan {rss_sled_plan_path:?}" + ), + err, + }, + )?, + ) + .map_err(|err| PlanError::Toml { + path: rss_sled_plan_path, + err, + })?; + Ok(Some(plan)) + } else { + Ok(None) + } + } + + pub async fn create( + log: &Logger, + config: &Config, + bootstrap_addrs: impl IntoIterator, + ) -> Result { + let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); + + let allocations = bootstrap_addrs.map(|(idx, bootstrap_addr)| { + info!( + log, + "Creating plan for the sled at {:?}", bootstrap_addr + ); + let bootstrap_addr = + SocketAddrV6::new(bootstrap_addr, BOOTSTRAP_AGENT_PORT, 0, 0); + let sled_subnet_index = + u8::try_from(idx + 1).expect("Too many peers!"); + let subnet = config.sled_subnet(sled_subnet_index); + + ( + bootstrap_addr, + SledAgentRequest { + id: Uuid::new_v4(), + subnet + }, + ) + }); + + info!(log, "Serializing plan"); + + let mut sleds = std::collections::HashMap::new(); + for (addr, allocation) in allocations { + sleds.insert(addr, allocation); + } + + let plan = Self { + rack_id: Uuid::new_v4(), + sleds, + }; + + // Once we've constructed a plan, write it down to durable storage. + let serialized_plan = + toml::Value::try_from(&plan).unwrap_or_else(|e| { + panic!("Cannot serialize configuration: {:#?}: {}", plan, e) + }); + let plan_str = toml::to_string(&serialized_plan) + .expect("Cannot turn config to string"); + + info!(log, "Plan serialized as: {}", plan_str); + let path = rss_sled_plan_path(); + tokio::fs::write(&path, plan_str).await.map_err(|err| { + PlanError::Io { + message: format!("Storing RSS sled plan to {path:?}"), + err, + } + })?; + info!(log, "Sled plan written to storage"); + + Ok(plan) + } +} diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 00f7230a3a0..2b25b8fa55a 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -6,15 +6,25 @@ use super::config::SetupServiceConfig as Config; use crate::bootstrap::{ - config::BOOTSTRAP_AGENT_PORT, discovery::PeerMonitorObserver, + discovery::PeerMonitorObserver, params::SledAgentRequest, rss_handle::BootstrapAgentHandle, }; use crate::params::{DatasetEnsureBody, ServiceRequest, ServiceType}; +use crate::rack_setup::plan::service::{ + PlanError as ServicePlanError, + Plan as ServicePlan, +}; +use crate::rack_setup::plan::sled::{ + PlanError as SledPlanError, + Plan as SledPlan, +}; use internal_dns_client::names::{AAAA, SRV}; -use omicron_common::address::{ - get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, - NEXUS_EXTERNAL_PORT, NEXUS_INTERNAL_PORT, RSS_RESERVED_ADDRESSES, +use nexus_client::{ + Client as NexusClient, + Error as NexusError, + types as NexusTypes, }; +use omicron_common::address::{get_sled_address, NEXUS_INTERNAL_PORT}; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, }; @@ -26,17 +36,10 @@ use sled_agent_client::{ }; use 
slog::Logger; use std::collections::{HashMap, HashSet}; -use std::net::{Ipv6Addr, SocketAddrV6}; -use std::path::PathBuf; +use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::path::{Path, PathBuf}; use thiserror::Error; use tokio::sync::{Mutex, OnceCell}; -use uuid::Uuid; - -// The number of Nexus instances to create from RSS. -const NEXUS_COUNT: usize = 1; - -// The number of CRDB instances to create from RSS. -const CRDB_COUNT: usize = 1; // The minimum number of sleds to initialize the rack. const MINIMUM_SLED_COUNT: usize = 1; @@ -51,6 +54,12 @@ pub enum SetupServiceError { err: std::io::Error, }, + #[error("Cannot create plan for sled services: {0}")] + ServicePlan(#[from] ServicePlanError), + + #[error("Cannot create plan for sled setup: {0}")] + SledPlan(#[from] SledPlanError), + #[error("Bad configuration for setting up rack: {0}")] BadConfig(String), @@ -60,8 +69,8 @@ pub enum SetupServiceError { #[error("Error making HTTP request to Sled Agent: {0}")] SledApi(#[from] SledAgentError), - #[error("Cannot deserialize TOML file at {path}: {err}")] - Toml { path: PathBuf, err: toml::de::Error }, + #[error("Error making HTTP request to Nexus: {0}")] + NexusApi(#[from] NexusError), #[error("Failed to monitor for peers: {0}")] PeerMonitor(#[from] tokio::sync::broadcast::error::RecvError), @@ -140,18 +149,8 @@ impl Service { } } -fn rss_sled_plan_path() -> std::path::PathBuf { - std::path::Path::new(omicron_common::OMICRON_CONFIG_PATH) - .join("rss-sled-plan.toml") -} - -fn rss_service_plan_path() -> std::path::PathBuf { - std::path::Path::new(omicron_common::OMICRON_CONFIG_PATH) - .join("rss-service-plan.toml") -} - -fn rss_completed_plan_path() -> std::path::PathBuf { - std::path::Path::new(omicron_common::OMICRON_CONFIG_PATH) +fn rss_completed_plan_path() -> PathBuf { + Path::new(omicron_common::OMICRON_CONFIG_PATH) .join("rss-plan-completed.marker") } @@ -173,28 +172,6 @@ enum PeerExpectation { CreateNewPlan(usize), } -struct AddressBumpAllocator { - last_addr: Ipv6Addr, -} - -// TODO: Testable? -// TODO: Could exist in another file? -impl AddressBumpAllocator { - fn new(sled_addr: Ipv6Addr) -> Self { - Self { last_addr: sled_addr } - } - - fn next(&mut self) -> Option { - let mut segments: [u16; 8] = self.last_addr.segments(); - segments[7] = segments[7].checked_add(1)?; - if segments[7] > RSS_RESERVED_ADDRESSES { - return None; - } - self.last_addr = Ipv6Addr::from(segments); - Some(self.last_addr) - } -} - /// The implementation of the Rack Setup Service. struct ServiceInner { log: Logger, @@ -330,298 +307,41 @@ impl ServiceInner { }; retry_notify(internal_service_policy(), services_put, log_failure) .await?; - Ok(()) - } - async fn load_sled_plan( - &self, - ) -> Result>, SetupServiceError> - { - // If we already created a plan for this RSS to allocate - // subnets/requests to sleds, re-use that existing plan. 
- let rss_sled_plan_path = rss_sled_plan_path(); - if rss_sled_plan_path.exists() { - info!(self.log, "RSS plan already created, loading from file"); - - let plan: std::collections::HashMap = - toml::from_str( - &tokio::fs::read_to_string(&rss_sled_plan_path).await.map_err( - |err| SetupServiceError::Io { - message: format!( - "Loading RSS plan {rss_sled_plan_path:?}" - ), - err, - }, - )?, - ) - .map_err(|err| SetupServiceError::Toml { - path: rss_sled_plan_path, - err, - })?; - Ok(Some(plan)) - } else { - Ok(None) - } - } - - async fn create_sled_plan( - &self, - config: &Config, - bootstrap_addrs: impl IntoIterator, - ) -> Result, SetupServiceError> { - let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); - - let allocations = bootstrap_addrs.map(|(idx, bootstrap_addr)| { - info!( - self.log, - "Creating plan for the sled at {:?}", bootstrap_addr - ); - let bootstrap_addr = - SocketAddrV6::new(bootstrap_addr, BOOTSTRAP_AGENT_PORT, 0, 0); - let sled_subnet_index = - u8::try_from(idx + 1).expect("Too many peers!"); - let subnet = config.sled_subnet(sled_subnet_index); - - ( - bootstrap_addr, - SledAgentRequest { subnet }, - ) - }); - info!(self.log, "Serializing plan"); + // Initialize DNS records for the Nexus service. + let services: Vec<_> = services.iter().filter(|svc| { + matches!(svc.service_type, crate::params::ServiceType::Nexus { .. }) + }).collect(); - let mut plan = std::collections::HashMap::new(); - for (addr, allocation) in allocations { - plan.insert(addr, allocation); + // Early-exit for non-Nexus case + if services.is_empty() { + return Ok(()); } - // Once we've constructed a plan, write it down to durable storage. - let serialized_plan = - toml::Value::try_from(&plan).unwrap_or_else(|e| { - panic!("Cannot serialize configuration: {:#?}: {}", plan, e) - }); - let plan_str = toml::to_string(&serialized_plan) - .expect("Cannot turn config to string"); - - info!(self.log, "Plan serialized as: {}", plan_str); - let path = rss_sled_plan_path(); - tokio::fs::write(&path, plan_str).await.map_err(|err| { - SetupServiceError::Io { - message: format!("Storing RSS sled plan to {path:?}"), - err, - } - })?; - info!(self.log, "Sled plan written to storage"); - - Ok(plan) - } - - // Gets a zpool UUID from the sled. 
- async fn get_a_zpool_from_sled( - &self, - address: SocketAddrV6, - ) -> Result { - let dur = std::time::Duration::from_secs(60); - let client = reqwest::ClientBuilder::new() - .connect_timeout(dur) - .timeout(dur) - .build() - .map_err(SetupServiceError::HttpClient)?; - let client = SledAgentClient::new_with_client( - &format!("http://{}", address), - client, - self.log.new(o!("SledAgentClient" => address.to_string())), - ); - - let get_zpools = || async { - let zpools: Vec = client - .zpools_get() - .await - .map(|response| { - response.into_inner() - .into_iter() - .map(|zpool| zpool.id) - .collect() - }) - .map_err(|err| { - BackoffError::transient( - SetupServiceError::SledApi(err) + // Otherwise, insert DNS records for Nexus + let aaaa = services + .iter() + .map(|service| { + ( + AAAA::Zone(service.id), + SocketAddrV6::new( + service.addresses[0], + NEXUS_INTERNAL_PORT, + 0, + 0, ) - })?; - - if zpools.is_empty() { - return Err(BackoffError::transient( - SetupServiceError::SledInitialization("Awaiting zpools".to_string()) - )); - } - - Ok(zpools) - }; - let log_failure = |error, _| { - warn!(self.log, "failed to get zpools"; "error" => ?error); - }; - let zpools = retry_notify( - internal_service_policy(), - get_zpools, - log_failure, - ) - .await?; - - Ok(zpools[0]) - } - - async fn load_service_plan( - &self, - ) -> Result>, SetupServiceError> - { - // If we already created a plan for this RSS to allocate - // services to sleds, re-use that existing plan. - let rss_service_plan_path = rss_service_plan_path(); - if rss_service_plan_path.exists() { - info!(self.log, "RSS plan already created, loading from file"); - - let plan: std::collections::HashMap = - toml::from_str( - &tokio::fs::read_to_string(&rss_service_plan_path).await.map_err( - |err| SetupServiceError::Io { - message: format!( - "Loading RSS plan {rss_service_plan_path:?}" - ), - err, - }, - )?, ) - .map_err(|err| SetupServiceError::Toml { - path: rss_service_plan_path, - err, - })?; - Ok(Some(plan)) - } else { - Ok(None) - } - } - async fn create_service_plan( - &self, - config: &Config, - sled_addrs: &Vec, - ) -> Result, SetupServiceError> { - let reserved_rack_subnet = ReservedRackSubnet::new(config.az_subnet()); - let dns_subnets = reserved_rack_subnet.get_dns_subnets(); - - let mut allocations = vec![]; - - for idx in 0..sled_addrs.len() { - let sled_address = sled_addrs[idx]; - let sled_subnet_index = - u8::try_from(idx + 1).expect("Too many peers!"); - let subnet = config.sled_subnet(sled_subnet_index); - let mut addr_alloc = - AddressBumpAllocator::new(*get_sled_address(subnet).ip()); - - let mut request = SledRequest::default(); - - // The first enumerated sleds get assigned the responsibility - // of hosting Nexus. - if idx < NEXUS_COUNT { - let address = addr_alloc.next().expect("Not enough addrs"); - request.services.push(ServiceRequest { - id: Uuid::new_v4(), - name: "nexus".to_string(), - addresses: vec![address], - gz_addresses: vec![], - service_type: ServiceType::Nexus { - internal_address: SocketAddrV6::new( - address, - NEXUS_INTERNAL_PORT, - 0, - 0, - ), - external_address: SocketAddrV6::new( - address, - NEXUS_EXTERNAL_PORT, - 0, - 0, - ), - }, - }) - } - - // The first enumerated sleds host the CRDB datasets, using - // zpools described from the underlying config file. 
- if idx < CRDB_COUNT { - let zpool_id = self.get_a_zpool_from_sled(sled_address).await?; - - let address = SocketAddrV6::new( - addr_alloc.next().expect("Not enough addrs"), - omicron_common::address::COCKROACH_PORT, - 0, - 0, - ); - request.datasets.push(DatasetEnsureBody { - id: Uuid::new_v4(), - zpool_id, - dataset_kind: - crate::params::DatasetKind::CockroachDb { - all_addresses: vec![address], - }, - address, - }); - } - - // The first enumerated sleds get assigned the additional - // responsibility of being internal DNS servers. - if idx < dns_subnets.len() { - let dns_subnet = &dns_subnets[idx]; - let dns_addr = dns_subnet.dns_address().ip(); - request.dns_services.push(ServiceRequest { - id: Uuid::new_v4(), - name: "internal-dns".to_string(), - addresses: vec![dns_addr], - gz_addresses: vec![dns_subnet.gz_address().ip()], - service_type: ServiceType::InternalDns { - server_address: SocketAddrV6::new( - dns_addr, - DNS_SERVER_PORT, - 0, - 0, - ), - dns_address: SocketAddrV6::new( - dns_addr, DNS_PORT, 0, 0, - ), - }, - }); - } - - allocations.push(( - sled_address, - request - )); - } - - let mut plan = std::collections::HashMap::new(); - for (addr, allocation) in allocations { - plan.insert(addr, allocation); - } - - // Once we've constructed a plan, write it down to durable storage. - let serialized_plan = - toml::Value::try_from(&plan).unwrap_or_else(|e| { - panic!("Cannot serialize configuration: {:#?}: {}", plan, e) - }); - let plan_str = toml::to_string(&serialized_plan) - .expect("Cannot turn config to string"); - - info!(self.log, "Plan serialized as: {}", plan_str); - let path = rss_service_plan_path(); - tokio::fs::write(&path, plan_str).await.map_err(|err| { - SetupServiceError::Io { - message: format!("Storing RSS service plan to {path:?}"), - err, - } - })?; - info!(self.log, "Service plan written to storage"); + }) + .collect::>(); + let srv_key = SRV::Service("nexus".into()); + self.dns_servers + .get() + .expect("DNS servers must be initialized first") + .insert_dns_records(&self.log, aaaa, srv_key) + .await?; - Ok(plan) + Ok(()) } // Waits for sufficient neighbors to exist so the initial set of requests @@ -667,6 +387,108 @@ impl ServiceInner { } } + async fn handoff_to_nexus( + &self, + config: &Config, + sled_plan: &SledPlan, + service_plan: &ServicePlan, + ) -> Result<(), SetupServiceError> { + info!(self.log, "Handing off control to Nexus"); + + let resolver = internal_dns_client::multiclient::create_resolver(config.az_subnet()) + .expect("Failed to create DNS resolver"); + let response = resolver.lookup_ip( + &SRV::Service("nexus".to_string()).to_string() + ).await.expect("Failed to lookup IP"); + + let nexus_address = response.iter() + .next() + .map(|addr| { + SocketAddr::new(addr, NEXUS_INTERNAL_PORT) + }) + .expect("no addresses returned from DNS resolver"); + info!(self.log, "Nexus address: {}", nexus_address.to_string()); + + let nexus_client = NexusClient::new( + &format!("http://{}", nexus_address), + self.log.new(o!("component" => "NexusClient")) + ); + + // Ensure we can quickly look up "Sled Agent Address" -> "UUID of sled". + // + // We need the ID when passing info to Nexus. + let mut id_map = HashMap::new(); + for (_, sled_request) in sled_plan.sleds.iter() { + id_map.insert(get_sled_address(sled_request.subnet), sled_request.id); + } + + // Convert all the information we have about services and datasets into + // a format which can be processed by Nexus. 
+ let mut services: Vec = vec![]; + let mut datasets: Vec = vec![]; + for (addr, service_request) in service_plan.services.iter() { + let sled_id = *id_map.get(addr) + .expect("Sled address in service plan, but not sled plan"); + + for svc in service_request.services.iter().chain(service_request.dns_services.iter()) { + let kind = match svc.service_type { + ServiceType::Nexus { .. } => NexusTypes::ServiceKind::Nexus, + ServiceType::InternalDns { .. } => NexusTypes::ServiceKind::InternalDNS, + ServiceType::Oximeter => NexusTypes::ServiceKind::Oximeter, + }; + + services.push( + NexusTypes::ServicePutRequest { + service_id: svc.id, + sled_id, + // TODO: Should this be a vec, or a single value? + address: svc.addresses[0], + kind, + } + ) + } + + for dataset in service_request.datasets.iter() { + datasets.push( + NexusTypes::DatasetCreateRequest { + zpool_id: dataset.zpool_id, + dataset_id: dataset.id, + request: NexusTypes::DatasetPutRequest { + address: dataset.address.to_string(), + kind: dataset.dataset_kind.clone().into() + }, + } + ) + } + } + + let request = NexusTypes::RackInitializationRequest { + services, + datasets, + }; + + let notify_nexus = || async { + nexus_client.rack_initialization_complete( + &sled_plan.rack_id, + &request, + ) + .await + .map_err(BackoffError::transient) + }; + let log_failure = |err, _| { + info!(self.log, "Failed to handoff to nexus: {err}"); + }; + + retry_notify( + internal_service_policy(), + notify_nexus, + log_failure, + ).await?; + + info!(self.log, "Handoff to Nexus is complete"); + Ok(()) + } + // In lieu of having an operator send requests to all sleds via an // initialization service, the sled-agent configuration may allow for the // automated injection of setup requests from a sled. @@ -678,8 +500,6 @@ impl ServiceInner { // time, it creates an allocation plan to provision subnets to an initial // set of sleds. // - // This plan is stored at "rss_sled_plan_path()". - // // 2. SLED ALLOCATION PLAN EXECUTION. The RSS then carries out this plan, making // requests to the sleds enumerated within the "allocation plan". // @@ -687,9 +507,6 @@ impl ServiceInner { // on their respsective subnets, they can be queried to create an // allocation plan for services. // - // This plan - for what services go where - is stored at - // "rss_service_plan_path()". - // // 4. SERVICE ALLOCATION PLAN EXECUTION. RSS requests that the services // outlined in the aforementioned step are created. // @@ -715,6 +532,17 @@ impl ServiceInner { self.log, "RSS configuration looks like it has already been applied", ); + + let sled_plan = SledPlan::load(&self.log).await? + .expect("Sled plan should exist if completed marker exists"); + let service_plan = ServicePlan::load(&self.log).await? 
+ .expect("Service plan should exist if completed marker exists"); + self.handoff_to_nexus( + &config, + &sled_plan, + &service_plan + ).await?; + return Ok(()); } else { info!(self.log, "RSS configuration has not been fully applied yet",); @@ -723,9 +551,9 @@ impl ServiceInner { // Wait for either: // - All the peers to re-load an old plan (if one exists) // - Enough peers to create a new plan (if one does not exist) - let maybe_sled_plan = self.load_sled_plan().await?; + let maybe_sled_plan = SledPlan::load(&self.log).await?; let expectation = if let Some(plan) = &maybe_sled_plan { - PeerExpectation::LoadOldPlan(plan.keys().map(|a| *a.ip()).collect()) + PeerExpectation::LoadOldPlan(plan.sleds.keys().map(|a| *a.ip()).collect()) } else { PeerExpectation::CreateNewPlan(MINIMUM_SLED_COUNT) }; @@ -743,13 +571,13 @@ impl ServiceInner { plan } else { info!(self.log, "Creating new allocation plan"); - self.create_sled_plan(config, addrs).await? + SledPlan::create(&self.log, &config, addrs).await? }; // Forward the sled initialization requests to our sled-agent. local_bootstrap_agent .initialize_sleds( - plan.iter() + plan.sleds.iter() .map(|(bootstrap_addr, initialization_request)| { ( *bootstrap_addr, @@ -761,7 +589,7 @@ impl ServiceInner { .await .map_err(SetupServiceError::SledInitialization)?; - let sled_addresses: Vec<_> = plan.iter() + let sled_addresses: Vec<_> = plan.sleds.iter() .map(|(_, initialization_request)| { get_sled_address( initialization_request.subnet, @@ -771,15 +599,15 @@ impl ServiceInner { // Now that sled agents have been initialized, we can create // a service allocation plan. - let service_plan = if let Some(plan) = self.load_service_plan().await? { + let service_plan = if let Some(plan) = ServicePlan::load(&self.log).await? { plan } else { - self.create_service_plan(&config, &sled_addresses).await? + ServicePlan::create(&self.log, &config, &sled_addresses).await? }; // Set up internal DNS services. futures::future::join_all( - service_plan.iter() + service_plan.services.iter() .filter(|(_, service_request)| { // Only send requests to sleds that are supposed to be running // DNS services. @@ -808,7 +636,7 @@ impl ServiceInner { .expect("Already set DNS servers"); // Issue the crdb initialization requests to all sleds. - futures::future::join_all(service_plan.iter().map( + futures::future::join_all(service_plan.services.iter().map( |(sled_address, services_request)| async move { self.initialize_crdb( *sled_address, @@ -829,7 +657,7 @@ impl ServiceInner { // Note that this must happen *after* the dataset initialization, // to ensure that CockroachDB has been initialized before Nexus // starts. - futures::future::join_all(service_plan.iter().map( + futures::future::join_all(service_plan.services.iter().map( |(sled_address, services_request)| async move { // With the current implementation of "initialize_services", // we must provide the set of *all* services that should be @@ -866,6 +694,15 @@ impl ServiceInner { }, )?; + // At this point, even if we reboot, we must not try to manage sleds, + // services, or DNS records. + + self.handoff_to_nexus( + &config, + &plan, + &service_plan + ).await?; + // TODO Questions to consider: // - What if a sled comes online *right after* this setup? How does // it get a /64? 
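The handoff above locates Nexus through the rack's internal DNS rather than
through static configuration: RSS inserts an SRV record for the "nexus"
service into the internal DNS servers once the Nexus zones are running, and
later resolves that same name to reach the internal API. A minimal sketch of
that lookup, mirroring the calls used in `handoff_to_nexus` (error handling
simplified; the `Ipv6Subnet<AZ_PREFIX>` parameter type and `AZ_PREFIX` import
are assumptions about `omicron_common::address`):

    use internal_dns_client::names::SRV;
    use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, NEXUS_INTERNAL_PORT};
    use std::net::SocketAddr;

    /// Resolve the internal DNS name for Nexus to a reachable socket address.
    async fn find_nexus(az_subnet: Ipv6Subnet<AZ_PREFIX>) -> Option<SocketAddr> {
        // Resolver pointed at the AZ's internal DNS servers.
        let resolver =
            internal_dns_client::multiclient::create_resolver(az_subnet).ok()?;
        // Look up the SRV name under which RSS registered Nexus.
        let response = resolver
            .lookup_ip(&SRV::Service("nexus".to_string()).to_string())
            .await
            .ok()?;
        // Any returned address will do; the internal API listens on a
        // well-known port.
        response
            .iter()
            .next()
            .map(|ip| SocketAddr::new(ip, NEXUS_INTERNAL_PORT))
    }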
diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 3b31854628e..fc69359008f 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -37,6 +37,7 @@ impl Server { pub async fn start( config: &Config, log: Logger, + sled_id: Uuid, addr: SocketAddrV6, ) -> Result { info!(log, "setting up sled agent server"); @@ -48,7 +49,7 @@ impl Server { )); let sled_agent = - SledAgent::new(&config, log.clone(), nexus_client.clone(), addr) + SledAgent::new(&config, log.clone(), nexus_client.clone(), sled_id, addr) .await .map_err(|e| e.to_string())?; @@ -66,7 +67,6 @@ impl Server { .start(); let sled_address = http_server.local_addr(); - let sled_id = config.id; let nexus_notifier_handle = tokio::task::spawn(async move { // Notify the control plane that we're up, and continue trying this // until it succeeds. We retry with an randomized, capped exponential diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 6260191f58b..5d6481fcc34 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -112,10 +112,9 @@ impl SledAgent { config: &Config, log: Logger, nexus_client: Arc, + id: Uuid, sled_address: SocketAddrV6, ) -> Result { - let id = &config.id; - // Pass the "parent_log" to all subcomponents that want to set their own // "component" value. let parent_log = log.clone(); @@ -218,7 +217,7 @@ impl SledAgent { let storage = StorageManager::new( &parent_log, - *id, + id, nexus_client.clone(), etherstub.clone(), *sled_address.ip(), @@ -250,7 +249,7 @@ impl SledAgent { .await?; Ok(SledAgent { - id: config.id, + id, storage, instances, nexus_client, diff --git a/sled-agent/src/sp.rs b/sled-agent/src/sp.rs index f47bb4110de..d0810a4c26d 100644 --- a/sled-agent/src/sp.rs +++ b/sled-agent/src/sp.rs @@ -4,7 +4,6 @@ //! Interface to a (currently simulated) SP / RoT. -use crate::config::Config as SledConfig; use crate::illumos; use crate::illumos::dladm::CreateVnicError; use crate::illumos::dladm::Dladm; @@ -69,11 +68,10 @@ impl SpHandle { /// A return value of `Ok(None)` means no SP is available. 
pub async fn detect( sp_config: &Option, - sled_config: &SledConfig, log: &Logger, ) -> Result, SpError> { let inner = if let Some(config) = sp_config.as_ref() { - let sim_sp = start_simulated_sp(config, sled_config, log).await?; + let sim_sp = start_simulated_sp(config, log).await?; Some(Inner::SimulatedSp(sim_sp)) } else { None @@ -199,7 +197,6 @@ struct SimulatedSp { async fn start_simulated_sp( sp_config: &GimletConfig, - sled_config: &SledConfig, log: &Logger, ) -> Result { // Is our simulated SP going to bind to addresses (acting like management @@ -240,7 +237,6 @@ async fn start_simulated_sp( info!(log, "starting simulated gimlet SP"); let sp_log = log.new(o!( "component" => "sp-sim", - "server" => sled_config.id.clone().to_string(), )); let sp = Arc::new( sp_sim::Gimlet::spawn(&sp_config, sp_log) @@ -252,7 +248,6 @@ async fn start_simulated_sp( info!(log, "starting simulated gimlet RoT"); let rot_log = log.new(o!( "component" => "rot-sim", - "server" => sled_config.id.clone().to_string(), )); let transport = SimRotTransport { sp: Arc::clone(&sp), responses: VecDeque::new() }; diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index 5640bc69c81..c8652efb5ea 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -6,8 +6,7 @@ # |...............| <- This /56 is the Rack Subnet rack_subnet = "fd00:1122:3344:0100::" -[[request]] - +# [[request]] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate crucible datasets. # [[request.dataset]] diff --git a/smf/sled-agent/config.toml b/smf/sled-agent/config.toml index 53ec733e9ec..44a237e1930 100644 --- a/smf/sled-agent/config.toml +++ b/smf/sled-agent/config.toml @@ -1,9 +1,6 @@ # Sled Agent Configuration -id = "fb0f7546-4d46-40ca-9d56-cbb810684ca7" - # TODO: Remove this address - # Internal address of Nexus nexus_address = "[fd00:1122:3344:0101::3]:12221" From dfa614b4781c11a373a329810a5927b323377994 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 10 Jun 2022 12:57:54 -0400 Subject: [PATCH 12/88] Handoff to Nexus is hacky, but working --- common/src/backoff.rs | 9 ++- common/src/nexus_config.rs | 2 + nexus/src/app/mod.rs | 18 ++++-- nexus/src/app/rack.rs | 68 +++++++++++----------- nexus/src/app/update.rs | 16 +++-- nexus/src/config.rs | 5 ++ nexus/src/context.rs | 32 +++++++--- nexus/src/db/datastore.rs | 39 ++++++++++++- nexus/src/external_api/http_entrypoints.rs | 10 ++-- nexus/src/lib.rs | 42 ++++++++----- nexus/test-utils/src/lib.rs | 3 +- nexus/tests/config.test.toml | 1 + sled-agent/src/bootstrap/agent.rs | 1 + sled-agent/src/bootstrap/params.rs | 3 + sled-agent/src/config.rs | 3 - sled-agent/src/instance.rs | 18 +++--- sled-agent/src/instance_manager.rs | 10 ++-- sled-agent/src/nexus.rs | 56 ++++++++++++++++++ sled-agent/src/rack_setup/plan/sled.rs | 7 ++- sled-agent/src/server.rs | 25 ++++---- sled-agent/src/services.rs | 11 ++++ sled-agent/src/sled_agent.rs | 21 ++++--- sled-agent/src/storage_manager.rs | 46 +++++++-------- smf/sled-agent/config.toml | 4 -- 24 files changed, 308 insertions(+), 142 deletions(-) diff --git a/common/src/backoff.rs b/common/src/backoff.rs index 128bf932d0d..bcf726ff2a0 100644 --- a/common/src/backoff.rs +++ b/common/src/backoff.rs @@ -13,14 +13,19 @@ pub use ::backoff::{backoff::Backoff, ExponentialBackoff, Notify}; /// Return a backoff policy appropriate for retrying internal services /// indefinitely. 
pub fn internal_service_policy() -> ::backoff::ExponentialBackoff { - const INITIAL_INTERVAL: Duration = Duration::from_millis(250); const MAX_INTERVAL: Duration = Duration::from_secs(60 * 60); + internal_service_policy_with_max(MAX_INTERVAL) +} + +pub fn internal_service_policy_with_max(max_duration: Duration) -> ::backoff::ExponentialBackoff { + const INITIAL_INTERVAL: Duration = Duration::from_millis(250); ::backoff::ExponentialBackoff { current_interval: INITIAL_INTERVAL, initial_interval: INITIAL_INTERVAL, multiplier: 2.0, - max_interval: MAX_INTERVAL, + max_interval: max_duration, max_elapsed_time: None, ..backoff::ExponentialBackoff::default() } + } diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index f1325ae336d..085434ebf74 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -102,6 +102,8 @@ pub enum Database { pub struct RuntimeConfig { /// Uuid of the Nexus instance pub id: Uuid, + /// Uuid of the Rack where Nexus is executing + pub rack_id: Uuid, /// Dropshot configuration for external API server pub dropshot_external: ConfigDropshot, /// Dropshot configuration for internal API server diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 1c3620de7e7..13cf48b91ec 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -54,15 +54,12 @@ pub struct Nexus { /// uuid for this nexus instance. id: Uuid, - /// uuid for this rack (TODO should also be in persistent storage) + /// uuid for this rack rack_id: Uuid, /// general server log log: Logger, - /// cached rack identity metadata - api_rack_identity: db::model::RackIdentity, - /// persistent storage for resources in the control plane db_datastore: Arc, @@ -146,7 +143,6 @@ impl Nexus { id: config.runtime.id, rack_id, log: log.new(o!()), - api_rack_identity: db::model::RackIdentity::new(rack_id), db_datastore: Arc::clone(&db_datastore), authz: Arc::clone(&authz), sec_client: Arc::clone(&sec_client), @@ -217,6 +213,18 @@ impl Nexus { } } + /// Returns an [`OpContext`] used for background tasks. + // TODO: dap@ recommends using a different user for this, other than + // "internal_db_init". 
+ pub fn opctx_for_background(&self) -> OpContext { + OpContext::for_background( + self.log.new(o!("component" => "Background Work")), + Arc::clone(&self.authz), + authn::Context::internal_db_init(), + Arc::clone(&self.datastore()), + ) + } + /// Returns an [`OpContext`] used for authenticating external requests pub fn opctx_external_authn(&self) -> &OpContext { &self.opctx_external_authn diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 8b9728c7f77..bf1f2026e8a 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -7,59 +7,61 @@ use crate::authz; use crate::context::OpContext; use crate::db; +use crate::db::lookup::LookupPath; use crate::internal_api::params::RackInitializationRequest; -use futures::future::ready; -use futures::StreamExt; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; -use omicron_common::api::external::ListResult; +use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; -use omicron_common::api::external::LookupType; -use omicron_common::api::external::ResourceType; use uuid::Uuid; impl super::Nexus { - pub(crate) fn as_rack(&self) -> db::model::Rack { - db::model::Rack { - identity: self.api_rack_identity.clone(), - initialized: true, - tuf_base_url: None, - } - } - pub async fn racks_list( &self, opctx: &OpContext, pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResult { - opctx.authorize(authz::Action::Read, &authz::FLEET).await?; - - if let Some(marker) = pagparams.marker { - if *marker >= self.rack_id { - return Ok(futures::stream::empty().boxed()); - } - } + ) -> ListResultVec { + self.db_datastore.rack_list(&opctx, pagparams).await + } - Ok(futures::stream::once(ready(Ok(self.as_rack()))).boxed()) + // TODO: Use this, instead of the manual one. + // + // Note that this will require insertion of the rack to occur + // during the "populate" steps. + /* + pub async fn rack_lookup( + &self, + opctx: &OpContext, + rack_id: &Uuid, + ) -> LookupResult { + let (.., db_rack) = LookupPath::new(opctx, &self.db_datastore) + .rack_id(*rack_id) + .fetch() + .await?; + Ok(db_rack) } + */ pub async fn rack_lookup( &self, opctx: &OpContext, rack_id: &Uuid, ) -> LookupResult { - let authz_rack = authz::Rack::new( - authz::FLEET, - *rack_id, - LookupType::ById(*rack_id), - ); - opctx.authorize(authz::Action::Read, &authz_rack).await?; + self.db_datastore.rack_lookup_manual(opctx, *rack_id).await + } - if *rack_id == self.rack_id { - Ok(self.as_rack()) - } else { - Err(Error::not_found_by_id(ResourceType::Rack, rack_id)) - } + /// Ensures that a rack exists in the DB. + /// + /// If the rack already exists, this function is a no-op. + pub async fn rack_insert( + &self, + opctx: &OpContext, + rack_id: Uuid, + ) -> Result<(), Error> { + self.datastore() + .rack_insert(opctx, &db::model::Rack::new(rack_id)) + .await?; + Ok(()) } /// Marks the rack as initialized with a set of services. 
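Since `rack_insert` above is documented as a no-op when the rack row already exists, startup code can call it unconditionally and then look the rack up. A small usage sketch; the helper name, crate paths, and error handling are illustrative assumptions rather than code taken from this patch.

    use omicron_common::api::external::Error;
    use uuid::Uuid;

    // Illustrative only: assumes `Nexus` and `db::model::Rack` are
    // reachable at these paths from outside the crate.
    async fn ensure_rack_record(
        nexus: &omicron_nexus::Nexus,
        rack_id: Uuid,
    ) -> Result<omicron_nexus::db::model::Rack, Error> {
        let opctx = nexus.opctx_for_background();
        // Idempotent: inserting an already-present rack id changes nothing.
        nexus.rack_insert(&opctx, rack_id).await?;
        // The row is now guaranteed to exist, so the lookup should succeed.
        nexus.rack_lookup(&opctx, &rack_id).await
    }

The retry loop added to `ServerContext::new` later in this patch uses the same shape, sleeping briefly between attempts until the insert succeeds.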
diff --git a/nexus/src/app/update.rs b/nexus/src/app/update.rs index 0d6721ec439..65ec3b6ddde 100644 --- a/nexus/src/app/update.rs +++ b/nexus/src/app/update.rs @@ -24,11 +24,15 @@ use tokio::io::AsyncWriteExt; static BASE_ARTIFACT_DIR: &str = "/var/tmp/oxide_artifacts"; impl super::Nexus { - fn tuf_base_url(&self) -> Option { - self.updates_config.as_ref().map(|c| { - let rack = self.as_rack(); + async fn tuf_base_url(&self, opctx: &OpContext) -> Result, Error> { + let rack = self.rack_lookup( + opctx, + &self.rack_id, + ).await?; + + Ok(self.updates_config.as_ref().map(|c| { rack.tuf_base_url.unwrap_or_else(|| c.default_base_url.clone()) - }) + })) } pub async fn updates_refresh_metadata( @@ -43,7 +47,7 @@ impl super::Nexus { } })?; let base_url = - self.tuf_base_url().ok_or_else(|| Error::InvalidRequest { + self.tuf_base_url(opctx).await?.ok_or_else(|| Error::InvalidRequest { message: "updates system not configured".into(), })?; let trusted_root = tokio::fs::read(&updates_config.trusted_root) @@ -129,7 +133,7 @@ impl super::Nexus { artifact: UpdateArtifact, ) -> Result, Error> { let mut base_url = - self.tuf_base_url().ok_or_else(|| Error::InvalidRequest { + self.tuf_base_url(opctx).await?.ok_or_else(|| Error::InvalidRequest { message: "updates system not configured".into(), })?; if !base_url.ends_with('/') { diff --git a/nexus/src/config.rs b/nexus/src/config.rs index a6034a7eea3..a157c4bdbeb 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -327,6 +327,7 @@ mod test { max_vpc_ipv4_subnet_prefix = 27 [runtime] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [runtime.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 @@ -346,6 +347,7 @@ mod test { Config { runtime: RuntimeConfig { id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), + rack_id: "38b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), dropshot_external: ConfigDropshot { bind_address: "10.1.2.3:4567" .parse::() @@ -405,6 +407,7 @@ mod test { address = "[::1]:8123" [runtime] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [runtime.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 @@ -446,6 +449,7 @@ mod test { address = "[::1]:8123" [runtime] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [runtime.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 @@ -501,6 +505,7 @@ mod test { max_vpc_ipv4_subnet_prefix = 100 [runtime] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [runtime.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 diff --git a/nexus/src/context.rs b/nexus/src/context.rs index e0ed637aef3..4cd92a05f91 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -160,7 +160,7 @@ impl ServerContext { let address = response.iter().next().ok_or_else(|| { "no addresses returned from DNS resolver".to_string() })?; - info!(log, "DB addreess: {}", address); + info!(log, "DB address: {}", address); PostgresConfigWithUrl::from_str(&format!( "postgresql://root@[{}]:{}/omicron?sslmode=disable", address, COCKROACH_PORT @@ -169,15 +169,31 @@ impl ServerContext { } }; let pool = db::Pool::new(&db::Config { url }); + let nexus = Nexus::new_with_id( + rack_id, + log.new(o!("component" => "nexus")), + pool, + config, + Arc::clone(&authz), + ); + + // Do not return until a rack exists in the DB with the 
provided UUID. + let populate_ctx = nexus.opctx_for_background(); + loop { + let result = nexus.rack_insert(&populate_ctx, rack_id) + .await; + if let Err(e) = result { + info!(log, "Failed to create initial rack: {}", e); + tokio::time::sleep(std::time::Duration::from_millis(200)).await; + } else { + info!(log, "Rack with UUID {} exists in the database", rack_id); + nexus.rack_lookup(&populate_ctx, &rack_id).await.unwrap(); + break; + } + } Ok(Arc::new(ServerContext { - nexus: Nexus::new_with_id( - rack_id, - log.new(o!("component" => "nexus")), - pool, - config, - Arc::clone(&authz), - ), + nexus, log, external_authn, internal_authn, diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index a386c098eb6..c8297e36aa7 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -175,6 +175,21 @@ impl DataStore { }) } + pub async fn rack_lookup_manual( + &self, + _opctx: &OpContext, + rack_id: Uuid, + ) -> LookupResult { + use db::schema::rack::dsl; + + dsl::rack + .filter(dsl::id.eq(rack_id)) + .select(Rack::as_select()) + .get_result_async(self.pool()) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + /// Update a rack to mark that it has been initialized pub async fn rack_set_initialized( &self, @@ -195,6 +210,7 @@ impl DataStore { // NOTE: This operation could likely be optimized with a CTE, but given // the low-frequency of calls, this optimization has been deferred. + let log = opctx.log.clone(); self.pool_authorized(opctx) .await? .transaction(move |conn| { @@ -207,6 +223,7 @@ impl DataStore { TxnError::CustomError(RackInitError::RackUpdate(e)) })?; if rack.initialized { + info!(log, "Early exit: Rack already initialized"); return Ok(rack); } @@ -236,6 +253,7 @@ impl DataStore { }) })?; } + info!(log, "Inserted services"); for dataset in datasets { use db::schema::dataset::dsl; let zpool_id = dataset.pool_id; @@ -262,9 +280,10 @@ impl DataStore { }) })?; } + info!(log, "Inserted datasets"); // Set the rack to "initialized" once the handoff is complete - diesel::update(rack_dsl::rack) + let rack = diesel::update(rack_dsl::rack) .filter(rack_dsl::id.eq(rack_id)) .set(( rack_dsl::initialized.eq(true), @@ -274,7 +293,9 @@ impl DataStore { .get_result::(conn) .map_err(|e| { TxnError::CustomError(RackInitError::RackUpdate(e)) - }) + })?; + info!(log, "Updated rack (set initialized to true)"); + Ok(rack) }) .await .map_err(|e| match e { @@ -329,6 +350,20 @@ impl DataStore { }) } + pub async fn rack_list( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::rack::dsl; + paginated(dsl::rack, dsl::id, pagparams) + .select(Rack::as_select()) + .load_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + /// Stores a new sled in the database. pub async fn sled_upsert(&self, sled: Sled) -> CreateResult { use db::schema::sled::dsl; diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index bbc1f9a517f..982d4c2397b 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -2871,11 +2871,13 @@ async fn hardware_racks_get( let query = query_params.into_inner(); let handler = async { let opctx = OpContext::for_external_api(&rqctx).await?; - let rack_stream = nexus + let racks = nexus .racks_list(&opctx, &data_page_params_for(&rqctx, &query)?) 
- .await?; - let view_list = to_list::(rack_stream).await; - Ok(HttpResponseOk(ScanById::results_page(&query, view_list)?)) + .await? + .into_iter() + .map(|r| r.into()) + .collect(); + Ok(HttpResponseOk(ScanById::results_page(&query, racks)?)) }; apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 61abe04b1ba..627a48dd681 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -36,7 +36,6 @@ use external_api::http_entrypoints::external_api; use internal_api::http_entrypoints::internal_api; use slog::Logger; use std::sync::Arc; -use uuid::Uuid; #[macro_use] extern crate slog; @@ -82,7 +81,6 @@ impl Server { /// Start a nexus server. pub async fn start( config: &Config, - rack_id: Uuid, log: &Logger, ) -> Result { let log = log.new(o!("name" => config.runtime.id.to_string())); @@ -90,15 +88,7 @@ impl Server { let ctxlog = log.new(o!("component" => "ServerContext")); - let apictx = ServerContext::new(rack_id, ctxlog, &config).await?; - - let http_server_starter_external = dropshot::HttpServerStarter::new( - &config.runtime.dropshot_external, - external_api(), - Arc::clone(&apictx), - &log.new(o!("component" => "dropshot_external")), - ) - .map_err(|error| format!("initializing external server: {}", error))?; + let apictx = ServerContext::new(config.runtime.rack_id, ctxlog, &config).await?; let http_server_starter_internal = dropshot::HttpServerStarter::new( &config.runtime.dropshot_internal, @@ -107,9 +97,34 @@ impl Server { &log.new(o!("component" => "dropshot_internal")), ) .map_err(|error| format!("initializing internal server: {}", error))?; + let http_server_internal = http_server_starter_internal.start(); + // Wait until RSS handoff completes. + let opctx = apictx.nexus.opctx_for_background(); + loop { + let result = apictx.nexus.rack_lookup(&opctx, &config.runtime.rack_id).await; + match result { + Ok(rack) => { + if rack.initialized { + break; + } + info!(log, "Still waiting for rack initialization: {:?}", rack); + }, + Err(e) => { + warn!(log, "Cannot look up rack: {}", e); + }, + } + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + } + + let http_server_starter_external = dropshot::HttpServerStarter::new( + &config.runtime.dropshot_external, + external_api(), + Arc::clone(&apictx), + &log.new(o!("component" => "dropshot_external")), + ) + .map_err(|error| format!("initializing external server: {}", error))?; let http_server_external = http_server_starter_external.start(); - let http_server_internal = http_server_starter_internal.start(); Ok(Server { apictx, http_server_external, http_server_internal }) } @@ -167,8 +182,7 @@ pub async fn run_server(config: &Config) -> Result<(), String> { } else { debug!(log, "registered DTrace probes"); } - let rack_id = Uuid::new_v4(); - let server = Server::start(config, rack_id, &log).await?; + let server = Server::start(config, &log).await?; server.register_as_producer().await; server.wait_for_finish().await } diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index e4eb744e2fa..74c8a7f2d21 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -90,7 +90,6 @@ pub async fn test_setup_with_config( config: &mut omicron_nexus::Config, ) -> ControlPlaneTestContext { let logctx = LogContext::new(test_name, &config.pkg.log); - let rack_id = Uuid::parse_str(RACK_UUID).unwrap(); let log = &logctx.log; // Start up CockroachDB. 
@@ -104,7 +103,7 @@ pub async fn test_setup_with_config( nexus_config::Database::FromUrl { url: database.pg_config().clone() }; config.pkg.timeseries_db.address.set_port(clickhouse.port()); - let server = omicron_nexus::Server::start(&config, rack_id, &logctx.log) + let server = omicron_nexus::Server::start(&config, &logctx.log) .await .unwrap(); server diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 2fc4ddba192..a1b47d7f178 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -39,6 +39,7 @@ max_vpc_ipv4_subnet_prefix = 29 # Identifier for this instance of Nexus. # NOTE: The test suite always overrides this. id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" +rack_id = "f6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" # # NOTE: for the test suite, the port MUST be 0 (in order to bind to any diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index e47e7300240..fc4260837ca 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -269,6 +269,7 @@ impl Agent { self.parent_log.clone(), request.id, sled_address, + request.rack_id, ) .await .map_err(|e| { diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index 5a01f4c6e0b..0cb7cd25246 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -26,6 +26,9 @@ pub struct SledAgentRequest { /// Portion of the IP space to be managed by the Sled Agent. pub subnet: Ipv6Subnet, + + /// Uuid of the rack to which this sled agent belongs. + pub rack_id: Uuid, } #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index a7b0d9fb1ab..fe1b5c57764 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -9,14 +9,11 @@ use crate::illumos::dladm::{self, Dladm, PhysicalLink}; use crate::illumos::zpool::ZpoolName; use dropshot::ConfigLogging; use serde::Deserialize; -use std::net::SocketAddr; use std::path::{Path, PathBuf}; /// Configuration for a sled agent #[derive(Clone, Debug, Deserialize)] pub struct Config { - /// Address of Nexus instance - pub nexus_address: SocketAddr, /// Configuration for the sled agent debug log pub log: ConfigLogging, /// Optional VLAN ID to be used for tagging guest VNICs. diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 03248017869..8ffdc5c5519 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -12,7 +12,7 @@ use crate::illumos::svc::wait_for_service; use crate::illumos::vnic::VnicAllocator; use crate::illumos::zone::{AddressRequest, PROPOLIS_ZONE_PREFIX}; use crate::instance_manager::InstanceTicket; -use crate::nexus::NexusClient; +use crate::nexus::LazyNexusClient; use crate::opte::OptePort; use crate::opte::OptePortAllocator; use crate::params::NetworkInterface; @@ -214,7 +214,7 @@ struct InstanceInner { running_state: Option, // Connection to Nexus - nexus_client: Arc, + lazy_nexus_client: LazyNexusClient, } impl InstanceInner { @@ -243,7 +243,11 @@ impl InstanceInner { ); // Notify Nexus of the state change. - self.nexus_client + self.lazy_nexus_client + .get() + .await + // TODO: Handle me + .unwrap() .cpapi_instances_put( self.id(), &nexus_client::types::InstanceRuntimeState::from( @@ -388,7 +392,7 @@ mockall::mock! 
{ underlay_addr: Ipv6Addr, port_allocator: OptePortAllocator, initial: InstanceHardware, - nexus_client: Arc, + lazy_nexus_client: LazyNexusClient, ) -> Result; pub async fn start( &self, @@ -420,7 +424,7 @@ impl Instance { /// * `port_allocator`: A unique (to the sled) ID generator to /// refer to an OPTE port for the guest network interfaces. /// * `initial`: State of the instance at initialization time. - /// * `nexus_client`: Connection to Nexus, used for sending notifications. + /// * `lazy_nexus_client`: Connection to Nexus, used for sending notifications. // TODO: This arg list is getting a little long; can we clean this up? pub fn new( log: Logger, @@ -429,7 +433,7 @@ impl Instance { underlay_addr: Ipv6Addr, port_allocator: OptePortAllocator, initial: InstanceHardware, - nexus_client: Arc, + lazy_nexus_client: LazyNexusClient, ) -> Result { info!(log, "Instance::new w/initial HW: {:?}", initial); let instance = InstanceInner { @@ -457,7 +461,7 @@ impl Instance { cloud_init_bytes: initial.cloud_init_bytes, state: InstanceStates::new(initial.runtime), running_state: None, - nexus_client, + lazy_nexus_client, }; let inner = Arc::new(Mutex::new(instance)); diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index c3a941ad8a1..a87db943195 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -6,7 +6,7 @@ use crate::illumos::dladm::Etherstub; use crate::illumos::vnic::VnicAllocator; -use crate::nexus::NexusClient; +use crate::nexus::LazyNexusClient; use crate::opte::OptePortAllocator; use crate::params::{ InstanceHardware, InstanceMigrateParams, InstanceRuntimeStateRequested, @@ -31,7 +31,7 @@ pub enum Error { struct InstanceManagerInternal { log: Logger, - nexus_client: Arc, + lazy_nexus_client: LazyNexusClient, // TODO: If we held an object representing an enum of "Created OR Running" // instance, we could avoid the methods within "instance.rs" that panic @@ -53,14 +53,14 @@ impl InstanceManager { /// Initializes a new [`InstanceManager`] object. pub fn new( log: Logger, - nexus_client: Arc, + lazy_nexus_client: LazyNexusClient, etherstub: Etherstub, underlay_addr: Ipv6Addr, ) -> InstanceManager { InstanceManager { inner: Arc::new(InstanceManagerInternal { log: log.new(o!("component" => "InstanceManager")), - nexus_client, + lazy_nexus_client, instances: Mutex::new(BTreeMap::new()), vnic_allocator: VnicAllocator::new("Instance", etherstub), underlay_addr, @@ -119,7 +119,7 @@ impl InstanceManager { self.inner.underlay_addr, self.inner.port_allocator.clone(), initial_hardware, - self.inner.nexus_client.clone(), + self.inner.lazy_nexus_client.clone(), )?; let instance_clone = instance.clone(); let old_instance = instances diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index d0c6da4ba38..8a2be0e0e54 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -6,3 +6,59 @@ pub use crate::mocks::MockNexusClient as NexusClient; #[cfg(not(test))] pub use nexus_client::Client as NexusClient; + +use internal_dns_client::names::SRV; +use omicron_common::address::{ + AZ_PREFIX, NEXUS_INTERNAL_PORT, Ipv6Subnet, +}; +use slog::Logger; +use std::net::Ipv6Addr; +use std::sync::Arc; + +struct Inner { + log: Logger, + addr: Ipv6Addr, + + // TODO: We could also totally cache the resolver / observed IP here? 
+} + +#[derive(Clone)] +pub struct LazyNexusClient { + inner: Arc, +} + +impl LazyNexusClient { + pub fn new(log: Logger, addr: Ipv6Addr) -> Self { + Self { + inner: Arc::new( + Inner { + log, + addr, + } + ) + } + } + + pub async fn get(&self) -> Result { + // TODO: Consider refactoring this: + // - Address as input + // - Lookup "nexus" DNS record + // - Result
as output + let az_subnet = Ipv6Subnet::::new(self.inner.addr); + let resolver = + internal_dns_client::multiclient::create_resolver(az_subnet) + .map_err(|e| format!("Failed to create DNS resolver: {}", e))?; + let response = resolver + .lookup_ip(&SRV::Service("nexus".to_string()).to_string()) + .await + .map_err(|e| format!("Failed to lookup Nexus IP: {}", e))?; + let address = response.iter().next().ok_or_else(|| { + "no addresses returned from DNS resolver".to_string() + })?; + + Ok(NexusClient::new( + &format!("http://[{}]:{}", address, NEXUS_INTERNAL_PORT), + self.inner.log.clone(), + )) + } +} diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index 7433a31dfd5..9154ab6698a 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -85,6 +85,8 @@ impl Plan { ) -> Result { let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); + let rack_id = Uuid::new_v4(); + let allocations = bootstrap_addrs.map(|(idx, bootstrap_addr)| { info!( log, @@ -100,7 +102,8 @@ impl Plan { bootstrap_addr, SledAgentRequest { id: Uuid::new_v4(), - subnet + subnet, + rack_id, }, ) }); @@ -113,7 +116,7 @@ impl Plan { } let plan = Self { - rack_id: Uuid::new_v4(), + rack_id, sleds, }; diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index fc69359008f..2e05648ffc3 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -7,13 +7,12 @@ use super::config::Config; use super::http_entrypoints::api as http_api; use super::sled_agent::SledAgent; -use crate::nexus::NexusClient; +use crate::nexus::LazyNexusClient; use omicron_common::backoff::{ - internal_service_policy, retry_notify, BackoffError, + internal_service_policy_with_max, retry_notify, BackoffError, }; use slog::Logger; use std::net::{SocketAddr, SocketAddrV6}; -use std::sync::Arc; use uuid::Uuid; /// Packages up a [`SledAgent`], running the sled agent API under a Dropshot @@ -39,17 +38,16 @@ impl Server { log: Logger, sled_id: Uuid, addr: SocketAddrV6, + rack_id: Uuid, ) -> Result { info!(log, "setting up sled agent server"); let client_log = log.new(o!("component" => "NexusClient")); - let nexus_client = Arc::new(NexusClient::new( - &format!("http://{}", config.nexus_address), - client_log, - )); + + let lazy_nexus_client = LazyNexusClient::new(client_log, *addr.ip()); let sled_agent = - SledAgent::new(&config, log.clone(), nexus_client.clone(), sled_id, addr) + SledAgent::new(&config, log.clone(), lazy_nexus_client.clone(), sled_id, addr, rack_id) .await .map_err(|e| e.to_string())?; @@ -79,6 +77,9 @@ impl Server { log, "contacting server nexus, registering sled: {}", sled_id ); + let nexus_client = lazy_nexus_client.get() + .await + .map_err(|err| BackoffError::transient(err.to_string()))?; nexus_client .cpapi_sled_agents_post( &sled_id, @@ -87,16 +88,16 @@ impl Server { }, ) .await - .map_err(BackoffError::transient) + .map_err(|err| BackoffError::transient(err.to_string())) }; - let log_notification_failure = |_, delay| { + let log_notification_failure = |err, delay| { warn!( log, - "failed to contact nexus, will retry in {:?}", delay; + "failed to contact nexus: {}, will retry in {:?}", err, delay; ); }; retry_notify( - internal_service_policy(), + internal_service_policy_with_max(std::time::Duration::from_secs(5)), notify_nexus, log_notification_failure, ) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 946a6a8bc88..e7f71810fdf 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ 
-21,6 +21,7 @@ use std::net::{IpAddr, Ipv6Addr, SocketAddr}; use std::path::{Path, PathBuf}; use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; +use uuid::Uuid; // The filename of ServiceManager's internal storage. const SERVICE_CONFIG_FILENAME: &str = "service.toml"; @@ -120,6 +121,7 @@ pub struct ServiceManager { vnic_allocator: VnicAllocator, underlay_vnic: EtherstubVnic, underlay_address: Ipv6Addr, + rack_id: Uuid, } impl ServiceManager { @@ -139,6 +141,7 @@ impl ServiceManager { underlay_vnic: EtherstubVnic, underlay_address: Ipv6Addr, config: Config, + rack_id: Uuid, ) -> Result { debug!(log, "Creating new ServiceManager"); let mgr = Self { @@ -148,6 +151,7 @@ impl ServiceManager { vnic_allocator: VnicAllocator::new("Service", etherstub), underlay_vnic, underlay_address, + rack_id, }; let config_path = mgr.services_config_path(); @@ -312,6 +316,7 @@ impl ServiceManager { // cannot be known at packaging time. let runtime_config = NexusRuntimeConfig { id: service.id, + rack_id: self.rack_id, dropshot_external: ConfigDropshot { bind_address: SocketAddr::V6(external_address), request_body_max_bytes: 1048576, @@ -694,6 +699,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -720,6 +726,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -748,6 +755,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -765,6 +773,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -789,6 +798,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -808,6 +818,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, config, + Uuid::new_v4(), ) .await .unwrap(); diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 5d6481fcc34..b45c56b374c 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -11,7 +11,7 @@ use crate::illumos::zfs::{ }; use crate::illumos::{execute, PFEXEC}; use crate::instance_manager::InstanceManager; -use crate::nexus::NexusClient; +use crate::nexus::LazyNexusClient; use crate::params::{ DatasetKind, DiskStateRequested, InstanceHardware, InstanceMigrateParams, InstanceRuntimeStateRequested, ServiceEnsureBody, Zpool @@ -24,7 +24,6 @@ use omicron_common::api::{ }; use slog::Logger; use std::net::SocketAddrV6; -use std::sync::Arc; use uuid::Uuid; #[cfg(not(test))] @@ -100,7 +99,7 @@ pub struct SledAgent { // Component of Sled Agent responsible for managing Propolis instances. instances: InstanceManager, - nexus_client: Arc, + lazy_nexus_client: LazyNexusClient, // Other Oxide-controlled services running on this Sled. services: ServiceManager, @@ -111,9 +110,10 @@ impl SledAgent { pub async fn new( config: &Config, log: Logger, - nexus_client: Arc, + lazy_nexus_client: LazyNexusClient, id: Uuid, sled_address: SocketAddrV6, + rack_id: Uuid, ) -> Result { // Pass the "parent_log" to all subcomponents that want to set their own // "component" value. 
@@ -218,7 +218,7 @@ impl SledAgent { let storage = StorageManager::new( &parent_log, id, - nexus_client.clone(), + lazy_nexus_client.clone(), etherstub.clone(), *sled_address.ip(), ) @@ -235,7 +235,7 @@ impl SledAgent { } let instances = InstanceManager::new( parent_log.clone(), - nexus_client.clone(), + lazy_nexus_client.clone(), etherstub.clone(), *sled_address.ip(), ); @@ -245,6 +245,7 @@ impl SledAgent { etherstub_vnic.clone(), *sled_address.ip(), services::Config::default(), + rack_id, ) .await?; @@ -252,7 +253,7 @@ impl SledAgent { id, storage, instances, - nexus_client, + lazy_nexus_client, services, }) } @@ -327,7 +328,11 @@ impl SledAgent { &self, artifact: UpdateArtifact, ) -> Result<(), Error> { - crate::updates::download_artifact(artifact, self.nexus_client.as_ref()) + let nexus_client = self.lazy_nexus_client.get() + .await + // TODO: Handle error + .unwrap(); + crate::updates::download_artifact(artifact, &nexus_client) .await?; Ok(()) } diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 467bb70b3d8..6305c41f949 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -10,7 +10,7 @@ use crate::illumos::vnic::VnicAllocator; use crate::illumos::zone::AddressRequest; use crate::illumos::zpool::ZpoolName; use crate::illumos::{zfs::Mountpoint, zone::ZONE_PREFIX, zpool::ZpoolInfo}; -use crate::nexus::NexusClient; +use crate::nexus::LazyNexusClient; use crate::params::DatasetKind; use futures::stream::FuturesOrdered; use futures::FutureExt; @@ -523,7 +523,7 @@ async fn ensure_running_zone( } type NotifyFut = dyn futures::Future< - Output = Result<(), nexus_client::Error>, + Output = Result<(), String> > + Send; #[derive(Debug)] @@ -538,7 +538,7 @@ struct NewFilesystemRequest { struct StorageWorker { log: Logger, sled_id: Uuid, - nexus_client: Arc, + lazy_nexus_client: LazyNexusClient, pools: Arc>>, new_pools_rx: mpsc::Receiver, new_filesystems_rx: mpsc::Receiver, @@ -631,21 +631,19 @@ impl StorageWorker { size: ByteCount, ) { let sled_id = self.sled_id; - let nexus = self.nexus_client.clone(); + let lazy_nexus_client = self.lazy_nexus_client.clone(); let notify_nexus = move || { let zpool_request = ZpoolPutRequest { size: size.into() }; - let nexus = nexus.clone(); + let lazy_nexus_client = lazy_nexus_client.clone(); async move { - nexus + lazy_nexus_client + .get() + .await + .map_err(|e| backoff::BackoffError::transient(e.to_string()))? .zpool_put(&sled_id, &pool_id, &zpool_request) .await - .map_err(backoff::BackoffError::transient)?; - Ok::< - (), - backoff::BackoffError< - nexus_client::Error, - >, - >(()) + .map_err(|e| backoff::BackoffError::transient(e.to_string()))?; + Ok(()) } }; let log = self.log.clone(); @@ -673,9 +671,9 @@ impl StorageWorker { datasets: Vec<(Uuid, SocketAddrV6, DatasetKind)>, pool_id: Uuid, ) { - let nexus = self.nexus_client.clone(); + let lazy_nexus_client = self.lazy_nexus_client.clone(); let notify_nexus = move || { - let nexus = nexus.clone(); + let lazy_nexus_client = lazy_nexus_client.clone(); let datasets = datasets.clone(); async move { for (id, address, kind) in datasets { @@ -683,18 +681,16 @@ impl StorageWorker { address: address.to_string(), kind: kind.into(), }; - nexus + lazy_nexus_client + .get() + .await + .map_err(|e| backoff::BackoffError::transient(e.to_string()))? 
.dataset_put(&pool_id, &id, &request) .await - .map_err(backoff::BackoffError::transient)?; + .map_err(|e| backoff::BackoffError::transient(e.to_string()))?; } - Ok::< - (), - backoff::BackoffError< - nexus_client::Error, - >, - >(()) + Ok(()) } }; let log = self.log.clone(); @@ -904,7 +900,7 @@ impl StorageManager { pub async fn new( log: &Logger, sled_id: Uuid, - nexus_client: Arc, + lazy_nexus_client: LazyNexusClient, etherstub: Etherstub, underlay_address: Ipv6Addr, ) -> Self { @@ -915,7 +911,7 @@ impl StorageManager { let mut worker = StorageWorker { log, sled_id, - nexus_client, + lazy_nexus_client, pools: pools.clone(), new_pools_rx, new_filesystems_rx, diff --git a/smf/sled-agent/config.toml b/smf/sled-agent/config.toml index 44a237e1930..170350afacd 100644 --- a/smf/sled-agent/config.toml +++ b/smf/sled-agent/config.toml @@ -1,9 +1,5 @@ # Sled Agent Configuration -# TODO: Remove this address -# Internal address of Nexus -nexus_address = "[fd00:1122:3344:0101::3]:12221" - # A file-backed zpool can be manually created with the following: # # truncate -s 10GB testpool.vdev # # zpool create oxp_d462a7f7-b628-40fe-80ff-4e4189e2d62b "$PWD/testpool.vdev" From dc3b84b2b15881e0862e78012cf835633de85090 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 12 Jun 2022 15:39:53 -0400 Subject: [PATCH 13/88] Add bg work user, rack insert populate, patch tests --- common/src/backoff.rs | 5 +- nexus/examples/config.toml | 1 + nexus/src/app/mod.rs | 36 +-- nexus/src/app/rack.rs | 20 +- nexus/src/app/update.rs | 23 +- nexus/src/authn/mod.rs | 11 + nexus/src/config.rs | 4 +- nexus/src/context.rs | 25 +- nexus/src/db/datastore.rs | 28 +-- nexus/src/db/fixed_data/role_assignment.rs | 7 + nexus/src/db/fixed_data/user_builtin.rs | 11 + nexus/src/lib.rs | 13 +- nexus/src/populate.rs | 71 +++++- nexus/test-utils/src/lib.rs | 5 +- nexus/tests/config.test.toml | 2 +- .../tests/integration_tests/users_builtin.rs | 2 + sled-agent/src/http_entrypoints.rs | 6 +- sled-agent/src/instance.rs | 7 +- sled-agent/src/instance_manager.rs | 12 +- sled-agent/src/nexus.rs | 39 +++- sled-agent/src/rack_setup/mod.rs | 2 +- sled-agent/src/rack_setup/plan/service.rs | 81 +++---- sled-agent/src/rack_setup/plan/sled.rs | 50 ++-- sled-agent/src/rack_setup/service.rs | 219 ++++++++---------- sled-agent/src/server.rs | 21 +- sled-agent/src/sled_agent.rs | 25 +- sled-agent/src/storage_manager.rs | 29 ++- smf/sled-agent/config-rss.toml | 56 ----- 28 files changed, 394 insertions(+), 417 deletions(-) diff --git a/common/src/backoff.rs b/common/src/backoff.rs index bcf726ff2a0..46f05d899a6 100644 --- a/common/src/backoff.rs +++ b/common/src/backoff.rs @@ -17,7 +17,9 @@ pub fn internal_service_policy() -> ::backoff::ExponentialBackoff { internal_service_policy_with_max(MAX_INTERVAL) } -pub fn internal_service_policy_with_max(max_duration: Duration) -> ::backoff::ExponentialBackoff { +pub fn internal_service_policy_with_max( + max_duration: Duration, +) -> ::backoff::ExponentialBackoff { const INITIAL_INTERVAL: Duration = Duration::from_millis(250); ::backoff::ExponentialBackoff { current_interval: INITIAL_INTERVAL, @@ -27,5 +29,4 @@ pub fn internal_service_policy_with_max(max_duration: Duration) -> ::backoff::Ex max_elapsed_time: None, ..backoff::ExponentialBackoff::default() } - } diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 22889ab1be9..8c22e661820 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -36,6 +36,7 @@ address = "[::1]:8123" [runtime] # Identifier for this instance of 
Nexus id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" +rack_id = "c19a698f-c6f9-4a17-ae30-20d711b8f7dc" [runtime.dropshot_external] # IP address and TCP port on which to listen for the external API diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 13cf48b91ec..27f8fd650fc 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -10,6 +10,7 @@ use crate::config; use crate::context::OpContext; use crate::db; use crate::populate::populate_start; +use crate::populate::PopulateArgs; use crate::populate::PopulateStatus; use crate::saga_interface::SagaContext; use anyhow::anyhow; @@ -89,6 +90,9 @@ pub struct Nexus { /// Operational context used for external request authentication opctx_external_authn: OpContext, + + /// Operational context used for Nexus-driven background tasks + opctx_background_work: OpContext, } // TODO Is it possible to make some of these operations more generic? A @@ -136,8 +140,13 @@ impl Nexus { authn::Context::internal_db_init(), Arc::clone(&db_datastore), ); - let populate_status = - populate_start(populate_ctx, Arc::clone(&db_datastore)); + + let populate_args = PopulateArgs::new(rack_id); + let populate_status = populate_start( + populate_ctx, + Arc::clone(&db_datastore), + populate_args, + ); let nexus = Nexus { id: config.runtime.id, @@ -163,6 +172,12 @@ impl Nexus { authn::Context::external_authn(), Arc::clone(&db_datastore), ), + opctx_background_work: OpContext::for_background( + log.new(o!("component" => "Background Work")), + Arc::clone(&authz), + authn::Context::internal_db_background(), + Arc::clone(&db_datastore), + ), }; // TODO-cleanup all the extra Arcs here seems wrong @@ -213,23 +228,16 @@ impl Nexus { } } - /// Returns an [`OpContext`] used for background tasks. - // TODO: dap@ recommends using a different user for this, other than - // "internal_db_init". - pub fn opctx_for_background(&self) -> OpContext { - OpContext::for_background( - self.log.new(o!("component" => "Background Work")), - Arc::clone(&self.authz), - authn::Context::internal_db_init(), - Arc::clone(&self.datastore()), - ) - } - /// Returns an [`OpContext`] used for authenticating external requests pub fn opctx_external_authn(&self) -> &OpContext { &self.opctx_external_authn } + /// Returns an [`OpContext`] used for background tasks. + pub fn opctx_for_background(&self) -> &OpContext { + &self.opctx_background_work + } + /// Used as the body of a "stub" endpoint -- one that's currently /// unimplemented but that we eventually intend to implement /// diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index bf1f2026e8a..dde3df7449c 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -24,11 +24,6 @@ impl super::Nexus { self.db_datastore.rack_list(&opctx, pagparams).await } - // TODO: Use this, instead of the manual one. - // - // Note that this will require insertion of the rack to occur - // during the "populate" steps. - /* pub async fn rack_lookup( &self, opctx: &OpContext, @@ -40,15 +35,6 @@ impl super::Nexus { .await?; Ok(db_rack) } - */ - - pub async fn rack_lookup( - &self, - opctx: &OpContext, - rack_id: &Uuid, - ) -> LookupResult { - self.db_datastore.rack_lookup_manual(opctx, *rack_id).await - } /// Ensures that a rack exists in the DB. /// @@ -76,7 +62,8 @@ impl super::Nexus { opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; // Convert from parameter -> DB type. 
- let services: Vec<_> = request.services + let services: Vec<_> = request + .services .into_iter() .map(|svc| { db::model::Service::new( @@ -88,7 +75,8 @@ impl super::Nexus { }) .collect(); - let datasets: Vec<_> = request.datasets + let datasets: Vec<_> = request + .datasets .into_iter() .map(|dataset| { db::model::Dataset::new( diff --git a/nexus/src/app/update.rs b/nexus/src/app/update.rs index 65ec3b6ddde..2d87a44a84f 100644 --- a/nexus/src/app/update.rs +++ b/nexus/src/app/update.rs @@ -24,11 +24,11 @@ use tokio::io::AsyncWriteExt; static BASE_ARTIFACT_DIR: &str = "/var/tmp/oxide_artifacts"; impl super::Nexus { - async fn tuf_base_url(&self, opctx: &OpContext) -> Result, Error> { - let rack = self.rack_lookup( - opctx, - &self.rack_id, - ).await?; + async fn tuf_base_url( + &self, + opctx: &OpContext, + ) -> Result, Error> { + let rack = self.rack_lookup(opctx, &self.rack_id).await?; Ok(self.updates_config.as_ref().map(|c| { rack.tuf_base_url.unwrap_or_else(|| c.default_base_url.clone()) @@ -46,10 +46,11 @@ impl super::Nexus { message: "updates system not configured".into(), } })?; - let base_url = - self.tuf_base_url(opctx).await?.ok_or_else(|| Error::InvalidRequest { + let base_url = self.tuf_base_url(opctx).await?.ok_or_else(|| { + Error::InvalidRequest { message: "updates system not configured".into(), - })?; + } + })?; let trusted_root = tokio::fs::read(&updates_config.trusted_root) .await .map_err(|e| Error::InternalError { @@ -133,8 +134,10 @@ impl super::Nexus { artifact: UpdateArtifact, ) -> Result, Error> { let mut base_url = - self.tuf_base_url(opctx).await?.ok_or_else(|| Error::InvalidRequest { - message: "updates system not configured".into(), + self.tuf_base_url(opctx).await?.ok_or_else(|| { + Error::InvalidRequest { + message: "updates system not configured".into(), + } })?; if !base_url.ends_with('/') { base_url.push('/'); diff --git a/nexus/src/authn/mod.rs b/nexus/src/authn/mod.rs index 59e5bc7a889..c9399bdb131 100644 --- a/nexus/src/authn/mod.rs +++ b/nexus/src/authn/mod.rs @@ -30,6 +30,7 @@ pub mod silos; pub use crate::db::fixed_data::silo_user::USER_TEST_PRIVILEGED; pub use crate::db::fixed_data::silo_user::USER_TEST_UNPRIVILEGED; +pub use crate::db::fixed_data::user_builtin::USER_BACKGROUND_WORK; pub use crate::db::fixed_data::user_builtin::USER_DB_INIT; pub use crate::db::fixed_data::user_builtin::USER_EXTERNAL_AUTHN; pub use crate::db::fixed_data::user_builtin::USER_INTERNAL_API; @@ -170,6 +171,11 @@ impl Context { Context::context_for_builtin_user(USER_DB_INIT.id) } + /// Returns an authenticated context for Nexus-driven db work. 
+ pub fn internal_db_background() -> Context { + Context::context_for_builtin_user(USER_BACKGROUND_WORK.id) + } + fn context_for_builtin_user(user_builtin_id: Uuid) -> Context { Context { kind: Kind::Authenticated(Details { @@ -213,6 +219,7 @@ impl Context { #[cfg(test)] mod test { use super::Context; + use super::USER_BACKGROUND_WORK; use super::USER_DB_INIT; use super::USER_INTERNAL_API; use super::USER_INTERNAL_READ; @@ -251,6 +258,10 @@ mod test { let actor = authn.actor().unwrap(); assert_eq!(actor.actor_id(), USER_DB_INIT.id); + let authn = Context::internal_db_background(); + let actor = authn.actor().unwrap(); + assert_eq!(actor.actor_id(), USER_BACKGROUND_WORK.id); + let authn = Context::internal_saga_recovery(); let actor = authn.actor().unwrap(); assert_eq!(actor.actor_id(), USER_SAGA_RECOVERY.id); diff --git a/nexus/src/config.rs b/nexus/src/config.rs index a157c4bdbeb..f8f52fcf50d 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -347,7 +347,9 @@ mod test { Config { runtime: RuntimeConfig { id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), - rack_id: "38b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), + rack_id: "38b90dc4-c22a-65ba-f49a-f051fe01208f" + .parse() + .unwrap(), dropshot_external: ConfigDropshot { bind_address: "10.1.2.3:4567" .parse::() diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 4cd92a05f91..90e57669cd9 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -19,9 +19,7 @@ use authn::external::spoof::HttpAuthnSpoof; use authn::external::HttpAuthnScheme; use chrono::{DateTime, Duration, Utc}; use internal_dns_client::names::SRV; -use omicron_common::address::{ - Ipv6Subnet, AZ_PREFIX, COCKROACH_PORT, -}; +use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, COCKROACH_PORT}; use omicron_common::api::external::Error; use omicron_common::nexus_config; use omicron_common::postgres_config::PostgresConfigWithUrl; @@ -154,7 +152,9 @@ impl ServerContext { nexus_config::Database::FromDns => { info!(log, "Accessing DB url from DNS"); let response = resolver - .lookup_ip(&SRV::Service("cockroachdb".to_string()).to_string()) + .lookup_ip( + &SRV::Service("cockroachdb".to_string()).to_string(), + ) .await .map_err(|e| format!("Failed to lookup IP: {}", e))?; let address = response.iter().next().ok_or_else(|| { @@ -169,7 +169,7 @@ impl ServerContext { } }; let pool = db::Pool::new(&db::Config { url }); - let nexus = Nexus::new_with_id( + let nexus = Nexus::new_with_id( rack_id, log.new(o!("component" => "nexus")), pool, @@ -177,21 +177,6 @@ impl ServerContext { Arc::clone(&authz), ); - // Do not return until a rack exists in the DB with the provided UUID. 
- let populate_ctx = nexus.opctx_for_background(); - loop { - let result = nexus.rack_insert(&populate_ctx, rack_id) - .await; - if let Err(e) = result { - info!(log, "Failed to create initial rack: {}", e); - tokio::time::sleep(std::time::Duration::from_millis(200)).await; - } else { - info!(log, "Rack with UUID {} exists in the database", rack_id); - nexus.rack_lookup(&populate_ctx, &rack_id).await.unwrap(); - break; - } - } - Ok(Arc::new(ServerContext { nexus, log, diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index c8297e36aa7..f1cba756c85 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -175,21 +175,6 @@ impl DataStore { }) } - pub async fn rack_lookup_manual( - &self, - _opctx: &OpContext, - rack_id: Uuid, - ) -> LookupResult { - use db::schema::rack::dsl; - - dsl::rack - .filter(dsl::id.eq(rack_id)) - .select(Rack::as_select()) - .get_result_async(self.pool()) - .await - .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) - } - /// Update a rack to mark that it has been initialized pub async fn rack_set_initialized( &self, @@ -202,8 +187,16 @@ impl DataStore { #[derive(Debug)] enum RackInitError { - ServiceInsert { err: SyncInsertError, sled_id: Uuid, svc_id: Uuid }, - DatasetInsert { err: SyncInsertError, zpool_id: Uuid, dataset_id: Uuid }, + ServiceInsert { + err: SyncInsertError, + sled_id: Uuid, + svc_id: Uuid, + }, + DatasetInsert { + err: SyncInsertError, + zpool_id: Uuid, + dataset_id: Uuid, + }, RackUpdate(diesel::result::Error), } type TxnError = TransactionError; @@ -2948,6 +2941,7 @@ impl DataStore { let builtin_users = [ // Note: "db_init" is also a builtin user, but that one by necessity // is created with the database. + &*authn::USER_BACKGROUND_WORK, &*authn::USER_INTERNAL_API, &*authn::USER_INTERNAL_READ, &*authn::USER_EXTERNAL_AUTHN, diff --git a/nexus/src/db/fixed_data/role_assignment.rs b/nexus/src/db/fixed_data/role_assignment.rs index 94caf552a13..540b57abe50 100644 --- a/nexus/src/db/fixed_data/role_assignment.rs +++ b/nexus/src/db/fixed_data/role_assignment.rs @@ -24,6 +24,13 @@ lazy_static! { *FLEET_ID, role_builtin::FLEET_ADMIN.role_name, ), + RoleAssignment::new( + IdentityType::UserBuiltin, + user_builtin::USER_BACKGROUND_WORK.id, + role_builtin::FLEET_ADMIN.resource_type, + *FLEET_ID, + role_builtin::FLEET_ADMIN.role_name, + ), // The "internal-read" user gets the "viewer" role on the sole // Fleet. This will grant them the ability to read various control diff --git a/nexus/src/db/fixed_data/user_builtin.rs b/nexus/src/db/fixed_data/user_builtin.rs index 1e9dee1b7bf..238a8f5405a 100644 --- a/nexus/src/db/fixed_data/user_builtin.rs +++ b/nexus/src/db/fixed_data/user_builtin.rs @@ -39,6 +39,15 @@ lazy_static! { "used for seeding initial database data", ); + /// Internal user for performing operations driven by Nexus, rather + /// than any API request. + pub static ref USER_BACKGROUND_WORK: UserBuiltinConfig = + UserBuiltinConfig::new_static( + "001de000-05e4-4000-8000-00000000bac3", + "background-work", + "used for Nexus-driven database operations", + ); + /// Internal user used by Nexus when handling internal API requests pub static ref USER_INTERNAL_API: UserBuiltinConfig = UserBuiltinConfig::new_static( @@ -77,6 +86,7 @@ lazy_static! 
{ #[cfg(test)] mod test { use super::super::assert_valid_uuid; + use super::USER_BACKGROUND_WORK; use super::USER_DB_INIT; use super::USER_EXTERNAL_AUTHN; use super::USER_INTERNAL_API; @@ -85,6 +95,7 @@ mod test { #[test] fn test_builtin_user_ids_are_valid() { + assert_valid_uuid(&USER_BACKGROUND_WORK.id); assert_valid_uuid(&USER_DB_INIT.id); assert_valid_uuid(&USER_INTERNAL_API.id); assert_valid_uuid(&USER_EXTERNAL_AUTHN.id); diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 627a48dd681..079a7a26f54 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -88,7 +88,8 @@ impl Server { let ctxlog = log.new(o!("component" => "ServerContext")); - let apictx = ServerContext::new(config.runtime.rack_id, ctxlog, &config).await?; + let apictx = + ServerContext::new(config.runtime.rack_id, ctxlog, &config).await?; let http_server_starter_internal = dropshot::HttpServerStarter::new( &config.runtime.dropshot_internal, @@ -100,6 +101,15 @@ impl Server { let http_server_internal = http_server_starter_internal.start(); // Wait until RSS handoff completes. + // TODO: This messes up the tests. Should we make this a config option? + // + // TODO: This actually raises a question; what triggers background tasks + // to execute? + // + // - Perhaps the API is exposed to tests? + // - Perhaps the invocation of that API is controlled by config + // options? + /* let opctx = apictx.nexus.opctx_for_background(); loop { let result = apictx.nexus.rack_lookup(&opctx, &config.runtime.rack_id).await; @@ -116,6 +126,7 @@ impl Server { } tokio::time::sleep(std::time::Duration::from_secs(2)).await; } + */ let http_server_starter_external = dropshot::HttpServerStarter::new( &config.runtime.dropshot_external, diff --git a/nexus/src/populate.rs b/nexus/src/populate.rs index 9f6bcdcad20..85223aef2b1 100644 --- a/nexus/src/populate.rs +++ b/nexus/src/populate.rs @@ -43,13 +43,14 @@ //! each populator behaves as expected in the above ways. use crate::context::OpContext; -use crate::db::DataStore; +use crate::db::{self, DataStore}; use futures::future::BoxFuture; use futures::FutureExt; use lazy_static::lazy_static; use omicron_common::api::external::Error; use omicron_common::backoff; use std::sync::Arc; +use uuid::Uuid; #[derive(Clone, Debug)] pub enum PopulateStatus { @@ -58,14 +59,26 @@ pub enum PopulateStatus { Failed(String), } +/// Auxiliary data necessary to populate the database. +pub struct PopulateArgs { + rack_id: Uuid, +} + +impl PopulateArgs { + pub fn new(rack_id: Uuid) -> Self { + Self { rack_id } + } +} + pub fn populate_start( opctx: OpContext, datastore: Arc, + args: PopulateArgs, ) -> tokio::sync::watch::Receiver { let (tx, rx) = tokio::sync::watch::channel(PopulateStatus::NotDone); tokio::spawn(async move { - let result = populate(&opctx, &datastore).await; + let result = populate(&opctx, &datastore, &args).await; if let Err(error) = tx.send(match result { Ok(()) => PopulateStatus::Done, Err(message) => PopulateStatus::Failed(message), @@ -80,17 +93,19 @@ pub fn populate_start( async fn populate( opctx: &OpContext, datastore: &DataStore, + args: &PopulateArgs, ) -> Result<(), String> { for p in *ALL_POPULATORS { let db_result = backoff::retry_notify( backoff::internal_service_policy(), || async { - p.populate(opctx, datastore).await.map_err(|error| match &error - { - Error::ServiceUnavailable { .. } => { - backoff::BackoffError::transient(error) + p.populate(opctx, datastore, args).await.map_err(|error| { + match &error { + Error::ServiceUnavailable { .. 
} => { + backoff::BackoffError::transient(error) + } + _ => backoff::BackoffError::Permanent(error), } - _ => backoff::BackoffError::Permanent(error), }) }, |error, delay| { @@ -130,6 +145,7 @@ trait Populator: std::fmt::Debug + Send + Sync { &self, opctx: &'a OpContext, datastore: &'a DataStore, + args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b; @@ -143,6 +159,7 @@ impl Populator for PopulateBuiltinUsers { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -159,6 +176,7 @@ impl Populator for PopulateBuiltinRoles { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -175,6 +193,7 @@ impl Populator for PopulateBuiltinRoleAssignments { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -192,6 +211,7 @@ impl Populator for PopulateBuiltinSilos { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -214,6 +234,7 @@ impl Populator for PopulateSiloUsers { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -230,6 +251,7 @@ impl Populator for PopulateSiloUserRoleAssignments { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -241,19 +263,43 @@ impl Populator for PopulateSiloUserRoleAssignments { } } +#[derive(Debug)] +struct PopulateRack; +impl Populator for PopulateRack { + fn populate<'a, 'b>( + &self, + opctx: &'a OpContext, + datastore: &'a DataStore, + args: &'a PopulateArgs, + ) -> BoxFuture<'b, Result<(), Error>> + where + 'a: 'b, + { + async { + datastore + .rack_insert(opctx, &db::model::Rack::new(args.rack_id)) + .await?; + Ok(()) + } + .boxed() + } +} + lazy_static! { - static ref ALL_POPULATORS: [&'static dyn Populator; 6] = [ + static ref ALL_POPULATORS: [&'static dyn Populator; 7] = [ &PopulateBuiltinUsers, &PopulateBuiltinRoles, &PopulateBuiltinRoleAssignments, &PopulateBuiltinSilos, &PopulateSiloUsers, &PopulateSiloUserRoleAssignments, + &PopulateRack, ]; } #[cfg(test)] mod test { + use super::PopulateArgs; use super::Populator; use super::ALL_POPULATORS; use crate::authn; @@ -265,6 +311,7 @@ mod test { use omicron_common::api::external::Error; use omicron_test_utils::dev; use std::sync::Arc; + use uuid::Uuid; #[tokio::test] async fn test_populators() { @@ -287,16 +334,18 @@ mod test { ); let log = &logctx.log; + let args = PopulateArgs::new(Uuid::new_v4()); + // Running each populator once under normal conditions should work. info!(&log, "populator {:?}, run 1", p); - p.populate(&opctx, &datastore) + p.populate(&opctx, &datastore, &args) .await .with_context(|| format!("populator {:?} (try 1)", p)) .unwrap(); // It should also work fine to run it again. info!(&log, "populator {:?}, run 2 (idempotency check)", p); - p.populate(&opctx, &datastore) + p.populate(&opctx, &datastore, &args) .await .with_context(|| { format!( @@ -331,7 +380,7 @@ mod test { ); info!(&log, "populator {:?}, with database offline", p); - match p.populate(&opctx, &datastore).await { + match p.populate(&opctx, &datastore, &args).await { Err(Error::ServiceUnavailable { .. 
}) => (), Ok(_) => panic!( "populator {:?}: unexpectedly succeeded with no database", diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 74c8a7f2d21..48fa1fec479 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -103,9 +103,8 @@ pub async fn test_setup_with_config( nexus_config::Database::FromUrl { url: database.pg_config().clone() }; config.pkg.timeseries_db.address.set_port(clickhouse.port()); - let server = omicron_nexus::Server::start(&config, &logctx.log) - .await - .unwrap(); + let server = + omicron_nexus::Server::start(&config, &logctx.log).await.unwrap(); server .apictx .nexus diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index a1b47d7f178..20e3df3330d 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -39,7 +39,7 @@ max_vpc_ipv4_subnet_prefix = 29 # Identifier for this instance of Nexus. # NOTE: The test suite always overrides this. id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" -rack_id = "f6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" +rack_id = "c19a698f-c6f9-4a17-ae30-20d711b8f7dc" # # NOTE: for the test suite, the port MUST be 0 (in order to bind to any diff --git a/nexus/tests/integration_tests/users_builtin.rs b/nexus/tests/integration_tests/users_builtin.rs index b06741a3067..c6d3615c9ef 100644 --- a/nexus/tests/integration_tests/users_builtin.rs +++ b/nexus/tests/integration_tests/users_builtin.rs @@ -27,6 +27,8 @@ async fn test_users_builtin(cptestctx: &ControlPlaneTestContext) { let u = users.remove(&authn::USER_DB_INIT.name.to_string()).unwrap(); assert_eq!(u.identity.id, authn::USER_DB_INIT.id); + let u = users.remove(&authn::USER_BACKGROUND_WORK.name.to_string()).unwrap(); + assert_eq!(u.identity.id, authn::USER_BACKGROUND_WORK.id); let u = users.remove(&authn::USER_INTERNAL_API.name.to_string()).unwrap(); assert_eq!(u.identity.id, authn::USER_INTERNAL_API.id); let u = users.remove(&authn::USER_INTERNAL_READ.name.to_string()).unwrap(); diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 72a8c3c3f74..d0dd478ea4c 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -66,11 +66,7 @@ async fn zpools_get( rqctx: Arc>, ) -> Result>, HttpError> { let sa = rqctx.context(); - Ok(HttpResponseOk( - sa.zpools_get() - .await - .map_err(|e| Error::from(e))? 
- )) + Ok(HttpResponseOk(sa.zpools_get().await.map_err(|e| Error::from(e))?)) } #[endpoint { diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 8ffdc5c5519..55358cb8dd2 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -719,7 +719,7 @@ impl Instance { mod test { use super::*; use crate::illumos::dladm::Etherstub; - use crate::mocks::MockNexusClient; + use crate::nexus::LazyNexusClient; use crate::opte::OptePortAllocator; use crate::params::InstanceStateRequested; use chrono::Utc; @@ -792,7 +792,8 @@ mod test { Etherstub("mylink".to_string()), ); let port_allocator = OptePortAllocator::new(); - let nexus_client = MockNexusClient::default(); + let lazy_nexus_client = + LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST); let inst = Instance::new( log.clone(), @@ -803,7 +804,7 @@ mod test { ), port_allocator, new_initial_instance(), - Arc::new(nexus_client), + lazy_nexus_client, ) .unwrap(); diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index a87db943195..d93b8eae04e 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -199,7 +199,7 @@ mod test { use crate::illumos::dladm::Etherstub; use crate::illumos::{dladm::MockDladm, zone::MockZones}; use crate::instance::MockInstance; - use crate::mocks::MockNexusClient; + use crate::nexus::LazyNexusClient; use crate::params::InstanceStateRequested; use chrono::Utc; use omicron_common::api::external::{ @@ -246,7 +246,8 @@ mod test { #[serial_test::serial] async fn ensure_instance() { let log = logger(); - let nexus_client = Arc::new(MockNexusClient::default()); + let lazy_nexus_client = + LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST); // Creation of the instance manager incurs some "global" system // checks: cleanup of existing zones + vnics. @@ -259,7 +260,7 @@ mod test { let im = InstanceManager::new( log, - nexus_client, + lazy_nexus_client, Etherstub("mylink".to_string()), std::net::Ipv6Addr::new( 0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, @@ -329,7 +330,8 @@ mod test { #[serial_test::serial] async fn ensure_instance_repeatedly() { let log = logger(); - let nexus_client = Arc::new(MockNexusClient::default()); + let lazy_nexus_client = + LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST); // Instance Manager creation. @@ -341,7 +343,7 @@ mod test { let im = InstanceManager::new( log, - nexus_client, + lazy_nexus_client, Etherstub("mylink".to_string()), std::net::Ipv6Addr::new( 0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 8a2be0e0e54..00e87fd6a1b 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -8,9 +8,7 @@ pub use crate::mocks::MockNexusClient as NexusClient; pub use nexus_client::Client as NexusClient; use internal_dns_client::names::SRV; -use omicron_common::address::{ - AZ_PREFIX, NEXUS_INTERNAL_PORT, Ipv6Subnet, -}; +use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, NEXUS_INTERNAL_PORT}; use slog::Logger; use std::net::Ipv6Addr; use std::sync::Arc; @@ -18,10 +16,19 @@ use std::sync::Arc; struct Inner { log: Logger, addr: Ipv6Addr, - // TODO: We could also totally cache the resolver / observed IP here? } +/// Wrapper around a [`NexusClient`] object, which allows deferring +/// the DNS lookup until accessed. +/// +/// Without the assistance of OS-level DNS lookups, the [`NexusClient`] +/// interface requires knowledge of the target service IP address. 
+/// For some services, like Nexus, this can be painful, as the IP address +/// may not have even been allocated when the Sled Agent starts. +/// +/// This structure allows clients to access the client on-demand, performing +/// the DNS lookup only once it is actually needed. #[derive(Clone)] pub struct LazyNexusClient { inner: Arc, @@ -29,14 +36,7 @@ pub struct LazyNexusClient { impl LazyNexusClient { pub fn new(log: Logger, addr: Ipv6Addr) -> Self { - Self { - inner: Arc::new( - Inner { - log, - addr, - } - ) - } + Self { inner: Arc::new(Inner { log, addr }) } } pub async fn get(&self) -> Result { @@ -62,3 +62,18 @@ impl LazyNexusClient { )) } } + +// Provides a mock implementation of the [`LazyNexusClient`]. +// +// This allows tests to use the structure without actually performing +// any DNS lookups. +#[cfg(test)] +mockall::mock! { + pub LazyNexusClient { + pub fn new(log: Logger, addr: Ipv6Addr) -> Self; + pub async fn get(&self) -> Result; + } + impl Clone for LazyNexusClient { + fn clone(&self) -> Self; + } +} diff --git a/sled-agent/src/rack_setup/mod.rs b/sled-agent/src/rack_setup/mod.rs index f052b6c3120..4df85a7727f 100644 --- a/sled-agent/src/rack_setup/mod.rs +++ b/sled-agent/src/rack_setup/mod.rs @@ -5,5 +5,5 @@ //! Rack Setup Service pub mod config; -pub mod service; mod plan; +pub mod service; diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index c5ceb3c1ef0..0bdf332d748 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -15,9 +15,7 @@ use omicron_common::backoff::{ }; use serde::{Deserialize, Serialize}; use sled_agent_client::{ - Client as SledAgentClient, - Error as SledAgentError, - types as SledAgentTypes, + types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; use slog::Logger; use std::collections::HashMap; @@ -33,8 +31,7 @@ const NEXUS_COUNT: usize = 1; const CRDB_COUNT: usize = 1; fn rss_service_plan_path() -> PathBuf { - Path::new(omicron_common::OMICRON_CONFIG_PATH) - .join("rss-service-plan.toml") + Path::new(omicron_common::OMICRON_CONFIG_PATH).join("rss-service-plan.toml") } /// Describes errors which may occur while generating a plan for services. @@ -81,31 +78,27 @@ pub struct Plan { } impl Plan { - pub async fn load( - log: &Logger, - ) -> Result, PlanError> - { + pub async fn load(log: &Logger) -> Result, PlanError> { // If we already created a plan for this RSS to allocate // services to sleds, re-use that existing plan. 
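        // (Note: the plan lives under OMICRON_CONFIG_PATH, so an allocation
        // written on a previous boot is picked up again here, which is what
        // keeps plan generation idempotent across RSS restarts.)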
let rss_service_plan_path = rss_service_plan_path(); if rss_service_plan_path.exists() { info!(log, "RSS plan already created, loading from file"); - let plan: Self = - toml::from_str( - &tokio::fs::read_to_string(&rss_service_plan_path).await.map_err( - |err| PlanError::Io { - message: format!( - "Loading RSS plan {rss_service_plan_path:?}" - ), - err, - }, - )?, - ) - .map_err(|err| PlanError::Toml { - path: rss_service_plan_path, - err, - })?; + let plan: Self = toml::from_str( + &tokio::fs::read_to_string(&rss_service_plan_path) + .await + .map_err(|err| PlanError::Io { + message: format!( + "Loading RSS plan {rss_service_plan_path:?}" + ), + err, + })?, + ) + .map_err(|err| PlanError::Toml { + path: rss_service_plan_path, + err, + })?; Ok(Some(plan)) } else { Ok(None) @@ -134,20 +127,21 @@ impl Plan { .zpools_get() .await .map(|response| { - response.into_inner() + response + .into_inner() .into_iter() .map(|zpool| zpool.id) .collect() }) .map_err(|err| { - BackoffError::transient( - PlanError::SledApi(err) - ) + BackoffError::transient(PlanError::SledApi(err)) })?; if zpools.is_empty() { return Err(BackoffError::transient( - PlanError::SledInitialization("Awaiting zpools".to_string()) + PlanError::SledInitialization( + "Awaiting zpools".to_string(), + ), )); } @@ -156,12 +150,9 @@ impl Plan { let log_failure = |error, _| { warn!(log, "failed to get zpools"; "error" => ?error); }; - let zpools = retry_notify( - internal_service_policy(), - get_zpools, - log_failure, - ) - .await?; + let zpools = + retry_notify(internal_service_policy(), get_zpools, log_failure) + .await?; Ok(zpools[0]) } @@ -215,7 +206,8 @@ impl Plan { // The first enumerated sleds host the CRDB datasets, using // zpools described from the underlying config file. if idx < CRDB_COUNT { - let zpool_id = Self::get_a_zpool_from_sled(log, sled_address).await?; + let zpool_id = + Self::get_a_zpool_from_sled(log, sled_address).await?; let address = SocketAddrV6::new( addr_alloc.next().expect("Not enough addrs"), @@ -226,10 +218,9 @@ impl Plan { request.datasets.push(DatasetEnsureBody { id: Uuid::new_v4(), zpool_id, - dataset_kind: - crate::params::DatasetKind::CockroachDb { - all_addresses: vec![address], - }, + dataset_kind: crate::params::DatasetKind::CockroachDb { + all_addresses: vec![address], + }, address, }); } @@ -258,10 +249,7 @@ impl Plan { }); } - allocations.push(( - sled_address, - request - )); + allocations.push((sled_address, request)); } let mut services = std::collections::HashMap::new(); @@ -269,9 +257,7 @@ impl Plan { services.insert(addr, allocation); } - let plan = Self { - services - }; + let plan = Self { services }; // Once we've constructed a plan, write it down to durable storage. let serialized_plan = @@ -316,4 +302,3 @@ impl AddressBumpAllocator { Some(self.last_addr) } } - diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index 9154ab6698a..2e5559c1201 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -17,8 +17,7 @@ use thiserror::Error; use uuid::Uuid; fn rss_sled_plan_path() -> PathBuf { - Path::new(omicron_common::OMICRON_CONFIG_PATH) - .join("rss-sled-plan.toml") + Path::new(omicron_common::OMICRON_CONFIG_PATH).join("rss-sled-plan.toml") } /// Describes errors which may occur while generating a plan for sleds. @@ -39,7 +38,6 @@ pub enum PlanError { pub struct Plan { pub rack_id: Uuid, pub sleds: HashMap, - // TODO: Consider putting the rack subnet here? 
This may be operator-driven // in the future, so it should exist in the "plan". // @@ -48,30 +46,24 @@ pub struct Plan { } impl Plan { - pub async fn load( - log: &Logger, - ) -> Result, PlanError> { + pub async fn load(log: &Logger) -> Result, PlanError> { // If we already created a plan for this RSS to allocate // subnets/requests to sleds, re-use that existing plan. let rss_sled_plan_path = rss_sled_plan_path(); if rss_sled_plan_path.exists() { info!(log, "RSS plan already created, loading from file"); - let plan: Self = - toml::from_str( - &tokio::fs::read_to_string(&rss_sled_plan_path).await.map_err( - |err| PlanError::Io { - message: format!( - "Loading RSS plan {rss_sled_plan_path:?}" - ), - err, - }, - )?, - ) - .map_err(|err| PlanError::Toml { - path: rss_sled_plan_path, - err, - })?; + let plan: Self = toml::from_str( + &tokio::fs::read_to_string(&rss_sled_plan_path).await.map_err( + |err| PlanError::Io { + message: format!( + "Loading RSS plan {rss_sled_plan_path:?}" + ), + err, + }, + )?, + ) + .map_err(|err| PlanError::Toml { path: rss_sled_plan_path, err })?; Ok(Some(plan)) } else { Ok(None) @@ -88,10 +80,7 @@ impl Plan { let rack_id = Uuid::new_v4(); let allocations = bootstrap_addrs.map(|(idx, bootstrap_addr)| { - info!( - log, - "Creating plan for the sled at {:?}", bootstrap_addr - ); + info!(log, "Creating plan for the sled at {:?}", bootstrap_addr); let bootstrap_addr = SocketAddrV6::new(bootstrap_addr, BOOTSTRAP_AGENT_PORT, 0, 0); let sled_subnet_index = @@ -100,11 +89,7 @@ impl Plan { ( bootstrap_addr, - SledAgentRequest { - id: Uuid::new_v4(), - subnet, - rack_id, - }, + SledAgentRequest { id: Uuid::new_v4(), subnet, rack_id }, ) }); @@ -115,10 +100,7 @@ impl Plan { sleds.insert(addr, allocation); } - let plan = Self { - rack_id, - sleds, - }; + let plan = Self { rack_id, sleds }; // Once we've constructed a plan, write it down to durable storage. let serialized_plan = diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 2b25b8fa55a..206b2e4160b 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -6,23 +6,19 @@ use super::config::SetupServiceConfig as Config; use crate::bootstrap::{ - discovery::PeerMonitorObserver, - params::SledAgentRequest, rss_handle::BootstrapAgentHandle, + discovery::PeerMonitorObserver, params::SledAgentRequest, + rss_handle::BootstrapAgentHandle, }; use crate::params::{DatasetEnsureBody, ServiceRequest, ServiceType}; use crate::rack_setup::plan::service::{ - PlanError as ServicePlanError, - Plan as ServicePlan, + Plan as ServicePlan, PlanError as ServicePlanError, }; use crate::rack_setup::plan::sled::{ - PlanError as SledPlanError, - Plan as SledPlan, + Plan as SledPlan, PlanError as SledPlanError, }; use internal_dns_client::names::{AAAA, SRV}; use nexus_client::{ - Client as NexusClient, - Error as NexusError, - types as NexusTypes, + types as NexusTypes, Client as NexusClient, Error as NexusError, }; use omicron_common::address::{get_sled_address, NEXUS_INTERNAL_PORT}; use omicron_common::backoff::{ @@ -30,9 +26,7 @@ use omicron_common::backoff::{ }; use serde::{Deserialize, Serialize}; use sled_agent_client::{ - Client as SledAgentClient, - Error as SledAgentError, - types as SledAgentTypes, + types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; use slog::Logger; use std::collections::{HashMap, HashSet}; @@ -199,7 +193,9 @@ impl ServiceInner { crate::params::DatasetKind::CockroachDb { .. 
} ) }) { - return Err(SetupServiceError::BadConfig("RSS should only initialize CRDB services".into())); + return Err(SetupServiceError::BadConfig( + "RSS should only initialize CRDB services".into(), + )); } let dur = std::time::Duration::from_secs(60); @@ -222,14 +218,7 @@ impl ServiceInner { .filesystem_put(&dataset.clone().into()) .await .map_err(BackoffError::transient)?; - Ok::< - (), - BackoffError< - SledAgentError< - SledAgentTypes::Error, - >, - >, - >(()) + Ok::<(), BackoffError>>(()) }; let log_failure = |error, _| { warn!(self.log, "failed to create filesystem"; "error" => ?error); @@ -248,12 +237,7 @@ impl ServiceInner { let aaaa = datasets .iter() - .map(|dataset| { - ( - AAAA::Zone(dataset.id), - dataset.address, - ) - }) + .map(|dataset| (AAAA::Zone(dataset.id), dataset.address)) .collect::>(); let srv_key = SRV::Service("cockroachdb".into()); @@ -295,12 +279,7 @@ impl ServiceInner { }) .await .map_err(BackoffError::transient)?; - Ok::< - (), - BackoffError< - SledAgentError, - >, - >(()) + Ok::<(), BackoffError>>(()) }; let log_failure = |error, _| { warn!(self.log, "failed to initialize services"; "error" => ?error); @@ -308,11 +287,16 @@ impl ServiceInner { retry_notify(internal_service_policy(), services_put, log_failure) .await?; - // Initialize DNS records for the Nexus service. - let services: Vec<_> = services.iter().filter(|svc| { - matches!(svc.service_type, crate::params::ServiceType::Nexus { .. }) - }).collect(); + let services: Vec<_> = services + .iter() + .filter(|svc| { + matches!( + svc.service_type, + crate::params::ServiceType::Nexus { .. } + ) + }) + .collect(); // Early-exit for non-Nexus case if services.is_empty() { @@ -330,7 +314,7 @@ impl ServiceInner { NEXUS_INTERNAL_PORT, 0, 0, - ) + ), ) }) .collect::>(); @@ -395,23 +379,25 @@ impl ServiceInner { ) -> Result<(), SetupServiceError> { info!(self.log, "Handing off control to Nexus"); - let resolver = internal_dns_client::multiclient::create_resolver(config.az_subnet()) - .expect("Failed to create DNS resolver"); - let response = resolver.lookup_ip( - &SRV::Service("nexus".to_string()).to_string() - ).await.expect("Failed to lookup IP"); + let resolver = internal_dns_client::multiclient::create_resolver( + config.az_subnet(), + ) + .expect("Failed to create DNS resolver"); + let response = resolver + .lookup_ip(&SRV::Service("nexus".to_string()).to_string()) + .await + .expect("Failed to lookup IP"); - let nexus_address = response.iter() + let nexus_address = response + .iter() .next() - .map(|addr| { - SocketAddr::new(addr, NEXUS_INTERNAL_PORT) - }) + .map(|addr| SocketAddr::new(addr, NEXUS_INTERNAL_PORT)) .expect("no addresses returned from DNS resolver"); info!(self.log, "Nexus address: {}", nexus_address.to_string()); let nexus_client = NexusClient::new( &format!("http://{}", nexus_address), - self.log.new(o!("component" => "NexusClient")) + self.log.new(o!("component" => "NexusClient")), ); // Ensure we can quickly look up "Sled Agent Address" -> "UUID of sled". @@ -419,7 +405,8 @@ impl ServiceInner { // We need the ID when passing info to Nexus. 
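        // (The service plan is keyed by sled address, while the records sent
        // to Nexus reference sleds by UUID, hence the translation map built
        // below.)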
let mut id_map = HashMap::new(); for (_, sled_request) in sled_plan.sleds.iter() { - id_map.insert(get_sled_address(sled_request.subnet), sled_request.id); + id_map + .insert(get_sled_address(sled_request.subnet), sled_request.id); } // Convert all the information we have about services and datasets into @@ -427,63 +414,59 @@ impl ServiceInner { let mut services: Vec = vec![]; let mut datasets: Vec = vec![]; for (addr, service_request) in service_plan.services.iter() { - let sled_id = *id_map.get(addr) + let sled_id = *id_map + .get(addr) .expect("Sled address in service plan, but not sled plan"); - for svc in service_request.services.iter().chain(service_request.dns_services.iter()) { + for svc in service_request + .services + .iter() + .chain(service_request.dns_services.iter()) + { let kind = match svc.service_type { ServiceType::Nexus { .. } => NexusTypes::ServiceKind::Nexus, - ServiceType::InternalDns { .. } => NexusTypes::ServiceKind::InternalDNS, + ServiceType::InternalDns { .. } => { + NexusTypes::ServiceKind::InternalDNS + } ServiceType::Oximeter => NexusTypes::ServiceKind::Oximeter, }; - services.push( - NexusTypes::ServicePutRequest { - service_id: svc.id, - sled_id, - // TODO: Should this be a vec, or a single value? - address: svc.addresses[0], - kind, - } - ) + services.push(NexusTypes::ServicePutRequest { + service_id: svc.id, + sled_id, + // TODO: Should this be a vec, or a single value? + address: svc.addresses[0], + kind, + }) } for dataset in service_request.datasets.iter() { - datasets.push( - NexusTypes::DatasetCreateRequest { - zpool_id: dataset.zpool_id, - dataset_id: dataset.id, - request: NexusTypes::DatasetPutRequest { - address: dataset.address.to_string(), - kind: dataset.dataset_kind.clone().into() - }, - } - ) + datasets.push(NexusTypes::DatasetCreateRequest { + zpool_id: dataset.zpool_id, + dataset_id: dataset.id, + request: NexusTypes::DatasetPutRequest { + address: dataset.address.to_string(), + kind: dataset.dataset_kind.clone().into(), + }, + }) } } - let request = NexusTypes::RackInitializationRequest { - services, - datasets, - }; + let request = + NexusTypes::RackInitializationRequest { services, datasets }; let notify_nexus = || async { - nexus_client.rack_initialization_complete( - &sled_plan.rack_id, - &request, - ) - .await - .map_err(BackoffError::transient) + nexus_client + .rack_initialization_complete(&sled_plan.rack_id, &request) + .await + .map_err(BackoffError::transient) }; let log_failure = |err, _| { info!(self.log, "Failed to handoff to nexus: {err}"); }; - retry_notify( - internal_service_policy(), - notify_nexus, - log_failure, - ).await?; + retry_notify(internal_service_policy(), notify_nexus, log_failure) + .await?; info!(self.log, "Handoff to Nexus is complete"); Ok(()) @@ -533,15 +516,13 @@ impl ServiceInner { "RSS configuration looks like it has already been applied", ); - let sled_plan = SledPlan::load(&self.log).await? + let sled_plan = SledPlan::load(&self.log) + .await? .expect("Sled plan should exist if completed marker exists"); - let service_plan = ServicePlan::load(&self.log).await? + let service_plan = ServicePlan::load(&self.log) + .await? 
.expect("Service plan should exist if completed marker exists"); - self.handoff_to_nexus( - &config, - &sled_plan, - &service_plan - ).await?; + self.handoff_to_nexus(&config, &sled_plan, &service_plan).await?; return Ok(()); } else { @@ -553,7 +534,9 @@ impl ServiceInner { // - Enough peers to create a new plan (if one does not exist) let maybe_sled_plan = SledPlan::load(&self.log).await?; let expectation = if let Some(plan) = &maybe_sled_plan { - PeerExpectation::LoadOldPlan(plan.sleds.keys().map(|a| *a.ip()).collect()) + PeerExpectation::LoadOldPlan( + plan.sleds.keys().map(|a| *a.ip()).collect(), + ) } else { PeerExpectation::CreateNewPlan(MINIMUM_SLED_COUNT) }; @@ -577,37 +560,38 @@ impl ServiceInner { // Forward the sled initialization requests to our sled-agent. local_bootstrap_agent .initialize_sleds( - plan.sleds.iter() + plan.sleds + .iter() .map(|(bootstrap_addr, initialization_request)| { - ( - *bootstrap_addr, - initialization_request.clone(), - ) + (*bootstrap_addr, initialization_request.clone()) }) .collect(), ) .await .map_err(SetupServiceError::SledInitialization)?; - let sled_addresses: Vec<_> = plan.sleds.iter() + let sled_addresses: Vec<_> = plan + .sleds + .iter() .map(|(_, initialization_request)| { - get_sled_address( - initialization_request.subnet, - ) + get_sled_address(initialization_request.subnet) }) .collect(); // Now that sled agents have been initialized, we can create // a service allocation plan. - let service_plan = if let Some(plan) = ServicePlan::load(&self.log).await? { - plan - } else { - ServicePlan::create(&self.log, &config, &sled_addresses).await? - }; + let service_plan = + if let Some(plan) = ServicePlan::load(&self.log).await? { + plan + } else { + ServicePlan::create(&self.log, &config, &sled_addresses).await? + }; // Set up internal DNS services. futures::future::join_all( - service_plan.services.iter() + service_plan + .services + .iter() .filter(|(_, service_request)| { // Only send requests to sleds that are supposed to be running // DNS services. @@ -638,11 +622,8 @@ impl ServiceInner { // Issue the crdb initialization requests to all sleds. futures::future::join_all(service_plan.services.iter().map( |(sled_address, services_request)| async move { - self.initialize_crdb( - *sled_address, - &services_request.datasets, - ) - .await?; + self.initialize_crdb(*sled_address, &services_request.datasets) + .await?; Ok(()) }, )) @@ -687,9 +668,7 @@ impl ServiceInner { // the requests on the next iteration. tokio::fs::File::create(&rss_completed_plan_path).await.map_err( |err| SetupServiceError::Io { - message: format!( - "creating {rss_completed_plan_path:?}" - ), + message: format!("creating {rss_completed_plan_path:?}"), err, }, )?; @@ -697,11 +676,7 @@ impl ServiceInner { // At this point, even if we reboot, we must not try to manage sleds, // services, or DNS records. - self.handoff_to_nexus( - &config, - &plan, - &service_plan - ).await?; + self.handoff_to_nexus(&config, &plan, &service_plan).await?; // TODO Questions to consider: // - What if a sled comes online *right after* this setup? 
How does diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 2e05648ffc3..6273e1f2a2f 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -46,10 +46,16 @@ impl Server { let lazy_nexus_client = LazyNexusClient::new(client_log, *addr.ip()); - let sled_agent = - SledAgent::new(&config, log.clone(), lazy_nexus_client.clone(), sled_id, addr, rack_id) - .await - .map_err(|e| e.to_string())?; + let sled_agent = SledAgent::new( + &config, + log.clone(), + lazy_nexus_client.clone(), + sled_id, + addr, + rack_id, + ) + .await + .map_err(|e| e.to_string())?; let mut dropshot_config = dropshot::ConfigDropshot::default(); dropshot_config.request_body_max_bytes = 1024 * 1024; @@ -77,7 +83,8 @@ impl Server { log, "contacting server nexus, registering sled: {}", sled_id ); - let nexus_client = lazy_nexus_client.get() + let nexus_client = lazy_nexus_client + .get() .await .map_err(|err| BackoffError::transient(err.to_string()))?; nexus_client @@ -97,7 +104,9 @@ impl Server { ); }; retry_notify( - internal_service_policy_with_max(std::time::Duration::from_secs(5)), + internal_service_policy_with_max( + std::time::Duration::from_secs(5), + ), notify_nexus, log_notification_failure, ) diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index b45c56b374c..9fbada9571e 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -14,7 +14,7 @@ use crate::instance_manager::InstanceManager; use crate::nexus::LazyNexusClient; use crate::params::{ DatasetKind, DiskStateRequested, InstanceHardware, InstanceMigrateParams, - InstanceRuntimeStateRequested, ServiceEnsureBody, Zpool + InstanceRuntimeStateRequested, ServiceEnsureBody, Zpool, }; use crate::services::{self, ServiceManager}; use crate::storage_manager::StorageManager; @@ -249,13 +249,7 @@ impl SledAgent { ) .await?; - Ok(SledAgent { - id, - storage, - instances, - lazy_nexus_client, - services, - }) + Ok(SledAgent { id, storage, instances, lazy_nexus_client, services }) } pub fn id(&self) -> Uuid { @@ -274,12 +268,8 @@ impl SledAgent { Ok(()) } - pub async fn zpools_get( - &self - ) -> Result, Error> { - let zpools = self.storage - .get_zpools() - .await?; + pub async fn zpools_get(&self) -> Result, Error> { + let zpools = self.storage.get_zpools().await?; Ok(zpools) } @@ -328,12 +318,13 @@ impl SledAgent { &self, artifact: UpdateArtifact, ) -> Result<(), Error> { - let nexus_client = self.lazy_nexus_client.get() + let nexus_client = self + .lazy_nexus_client + .get() .await // TODO: Handle error .unwrap(); - crate::updates::download_artifact(artifact, &nexus_client) - .await?; + crate::updates::download_artifact(artifact, &nexus_client).await?; Ok(()) } } diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 6305c41f949..5b82396b6fd 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -522,9 +522,7 @@ async fn ensure_running_zone( } } -type NotifyFut = dyn futures::Future< - Output = Result<(), String> - > + Send; +type NotifyFut = dyn futures::Future> + Send; #[derive(Debug)] struct NewFilesystemRequest { @@ -639,10 +637,14 @@ impl StorageWorker { lazy_nexus_client .get() .await - .map_err(|e| backoff::BackoffError::transient(e.to_string()))? + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })? 
.zpool_put(&sled_id, &pool_id, &zpool_request) .await - .map_err(|e| backoff::BackoffError::transient(e.to_string()))?; + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?; Ok(()) } }; @@ -684,10 +686,14 @@ impl StorageWorker { lazy_nexus_client .get() .await - .map_err(|e| backoff::BackoffError::transient(e.to_string()))? + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })? .dataset_put(&pool_id, &id, &request) .await - .map_err(|e| backoff::BackoffError::transient(e.to_string()))?; + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?; } Ok(()) @@ -955,11 +961,10 @@ impl StorageManager { pub async fn get_zpools(&self) -> Result, Error> { let pools = self.pools.lock().await; - Ok(pools.keys().map(|zpool| { - crate::params::Zpool { - id: zpool.id() - } - }).collect()) + Ok(pools + .keys() + .map(|zpool| crate::params::Zpool { id: zpool.id() }) + .collect()) } pub async fn upsert_filesystem( diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index c8652efb5ea..9fb540b075f 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -5,59 +5,3 @@ # |............| <- This /48 is the AZ Subnet # |...............| <- This /56 is the Rack Subnet rack_subnet = "fd00:1122:3344:0100::" - -# [[request]] -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate crucible datasets. -# [[request.dataset]] -# id = "09a9a25f-2602-4e2f-9630-31af9c492c3e" -# zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -# address = "[fd00:1122:3344:0101::6]:32345" -# dataset_kind.type = "crucible" -# -# [[request.dataset]] -# id = "2713b37a-3043-4ed5-aaff-f38200e45cfb" -# zpool_id = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" -# address = "[fd00:1122:3344:0101::7]:32345" -# dataset_kind.type = "crucible" -# -# [[request.dataset]] -# id = "ffd16cad-e5d5-495e-9c59-4312a3857d91" -# zpool_id = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" -# address = "[fd00:1122:3344:0101::8]:32345" -# dataset_kind.type = "crucible" - -# [[request.dataset]] -# id = "4d08fc19-3d5f-4f6b-9c48-925f8eac7255" -# zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -# address = "[fd00:1122:3344:0101::2]:32221" -# dataset_kind.type = "cockroach_db" -# dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate clickhouse datasets. -# [[request.dataset]] -# id = "a3505b41-a592-420b-84f2-3d76bf0e0a81" -# zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -# address = "[fd00:1122:3344:0101::5]:8123" -# dataset_kind.type = "clickhouse" - -# [[request.service]] -# id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" -# name = "nexus" -# addresses = [ "fd00:1122:3344:0101::3" ] -# gz_addresses = [] -# [request.service.service_type] -# type = "nexus" -# internal_address = "[fd00:1122:3344:0101::3]:12221" -# external_address = "[fd00:1122:3344:0101::3]:12220" - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate Oximeter services. 
-# [[request.service]] -# id = "1da65e5b-210c-4859-a7d7-200c1e659972" -# name = "oximeter" -# addresses = [ "fd00:1122:3344:0101::4" ] -# gz_addresses = [] -# [request.service.service_type] -# type = "oximeter" From e265f0d0b937d378eb64cd306231b24bf638992f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 12 Jun 2022 16:43:14 -0400 Subject: [PATCH 14/88] Await RSS handoff, even in tests --- nexus/src/lib.rs | 87 ++++++++++++++----- nexus/test-utils/src/lib.rs | 33 ++++++- .../tests/integration_tests/users_builtin.rs | 3 +- 3 files changed, 95 insertions(+), 28 deletions(-) diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 079a7a26f54..f743c7e19ba 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -66,23 +66,27 @@ pub fn run_openapi_internal() -> Result<(), String> { .map_err(|e| e.to_string()) } -/// Packages up a [`Nexus`], running both external and internal HTTP API servers -/// wired up to Nexus -pub struct Server { +/// A partially-initialized Nexus server, which exposes an internal interface, +/// but is not ready to receive external requests. +pub struct InternalServer<'a> { /// shared state used by API request handlers pub apictx: Arc, - /// dropshot server for external API - pub http_server_external: dropshot::HttpServer>, /// dropshot server for internal API pub http_server_internal: dropshot::HttpServer>, + + config: &'a Config, + log: Logger, } -impl Server { - /// Start a nexus server. +impl<'a> InternalServer<'a> { + /// Creates a Nexus instance with only the internal API exposed. + /// + /// This is often used as an argument when creating a [`Server`], + /// which also exposes the external API. pub async fn start( - config: &Config, + config: &'a Config, log: &Logger, - ) -> Result { + ) -> Result, String> { let log = log.new(o!("name" => config.runtime.id.to_string())); info!(log, "setting up nexus server"); @@ -100,33 +104,67 @@ impl Server { .map_err(|error| format!("initializing internal server: {}", error))?; let http_server_internal = http_server_starter_internal.start(); + Ok(Self { apictx, http_server_internal, config, log }) + } +} + +/// Packages up a [`Nexus`], running both external and internal HTTP API servers +/// wired up to Nexus +pub struct Server { + /// shared state used by API request handlers + pub apictx: Arc, + /// dropshot server for external API + pub http_server_external: dropshot::HttpServer>, + /// dropshot server for internal API + pub http_server_internal: dropshot::HttpServer>, +} + +impl Server { + pub async fn start<'a>( + internal: InternalServer<'a>, + ) -> Result { + let apictx = internal.apictx; + let http_server_internal = internal.http_server_internal; + let log = internal.log; + let config = internal.config; + // Wait until RSS handoff completes. - // TODO: This messes up the tests. Should we make this a config option? - // - // TODO: This actually raises a question; what triggers background tasks - // to execute? - // - // - Perhaps the API is exposed to tests? - // - Perhaps the invocation of that API is controlled by config - // options? 
- /* let opctx = apictx.nexus.opctx_for_background(); loop { - let result = apictx.nexus.rack_lookup(&opctx, &config.runtime.rack_id).await; + let result = + apictx.nexus.rack_lookup(&opctx, &config.runtime.rack_id).await; match result { Ok(rack) => { if rack.initialized { break; } - info!(log, "Still waiting for rack initialization: {:?}", rack); - }, + info!( + log, + "Still waiting for rack initialization: {:?}", rack + ); + } Err(e) => { warn!(log, "Cannot look up rack: {}", e); - }, + } } tokio::time::sleep(std::time::Duration::from_secs(2)).await; } - */ + + // TODO: What triggers background tasks to execute? + // + // - Perhaps the API is exposed to tests? + // - Perhaps the invocation of that API is controlled by config + // options? + // + // TODO: services we need to start: + // + // Datasets: + // - Crucible (as a dataset on each unique zpool) + // - Clickhouse (as a dataset on a zpool) + // - CRDB (prolly just check it exists, period) + // + // - Oximeter (as a service) + // - Nexus (again, maybe just check it exists at all) let http_server_starter_external = dropshot::HttpServerStarter::new( &config.runtime.dropshot_external, @@ -193,7 +231,8 @@ pub async fn run_server(config: &Config) -> Result<(), String> { } else { debug!(log, "registered DTrace probes"); } - let server = Server::start(config, &log).await?; + let internal_server = InternalServer::start(config, &log).await?; + let server = Server::start(internal_server).await?; server.register_as_producer().await; server.wait_for_finish().await } diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 48fa1fec479..d3a22be62e5 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -103,15 +103,42 @@ pub async fn test_setup_with_config( nexus_config::Database::FromUrl { url: database.pg_config().clone() }; config.pkg.timeseries_db.address.set_port(clickhouse.port()); - let server = - omicron_nexus::Server::start(&config, &logctx.log).await.unwrap(); - server + // Start the Nexus internal API. + let internal_server = + omicron_nexus::InternalServer::start(&config, &logctx.log) + .await + .unwrap(); + internal_server .apictx .nexus .wait_for_populate() .await .expect("Nexus never loaded users"); + // Perform the "handoff from RSS". + // + // However, RSS isn't running, so we'll do the handoff ourselves. + let opctx = internal_server.apictx.nexus.opctx_for_background(); + internal_server + .apictx + .nexus + .rack_initialize( + &opctx, + config.runtime.rack_id, + // NOTE: In the context of this test utility, we arguably do have an + // instance of CRDB and Nexus running. However, as this info isn't + // necessary for most tests, we pass no information here. + omicron_nexus::internal_api::params::RackInitializationRequest { + services: vec![], + datasets: vec![], + }, + ) + .await + .expect("Could not initialize rack"); + + // Start the Nexus external API. 
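+    //
+    // Because the rack was explicitly initialized above, `Server::start`
+    // should observe `rack.initialized == true` right away and proceed
+    // without blocking the test.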
+ let server = omicron_nexus::Server::start(internal_server).await.unwrap(); + let testctx_external = ClientTestContext::new( server.http_server_external.local_addr(), logctx.log.new(o!("component" => "external client test context")), diff --git a/nexus/tests/integration_tests/users_builtin.rs b/nexus/tests/integration_tests/users_builtin.rs index c6d3615c9ef..0df3fbaf04b 100644 --- a/nexus/tests/integration_tests/users_builtin.rs +++ b/nexus/tests/integration_tests/users_builtin.rs @@ -27,7 +27,8 @@ async fn test_users_builtin(cptestctx: &ControlPlaneTestContext) { let u = users.remove(&authn::USER_DB_INIT.name.to_string()).unwrap(); assert_eq!(u.identity.id, authn::USER_DB_INIT.id); - let u = users.remove(&authn::USER_BACKGROUND_WORK.name.to_string()).unwrap(); + let u = + users.remove(&authn::USER_BACKGROUND_WORK.name.to_string()).unwrap(); assert_eq!(u.identity.id, authn::USER_BACKGROUND_WORK.id); let u = users.remove(&authn::USER_INTERNAL_API.name.to_string()).unwrap(); assert_eq!(u.identity.id, authn::USER_INTERNAL_API.id); From b5ca139e19cc34a385a011ad4f954eb4081c84a7 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 13 Jun 2022 01:13:42 -0400 Subject: [PATCH 15/88] Partway through service allocation - still very WIP --- common/src/address.rs | 4 +- nexus/src/app/background/mod.rs | 7 + nexus/src/app/background/services.rs | 285 +++++++++++++++++++++++++++ nexus/src/app/mod.rs | 11 ++ nexus/src/app/rack.rs | 33 ++++ nexus/src/db/datastore.rs | 59 +++++- nexus/src/lib.rs | 20 +- 7 files changed, 397 insertions(+), 22 deletions(-) create mode 100644 nexus/src/app/background/mod.rs create mode 100644 nexus/src/app/background/services.rs diff --git a/common/src/address.rs b/common/src/address.rs index b105588b587..7a3c488a82d 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -20,12 +20,12 @@ pub const SLED_PREFIX: u8 = 64; /// The amount of redundancy for DNS servers. /// /// Must be less than MAX_DNS_REDUNDANCY. -pub const DNS_REDUNDANCY: usize = 1; +pub const DNS_REDUNDANCY: u32 = 1; /// The maximum amount of redundancy for DNS servers. /// /// This determines the number of addresses which are /// reserved for DNS servers. -pub const MAX_DNS_REDUNDANCY: usize = 5; +pub const MAX_DNS_REDUNDANCY: u32 = 5; pub const DNS_PORT: u16 = 53; pub const DNS_SERVER_PORT: u16 = 5353; diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs new file mode 100644 index 00000000000..bd25adc89e9 --- /dev/null +++ b/nexus/src/app/background/mod.rs @@ -0,0 +1,7 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background tasks managed by Nexus. + +mod services; diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs new file mode 100644 index 00000000000..cbb1e8c81ba --- /dev/null +++ b/nexus/src/app/background/services.rs @@ -0,0 +1,285 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Task which ensures that expected Nexus services exist. 
+ +use crate::Nexus; +use crate::context::OpContext; +use crate::db::identity::Asset; +use crate::db::model::DatasetKind; +use crate::db::model::ServiceKind; +use omicron_common::api::external::Error; +use omicron_common::address::{DNS_REDUNDANCY, ReservedRackSubnet}; +use slog::Logger; +use std::sync::Arc; +use std::net::Ipv6Addr; +use uuid::Uuid; + +// Policy for the number of services to be provisioned. +#[derive(Debug)] +enum ServiceRedundancy { + // This service must exist on at least this many sleds + // within the racki. + PerRack(u32), + + // This service must exist on at least this many sleds + // within the availability zone. + DnsPerAz(u32), +} + +#[derive(Debug)] +struct ExpectedService { + kind: ServiceKind, + redundancy: ServiceRedundancy, +} + +const EXPECTED_SERVICES: [ExpectedService; 3] = [ + ExpectedService { + kind: ServiceKind::InternalDNS, + redundancy: ServiceRedundancy::DnsPerAz(DNS_REDUNDANCY), + }, + ExpectedService { + kind: ServiceKind::Nexus, + redundancy: ServiceRedundancy::PerRack(1), + }, + ExpectedService { + kind: ServiceKind::Oximeter, + redundancy: ServiceRedundancy::PerRack(1), + }, +]; + +pub struct ServiceWorker { + log: Logger, + nexus: Arc, +} + +impl ServiceWorker { + async fn ensure_rack_svc( + &self, + opctx: &OpContext, + expected_svc: &ExpectedService, + desired_count: u32, + ) -> Result<(), Error> { + // Look up all the sleds, both with and without the service. + let sleds_and_maybe_svcs = self.nexus + .datastore() + .sled_and_service_list( + opctx, + expected_svc.kind.clone(), + self.nexus.rack_id, + ) + .await?; + let (sleds_with_svc, sleds_without_svc): (Vec<_>, Vec<_>) = + sleds_and_maybe_svcs + .iter() + .partition(|(_, maybe_svc)| { + maybe_svc.is_some() + }); + let mut sleds_without_svc = sleds_without_svc.into_iter() + .map(|(sled, _)| sled); + let mut actual_count = sleds_with_svc.len() as u32; + + // Add services to sleds, in-order, until we've met a + // number sufficient for our redundancy. + while desired_count < actual_count { + let sled = sleds_without_svc.next().ok_or_else(|| { + Error::internal_error("Not enough sleds to deploy service") + })?; + let svc_id = Uuid::new_v4(); + let address = self.nexus.datastore() + .next_ipv6_address(&opctx, sled.id()) + .await?; + + self.nexus.upsert_service( + &opctx, + svc_id, + sled.id(), + address, + expected_svc.kind.clone() + ) + .await?; + + actual_count += 1; + } + + // TODO: Actually deploy service + + Ok(()) + } + + async fn ensure_dns_svc( + &self, + opctx: &OpContext, + expected_svc: &ExpectedService, + desired_count: u32, + ) -> Result<(), Error> { + if !matches!(expected_svc.kind, ServiceKind::InternalDNS) { + // NOTE: This is a constraint on how we allocate IP addresses + // within the AZ - however, as DNS is the only existing + // AZ-wide service, support for this has been punted. + return Err(Error::internal_error( + &format!("DNS is the only suppoted svc ({:?} is not supported)", expected_svc), + )); + } + + // Look up all existing DNS services. + // + // Note that we should not look up "all services" - as internal DNS servers + // are rack-wide, this would be too expensive of an operation. + let existing_services = self.nexus + .datastore() + .dns_service_list(opctx) + .await?; + + let mut actual_count = existing_services.len() as u32; + + // Get all subnets not allocated to existing services. 
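+        // A DNS subnet is usable only if its well-known DNS address is not
+        // already claimed by one of the existing internal DNS services.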
+ let mut usable_dns_subnets = ReservedRackSubnet(self.nexus.rack_subnet) + .get_dns_subnets() + .into_iter() + .filter(|subnet| { + // This address is only usable if none of the existing + // DNS services are using it. + existing_services.iter() + .all(|svc| Ipv6Addr::from(svc.ip) != subnet.dns_address().ip()) + }); + + // Get all sleds which aren't already running DNS services. + let mut target_sleds = self.nexus + .datastore() + .sled_list_with_limit(opctx, desired_count) + .await? + .into_iter() + .filter(|sled| { + // The target sleds are only considered if they aren't already + // running a DNS service. + existing_services.iter() + .all(|svc| svc.sled_id != sled.id()) + }); + + while desired_count < actual_count { + let sled = target_sleds.next().ok_or_else(|| { + Error::internal_error("Not enough sleds to deploy service") + })?; + let svc_id = Uuid::new_v4(); + let dns_subnet = usable_dns_subnets.next().ok_or_else(|| { + Error::internal_error("Not enough IPs to deploy service") + })?; + let address = dns_subnet + .dns_address() + .ip(); + + self.nexus.upsert_service( + &opctx, + svc_id, + sled.id(), + address, + expected_svc.kind.clone() + ) + .await?; + + actual_count += 1; + } + + // TODO: actually deploy service + + Ok(()) + } + + // Provides a single point-in-time evaluation and adjustment of + // the services provisioned within the rack. + // + // May adjust the provisioned services to meet the redundancy of the + // rack, if necessary. + // + // TODO: Can we: + // - [ ] Put these steps in a saga, to ensure they happen + // - [ ] Use a state variable on the rack to ensure mutual exclusion + // of service re-balancing. It's an involved operation; it would + // be nice to not be conflicting with anyone else while operating - + // and also helps us avoid using transactions. + pub async fn ensure_services_provisioned( + &self, + opctx: &OpContext, + ) -> Result<(), Error> { + for expected_svc in &EXPECTED_SERVICES { + info!( + self.log, + "Ensuring service {:?} exists according to redundancy {:?}", + expected_svc.kind, + expected_svc.redundancy, + ); + match expected_svc.redundancy { + ServiceRedundancy::PerRack(desired_count) => { + self.ensure_rack_svc(opctx, expected_svc, desired_count).await?; + }, + ServiceRedundancy::DnsPerAz(desired_count) => { + self.ensure_dns_svc(opctx, expected_svc, desired_count).await?; + } + } + } + + // Strategy: + // + // TODO Step 1. In a transaction: + // - Look up all sleds within the Rack + // - Look up all the services of a particular kind (e.g., Oximeter) + // - IF enough exist, exit early. + // - ELSE assign services to sleds. Write to Db. + // + // Step 2. As follow-up: request those svcs execute on sleds. + + Ok(()) + + } +} + +// Redundancy for the number of datasets to be provisioned. +enum DatasetRedundancy { + // The dataset should exist on all zpools. + OnAll, + // The dataset should exist on at least this many zpools. 
+ PerRack(u32), +} + +struct ExpectedDataset { + kind: DatasetKind, + redundancy: DatasetRedundancy, +} + +const EXPECTED_DATASERT: [ExpectedDataset; 3] = [ + ExpectedDataset { + kind: DatasetKind::Crucible, + redundancy: DatasetRedundancy::OnAll, + }, + ExpectedDataset { + kind: DatasetKind::Cockroach, + redundancy: DatasetRedundancy::PerRack(1), + }, + ExpectedDataset { + kind: DatasetKind::Clickhouse, + redundancy: DatasetRedundancy::PerRack(1), + }, +]; + +fn ensure_datasets_provisioned() { + // TODO: + // - [ ] Each zpool has Crucible + // - [ ] Clickhouse exists on N zpools + // - [ ] CRDB exists on N zpools + + // Strategy: + // + // Step 1. In a transaction: + // - Look up all sleds within the Rack + // - Look up all zpools within those sleds + // + // - Look up all the services of a particular kind (e.g., Oximeter) + // - IF enough exist, exit early. + // - ELSE assign services to sleds. Write to Db. + // + // Step 2. As follow-up: request those datasets exist on sleds. + + +} diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 27f8fd650fc..51c326390dd 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -14,6 +14,7 @@ use crate::populate::PopulateArgs; use crate::populate::PopulateStatus; use crate::saga_interface::SagaContext; use anyhow::anyhow; +use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; use omicron_common::api::external::Error; use slog::Logger; use std::sync::Arc; @@ -39,6 +40,9 @@ mod vpc; mod vpc_router; mod vpc_subnet; +// Background tasks exist in the "background" module. +mod background; + // Sagas are not part of the "Nexus" implementation, but they are // application logic. mod sagas; @@ -58,6 +62,9 @@ pub struct Nexus { /// uuid for this rack rack_id: Uuid, + /// subnet of this rack + rack_subnet: Ipv6Subnet, + /// general server log log: Logger, @@ -151,6 +158,7 @@ impl Nexus { let nexus = Nexus { id: config.runtime.id, rack_id, + rack_subnet: config.runtime.subnet, log: log.new(o!()), db_datastore: Arc::clone(&db_datastore), authz: Arc::clone(&authz), @@ -234,6 +242,9 @@ impl Nexus { } /// Returns an [`OpContext`] used for background tasks. + // TODO: Probably should be making a *new* opctx here? + // + // I think there should be one-per-"op", to get better metrics on bg ops. pub fn opctx_for_background(&self) -> &OpContext { &self.opctx_background_work } diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index dde3df7449c..f180395d4e1 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -93,4 +93,37 @@ impl super::Nexus { Ok(()) } + + /// Awaits the initialization of the rack. + /// + /// This will occur by either: + /// 1. RSS invoking the internal API, handing off responsibility, or + /// 2. Re-reading a value from the DB, if the rack has already been + /// initialized. + /// + /// See RFD 278 for additional context. 
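+    ///
+    /// This is intended to be called before the external API server is
+    /// started, so that Nexus does not serve external requests until the
+    /// rack reports itself as initialized.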
+ pub async fn await_rack_initialization( + &self, + opctx: &OpContext + ) { + loop { + let result = self.rack_lookup(&opctx, &self.rack_id).await; + match result { + Ok(rack) => { + if rack.initialized { + return; + } + info!( + self.log, + "Still waiting for rack initialization: {:?}", rack + ); + } + Err(e) => { + warn!(self.log, "Cannot look up rack: {}", e); + } + } + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + } + + } } diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index f1cba756c85..2b3285395e3 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -55,7 +55,7 @@ use crate::db::{ Instance, InstanceRuntimeState, Name, NetworkInterface, Organization, OrganizationUpdate, OximeterInfo, ProducerEndpoint, Project, ProjectUpdate, Rack, Region, RoleAssignment, RoleBuiltin, RouterRoute, - RouterRouteUpdate, Service, Silo, SiloUser, Sled, SshKey, + RouterRouteUpdate, Service, ServiceKind, Silo, SiloUser, Sled, SshKey, UpdateAvailableArtifact, UserBuiltin, Volume, Vpc, VpcFirewallRule, VpcRouter, VpcRouterUpdate, VpcSubnet, VpcSubnetUpdate, VpcUpdate, Zpool, @@ -397,6 +397,63 @@ impl DataStore { .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) } + // TODO: de-duplicate with sled_list? + pub async fn sled_list_with_limit( + &self, + opctx: &OpContext, + limit: u32, + ) -> ListResultVec { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::sled::dsl; + dsl::sled + .filter(dsl::time_deleted.is_null()) + .limit(limit as i64) + .select(Sled::as_select()) + .load_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + + // TODO-correctness: Filter the sleds by rack ID! + // This filtering will feasible when Sleds store a FK for + // the rack on which they're stored. + pub async fn sled_and_service_list( + &self, + opctx: &OpContext, + kind: ServiceKind, + _rack_id: Uuid, + ) -> ListResultVec<(Sled, Option)> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::service::dsl as svc_dsl; + use db::schema::sled::dsl as sled_dsl; + + db::schema::sled::table + .filter(sled_dsl::time_deleted.is_null()) + .left_outer_join(db::schema::service::table.on( + svc_dsl::id.eq(svc_dsl::sled_id) + )) + .filter(svc_dsl::kind.eq(kind)) + .select(<(Sled, Option)>::as_select()) + .get_results_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + + pub async fn dns_service_list( + &self, + opctx: &OpContext, + ) -> ListResultVec { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::service::dsl as svc; + + svc::service + .filter(svc::kind.eq(ServiceKind::InternalDNS)) + .select(Service::as_select()) + .get_results_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + /// Stores a new zpool in the database. pub async fn zpool_upsert(&self, zpool: Zpool) -> CreateResult { use db::schema::zpool::dsl; diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index f743c7e19ba..60881613181 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -130,25 +130,7 @@ impl Server { // Wait until RSS handoff completes. 
let opctx = apictx.nexus.opctx_for_background(); - loop { - let result = - apictx.nexus.rack_lookup(&opctx, &config.runtime.rack_id).await; - match result { - Ok(rack) => { - if rack.initialized { - break; - } - info!( - log, - "Still waiting for rack initialization: {:?}", rack - ); - } - Err(e) => { - warn!(log, "Cannot look up rack: {}", e); - } - } - tokio::time::sleep(std::time::Duration::from_secs(2)).await; - } + apictx.nexus.await_rack_initialization(&opctx).await; // TODO: What triggers background tasks to execute? // From 7e986b85a00bb9a35ba03c6cade5222ff440da01 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 13 Jun 2022 23:51:08 -0400 Subject: [PATCH 16/88] v1 of nexus-managed services is code complete; no tests yet --- common/src/address.rs | 1 + nexus/src/app/background/mod.rs | 33 ++ nexus/src/app/background/services.rs | 409 +++++++++++++------------ nexus/src/app/mod.rs | 30 +- nexus/src/db/datastore.rs | 432 +++++++++++++++++++++++++-- nexus/src/db/model/dataset.rs | 3 +- nexus/src/db/model/dataset_kind.rs | 11 + nexus/src/lib.rs | 3 + 8 files changed, 698 insertions(+), 224 deletions(-) diff --git a/common/src/address.rs b/common/src/address.rs index 7a3c488a82d..bd5d03da2e6 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -35,6 +35,7 @@ pub const SLED_AGENT_PORT: u16 = 12345; pub const PROPOLIS_PORT: u16 = 12400; pub const COCKROACH_PORT: u16 = 32221; pub const CRUCIBLE_PORT: u16 = 32345; +pub const CLICKHOUSE_PORT: u16 = 8123; pub const NEXUS_EXTERNAL_PORT: u16 = 12220; pub const NEXUS_INTERNAL_PORT: u16 = 12221; diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index bd25adc89e9..d091fdd7858 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -5,3 +5,36 @@ //! Background tasks managed by Nexus. mod services; + +use crate::app::Nexus; +use std::sync::Arc; +use tokio::task::{spawn, JoinHandle}; + +/// Management structure which encapsulates periodically-executing background +/// tasks. +pub struct TaskRunner { + _handle: JoinHandle<()>, +} + +impl TaskRunner { + pub fn new(nexus: Arc) -> Self { + let handle = spawn(async move { + let log = nexus.log.new(o!("component" => "BackgroundTaskRunner")); + let service_balancer = services::ServiceBalancer::new(log.clone(), nexus.clone()); + + loop { + // TODO: We may want triggers to exist here, to invoke this task + // more frequently (e.g., on Sled failure). 
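+                //
+                // For now this is a fixed-interval poll (every 30 seconds,
+                // below); a failed pass is logged and retried on the next
+                // iteration.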
+ let opctx = nexus.opctx_for_background(); + if let Err(e) = service_balancer.balance_services(&opctx).await { + warn!(log, "Failed to balance services: {:?}", e); + } + + tokio::time::sleep(std::time::Duration::from_secs(30)).await; + } + }); + Self { + _handle: handle, + } + } +} diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index cbb1e8c81ba..6f60dc09904 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -6,15 +6,23 @@ use crate::Nexus; use crate::context::OpContext; +use crate::db::datastore::DatasetRedundancy; use crate::db::identity::Asset; +use crate::db::model::Dataset; use crate::db::model::DatasetKind; +use crate::db::model::Service; use crate::db::model::ServiceKind; +use crate::db::model::Sled; +use crate::db::model::Zpool; use omicron_common::api::external::Error; -use omicron_common::address::{DNS_REDUNDANCY, ReservedRackSubnet}; +use omicron_common::address::{ + DNS_REDUNDANCY, NEXUS_INTERNAL_PORT, NEXUS_EXTERNAL_PORT, DNS_SERVER_PORT, DNS_PORT +}; +use sled_agent_client::types as SledAgentTypes; use slog::Logger; +use std::collections::{HashMap, HashSet}; use std::sync::Arc; -use std::net::Ipv6Addr; -use uuid::Uuid; +use std::net::{Ipv6Addr, SocketAddrV6}; // Policy for the number of services to be provisioned. #[derive(Debug)] @@ -49,237 +57,254 @@ const EXPECTED_SERVICES: [ExpectedService; 3] = [ }, ]; -pub struct ServiceWorker { +#[derive(Debug)] +struct ExpectedDataset { + kind: DatasetKind, + redundancy: DatasetRedundancy, +} + +const EXPECTED_DATASETS: [ExpectedDataset; 3] = [ + ExpectedDataset { + kind: DatasetKind::Crucible, + redundancy: DatasetRedundancy::OnAll, + }, + ExpectedDataset { + kind: DatasetKind::Cockroach, + redundancy: DatasetRedundancy::PerRack(1), + }, + ExpectedDataset { + kind: DatasetKind::Clickhouse, + redundancy: DatasetRedundancy::PerRack(1), + }, +]; + +pub struct ServiceBalancer { log: Logger, nexus: Arc, } -impl ServiceWorker { - async fn ensure_rack_svc( +impl ServiceBalancer { + pub fn new(log: Logger, nexus: Arc) -> Self { + Self { + log, + nexus, + } + } + + // Reaches out to all sled agents implied in "services", and + // requests that the desired services are executing. + async fn instantiate_services( &self, opctx: &OpContext, - expected_svc: &ExpectedService, - desired_count: u32, + services: Vec ) -> Result<(), Error> { - // Look up all the sleds, both with and without the service. - let sleds_and_maybe_svcs = self.nexus - .datastore() - .sled_and_service_list( - opctx, - expected_svc.kind.clone(), - self.nexus.rack_id, - ) - .await?; - let (sleds_with_svc, sleds_without_svc): (Vec<_>, Vec<_>) = - sleds_and_maybe_svcs - .iter() - .partition(|(_, maybe_svc)| { - maybe_svc.is_some() - }); - let mut sleds_without_svc = sleds_without_svc.into_iter() - .map(|(sled, _)| sled); - let mut actual_count = sleds_with_svc.len() as u32; + let mut sled_ids = HashSet::new(); + for svc in &services { + sled_ids.insert(svc.sled_id); + } - // Add services to sleds, in-order, until we've met a - // number sufficient for our redundancy. - while desired_count < actual_count { - let sled = sleds_without_svc.next().ok_or_else(|| { - Error::internal_error("Not enough sleds to deploy service") - })?; - let svc_id = Uuid::new_v4(); - let address = self.nexus.datastore() - .next_ipv6_address(&opctx, sled.id()) - .await?; + // For all sleds requiring an update, request all services be + // instantiated. 
+ for sled_id in &sled_ids { + // TODO: This interface kinda sucks; ideally we would + // only insert the *new* services. + // + // Inserting the old ones too is costing us an extra query. + let services = self.nexus.datastore().service_list(opctx, *sled_id).await?; + let sled_client = self.nexus.sled_client(sled_id).await?; + + sled_client.services_put(&SledAgentTypes::ServiceEnsureBody { + services: services.iter().map(|s| { + let address = Ipv6Addr::from(s.ip); + let (name, service_type) = Self::get_service_name_and_type(address, s.kind.clone()); + + SledAgentTypes::ServiceRequest { + id: s.id(), + name: name.to_string(), + addresses: vec![address], + gz_addresses: vec![], + service_type, + } + }).collect() + }).await?; + } + Ok(()) + } - self.nexus.upsert_service( - &opctx, - svc_id, - sled.id(), - address, - expected_svc.kind.clone() + // Translates (address, db kind) to Sled Agent client types. + fn get_service_name_and_type( + address: Ipv6Addr, + kind: ServiceKind + ) -> (String, SledAgentTypes::ServiceType) { + match kind { + ServiceKind::Nexus => { + ( + "nexus".to_string(), + SledAgentTypes::ServiceType::Nexus { + internal_address: SocketAddrV6::new(address, NEXUS_INTERNAL_PORT, 0, 0).to_string(), + external_address: SocketAddrV6::new(address, NEXUS_EXTERNAL_PORT, 0, 0).to_string(), + } ) - .await?; - - actual_count += 1; + }, + ServiceKind::InternalDNS => { + ( + "internal-dns".to_string(), + SledAgentTypes::ServiceType::InternalDns { + server_address: SocketAddrV6::new(address, DNS_SERVER_PORT, 0, 0).to_string(), + dns_address: SocketAddrV6::new(address, DNS_PORT, 0, 0).to_string(), + }, + ) + }, + ServiceKind::Oximeter => { + ( + "oximeter".to_string(), + SledAgentTypes::ServiceType::Oximeter, + ) + }, } - - // TODO: Actually deploy service - - Ok(()) } - async fn ensure_dns_svc( + async fn ensure_rack_service( &self, opctx: &OpContext, - expected_svc: &ExpectedService, + kind: ServiceKind, desired_count: u32, ) -> Result<(), Error> { - if !matches!(expected_svc.kind, ServiceKind::InternalDNS) { - // NOTE: This is a constraint on how we allocate IP addresses - // within the AZ - however, as DNS is the only existing - // AZ-wide service, support for this has been punted. - return Err(Error::internal_error( - &format!("DNS is the only suppoted svc ({:?} is not supported)", expected_svc), - )); - } - - // Look up all existing DNS services. - // - // Note that we should not look up "all services" - as internal DNS servers - // are rack-wide, this would be too expensive of an operation. - let existing_services = self.nexus + // Provision the services within the database. + let new_services = self.nexus .datastore() - .dns_service_list(opctx) + .ensure_rack_service( + opctx, + self.nexus.rack_id, + kind, + desired_count, + ) .await?; - let mut actual_count = existing_services.len() as u32; - - // Get all subnets not allocated to existing services. - let mut usable_dns_subnets = ReservedRackSubnet(self.nexus.rack_subnet) - .get_dns_subnets() - .into_iter() - .filter(|subnet| { - // This address is only usable if none of the existing - // DNS services are using it. - existing_services.iter() - .all(|svc| Ipv6Addr::from(svc.ip) != subnet.dns_address().ip()) - }); + // Actually instantiate those services. + self.instantiate_services(opctx, new_services).await + } - // Get all sleds which aren't already running DNS services. 
- let mut target_sleds = self.nexus + async fn ensure_dns_service( + &self, + opctx: &OpContext, + desired_count: u32, + ) -> Result<(), Error> { + // Provision the services within the database. + let new_services = self.nexus .datastore() - .sled_list_with_limit(opctx, desired_count) - .await? - .into_iter() - .filter(|sled| { - // The target sleds are only considered if they aren't already - // running a DNS service. - existing_services.iter() - .all(|svc| svc.sled_id != sled.id()) - }); - - while desired_count < actual_count { - let sled = target_sleds.next().ok_or_else(|| { - Error::internal_error("Not enough sleds to deploy service") - })?; - let svc_id = Uuid::new_v4(); - let dns_subnet = usable_dns_subnets.next().ok_or_else(|| { - Error::internal_error("Not enough IPs to deploy service") - })?; - let address = dns_subnet - .dns_address() - .ip(); - - self.nexus.upsert_service( - &opctx, - svc_id, - sled.id(), - address, - expected_svc.kind.clone() - ) - .await?; - - actual_count += 1; - } - - // TODO: actually deploy service + .ensure_dns_service(opctx, self.nexus.rack_subnet, desired_count) + .await?; - Ok(()) + // Actually instantiate those services. + self.instantiate_services(opctx, new_services).await } - // Provides a single point-in-time evaluation and adjustment of - // the services provisioned within the rack. - // - // May adjust the provisioned services to meet the redundancy of the - // rack, if necessary. - // - // TODO: Can we: - // - [ ] Put these steps in a saga, to ensure they happen - // - [ ] Use a state variable on the rack to ensure mutual exclusion - // of service re-balancing. It's an involved operation; it would - // be nice to not be conflicting with anyone else while operating - - // and also helps us avoid using transactions. - pub async fn ensure_services_provisioned( + // TODO: Consider using sagas to ensure the rollout of services happens. + // Not using sagas *happens* to be fine because these operations are + // re-tried periodically, but that's kind forcing a dependency on the + // caller. + async fn ensure_services_provisioned( &self, opctx: &OpContext, ) -> Result<(), Error> { for expected_svc in &EXPECTED_SERVICES { - info!( - self.log, - "Ensuring service {:?} exists according to redundancy {:?}", - expected_svc.kind, - expected_svc.redundancy, - ); + info!(self.log, "Ensuring service {:?} exists", expected_svc); match expected_svc.redundancy { ServiceRedundancy::PerRack(desired_count) => { - self.ensure_rack_svc(opctx, expected_svc, desired_count).await?; + self.ensure_rack_service(opctx, expected_svc.kind.clone(), desired_count).await?; }, ServiceRedundancy::DnsPerAz(desired_count) => { - self.ensure_dns_svc(opctx, expected_svc, desired_count).await?; + self.ensure_dns_service(opctx, desired_count).await?; } } } - - // Strategy: - // - // TODO Step 1. In a transaction: - // - Look up all sleds within the Rack - // - Look up all the services of a particular kind (e.g., Oximeter) - // - IF enough exist, exit early. - // - ELSE assign services to sleds. Write to Db. - // - // Step 2. As follow-up: request those svcs execute on sleds. - Ok(()) - } -} - -// Redundancy for the number of datasets to be provisioned. -enum DatasetRedundancy { - // The dataset should exist on all zpools. - OnAll, - // The dataset should exist on at least this many zpools. 
- PerRack(u32), -} -struct ExpectedDataset { - kind: DatasetKind, - redundancy: DatasetRedundancy, -} + async fn ensure_rack_dataset( + &self, + opctx: &OpContext, + kind: DatasetKind, + redundancy: DatasetRedundancy, + ) -> Result<(), Error> { + // Provision the datasets within the database. + let new_datasets = self.nexus + .datastore() + .ensure_rack_dataset( + opctx, + self.nexus.rack_id, + kind, + redundancy, + ) + .await?; -const EXPECTED_DATASERT: [ExpectedDataset; 3] = [ - ExpectedDataset { - kind: DatasetKind::Crucible, - redundancy: DatasetRedundancy::OnAll, - }, - ExpectedDataset { - kind: DatasetKind::Cockroach, - redundancy: DatasetRedundancy::PerRack(1), - }, - ExpectedDataset { - kind: DatasetKind::Clickhouse, - redundancy: DatasetRedundancy::PerRack(1), - }, -]; + // Actually instantiate those datasets. + self.instantiate_datasets(new_datasets).await + } -fn ensure_datasets_provisioned() { - // TODO: - // - [ ] Each zpool has Crucible - // - [ ] Clickhouse exists on N zpools - // - [ ] CRDB exists on N zpools + // Reaches out to all sled agents implied in "services", and + // requests that the desired services are executing. + async fn instantiate_datasets( + &self, + datasets: Vec<(Sled, Zpool, Dataset)> + ) -> Result<(), Error> { + let mut sled_clients = HashMap::new(); + + for (sled, zpool, dataset) in &datasets { + let sled_client = { + match sled_clients.get(&sled.id()) { + Some(client) => client, + None => { + let sled_client = self.nexus.sled_client(&sled.id()).await?; + sled_clients.insert(sled.id(), sled_client); + sled_clients.get(&sled.id()).unwrap() + } + } + }; + + let dataset_kind = match dataset.kind { + // TODO: This set of "all addresses" isn't right. + // TODO: ... should we even be using "all addresses" to contact CRDB? + DatasetKind::Cockroach => SledAgentTypes::DatasetKind::CockroachDb(vec![]), + DatasetKind::Crucible => SledAgentTypes::DatasetKind::Crucible, + DatasetKind::Clickhouse => SledAgentTypes::DatasetKind::Clickhouse, + }; + + // Instantiate each dataset. + sled_client.filesystem_put(&SledAgentTypes::DatasetEnsureBody { + id: dataset.id(), + zpool_id: zpool.id(), + dataset_kind, + address: dataset.address().to_string(), + }).await?; + } - // Strategy: - // - // Step 1. In a transaction: - // - Look up all sleds within the Rack - // - Look up all zpools within those sleds - // - // - Look up all the services of a particular kind (e.g., Oximeter) - // - IF enough exist, exit early. - // - ELSE assign services to sleds. Write to Db. - // - // Step 2. As follow-up: request those datasets exist on sleds. + Ok(()) + } + async fn ensure_datasets_provisioned( + &self, + opctx: &OpContext, + ) -> Result<(), Error> { + for expected_dataset in &EXPECTED_DATASETS { + info!(self.log, "Ensuring dataset {:?} exists", expected_dataset); + self.ensure_rack_dataset(opctx, expected_dataset.kind.clone(), expected_dataset.redundancy).await? + } + Ok(()) + } + // Provides a single point-in-time evaluation and adjustment of + // the services provisioned within the rack. + // + // May adjust the provisioned services to meet the redundancy of the + // rack, if necessary. 
+ pub async fn balance_services( + &self, + opctx: &OpContext, + ) -> Result<(), Error> { + self.ensure_datasets_provisioned(opctx).await?; + self.ensure_services_provisioned(opctx).await?; + Ok(()) + } } diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 51c326390dd..61a4fbf7d22 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -18,6 +18,7 @@ use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; use omicron_common::api::external::Error; use slog::Logger; use std::sync::Arc; +use tokio::sync::OnceCell; use uuid::Uuid; // The implementation of Nexus is large, and split into a number of submodules @@ -83,6 +84,9 @@ pub struct Nexus { /// Status of background task to populate database populate_status: tokio::sync::watch::Receiver, + /// Background task for Nexus. + background_task_runner: OnceCell, + /// Client to the timeseries database. timeseries_client: oximeter_db::Client, @@ -97,9 +101,6 @@ pub struct Nexus { /// Operational context used for external request authentication opctx_external_authn: OpContext, - - /// Operational context used for Nexus-driven background tasks - opctx_background_work: OpContext, } // TODO Is it possible to make some of these operations more generic? A @@ -165,6 +166,7 @@ impl Nexus { sec_client: Arc::clone(&sec_client), recovery_task: std::sync::Mutex::new(None), populate_status, + background_task_runner: OnceCell::new(), timeseries_client, updates_config: config.pkg.updates.clone(), tunables: config.pkg.tunables.clone(), @@ -180,12 +182,6 @@ impl Nexus { authn::Context::external_authn(), Arc::clone(&db_datastore), ), - opctx_background_work: OpContext::for_background( - log.new(o!("component" => "Background Work")), - Arc::clone(&authz), - authn::Context::internal_db_background(), - Arc::clone(&db_datastore), - ), }; // TODO-cleanup all the extra Arcs here seems wrong @@ -236,6 +232,13 @@ impl Nexus { } } + pub fn start_background_tasks(self: &Arc) -> Result<(), anyhow::Error> { + let nexus = self.clone(); + self.background_task_runner.set( + background::TaskRunner::new(nexus) + ).map_err(|error| anyhow!(error.to_string())) + } + /// Returns an [`OpContext`] used for authenticating external requests pub fn opctx_external_authn(&self) -> &OpContext { &self.opctx_external_authn @@ -245,8 +248,13 @@ impl Nexus { // TODO: Probably should be making a *new* opctx here? // // I think there should be one-per-"op", to get better metrics on bg ops. 
- pub fn opctx_for_background(&self) -> &OpContext { - &self.opctx_background_work + pub fn opctx_for_background(&self) -> OpContext { + OpContext::for_background( + self.log.new(o!("component" => "BackgroundWork")), + Arc::clone(&self.authz), + authn::Context::internal_db_background(), + Arc::clone(&self.db_datastore), + ) } /// Used as the body of a "stub" endpoint -- one that's currently diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 2b3285395e3..23b66b0bb4f 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -74,6 +74,9 @@ use diesel::query_builder::{QueryFragment, QueryId}; use diesel::query_dsl::methods::LoadQuery; use diesel::upsert::excluded; use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; +use omicron_common::address::{ + RACK_PREFIX, Ipv6Subnet, ReservedRackSubnet, +}; use omicron_common::api; use omicron_common::api::external; use omicron_common::api::external::DataPageParams; @@ -90,7 +93,7 @@ use omicron_common::api::external::{ use omicron_common::bail_unless; use sled_agent_client::types as sled_client_types; use std::convert::{TryFrom, TryInto}; -use std::net::Ipv6Addr; +use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::sync::Arc; use uuid::Uuid; @@ -119,6 +122,15 @@ impl RunnableQuery for T where { } +// Redundancy for the number of datasets to be provisioned. +#[derive(Clone, Copy, Debug)] +pub enum DatasetRedundancy { + // The dataset should exist on all zpools. + OnAll, + // The dataset should exist on at least this many zpools. + PerRack(u32), +} + pub struct DataStore { pool: Arc, } @@ -397,18 +409,28 @@ impl DataStore { .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) } - // TODO: de-duplicate with sled_list? - pub async fn sled_list_with_limit( - &self, - opctx: &OpContext, + pub fn sled_list_with_limit_sync( + conn: &mut DbConnection, limit: u32, - ) -> ListResultVec { - opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + ) -> Result, diesel::result::Error> { use db::schema::sled::dsl; dsl::sled .filter(dsl::time_deleted.is_null()) .limit(limit as i64) .select(Sled::as_select()) + .load(conn) + } + + pub async fn service_list( + &self, + opctx: &OpContext, + sled_id: Uuid, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::service::dsl; + dsl::service + .filter(dsl::sled_id.eq(sled_id)) + .select(Service::as_select()) .load_async(self.pool_authorized(opctx).await?) .await .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) @@ -417,41 +439,307 @@ impl DataStore { // TODO-correctness: Filter the sleds by rack ID! // This filtering will feasible when Sleds store a FK for // the rack on which they're stored. - pub async fn sled_and_service_list( - &self, - opctx: &OpContext, - kind: ServiceKind, + pub fn sled_and_service_list_sync( + conn: &mut DbConnection, _rack_id: Uuid, - ) -> ListResultVec<(Sled, Option)> { - opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + kind: ServiceKind, + ) -> Result)>, diesel::result::Error> { use db::schema::service::dsl as svc_dsl; use db::schema::sled::dsl as sled_dsl; db::schema::sled::table .filter(sled_dsl::time_deleted.is_null()) .left_outer_join(db::schema::service::table.on( - svc_dsl::id.eq(svc_dsl::sled_id) + svc_dsl::sled_id.eq(sled_dsl::id) )) .filter(svc_dsl::kind.eq(kind)) .select(<(Sled, Option)>::as_select()) - .get_results_async(self.pool_authorized(opctx).await?) 
+ .get_results(conn) + } + + pub async fn ensure_rack_service( + &self, + opctx: &OpContext, + rack_id: Uuid, + kind: ServiceKind, + redundancy: u32, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + + #[derive(Debug)] + enum ServiceError { + NotEnoughSleds, + Other(Error), + } + type TxnError = TransactionError; + + self.pool() + .transaction(move |conn| { + let sleds_and_maybe_svcs = Self::sled_and_service_list_sync( + conn, + rack_id, + kind.clone(), + )?; + + // Split the set of returned sleds into "those with" and "those + // without" the requested service. + let (sleds_with_svc, sleds_without_svc): (Vec<_>, Vec<_>) = + sleds_and_maybe_svcs + .iter() + .partition(|(_, maybe_svc)| { + maybe_svc.is_some() + }); + let mut sleds_without_svc = sleds_without_svc.into_iter() + .map(|(sled, _)| sled); + let existing_count = sleds_with_svc.len(); + + // Add services to sleds, in-order, until we've met a + // number sufficient for our redundancy. + // + // The selection of "which sleds run this service" is completely + // arbitrary. + let mut new_svcs = vec![]; + while (redundancy as usize) < existing_count + new_svcs.len() { + let sled = sleds_without_svc.next().ok_or_else(|| { + TxnError::CustomError(ServiceError::NotEnoughSleds) + })?; + let svc_id = Uuid::new_v4(); + let address = Self::next_ipv6_address_sync(conn, sled.id()) + .map_err(|e| TxnError::CustomError(ServiceError::Other(e)))?; + + let service = db::model::Service::new( + svc_id, + sled.id(), + address, + kind.clone() + ); + + // TODO: Can we insert all the services at the same time? + let svc = Self::service_upsert_sync(conn, service) + .map_err(|e| TxnError::CustomError(ServiceError::Other(e)))?; + new_svcs.push(svc); + } + + return Ok(new_svcs); + }) .await - .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + .map_err(|e| match e { + TxnError::CustomError(ServiceError::NotEnoughSleds) => { + Error::unavail("Not enough sleds for service allocation") + }, + TxnError::CustomError(ServiceError::Other(e)) => e, + TxnError::Pool(e) => public_error_from_diesel_pool(e, ErrorHandler::Server) + }) } - pub async fn dns_service_list( + pub async fn ensure_dns_service( &self, opctx: &OpContext, - ) -> ListResultVec { + rack_subnet: Ipv6Subnet, + redundancy: u32, + ) -> Result, Error> { opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + + #[derive(Debug)] + enum ServiceError { + NotEnoughSleds, + NotEnoughIps, + Other(Error), + } + type TxnError = TransactionError; + + self.pool() + .transaction(move |conn| { + let existing_services = Self::dns_service_list_sync(conn)?; + let existing_count = existing_services.len(); + + // Get all subnets not allocated to existing services. + let mut usable_dns_subnets = ReservedRackSubnet(rack_subnet) + .get_dns_subnets() + .into_iter() + .filter(|subnet| { + // This address is only usable if none of the existing + // DNS services are using it. + existing_services.iter() + .all(|svc| Ipv6Addr::from(svc.ip) != subnet.dns_address().ip()) + }); + + + // Get all sleds which aren't already running DNS services. + let mut target_sleds = Self::sled_list_with_limit_sync(conn, redundancy)? + .into_iter() + .filter(|sled| { + // The target sleds are only considered if they aren't already + // running a DNS service. 
+ existing_services.iter() + .all(|svc| svc.sled_id != sled.id()) + }); + + let mut new_svcs = vec![]; + while (redundancy as usize) < existing_count + new_svcs.len() { + let sled = target_sleds.next().ok_or_else(|| { + TxnError::CustomError(ServiceError::NotEnoughSleds) + })?; + let svc_id = Uuid::new_v4(); + let dns_subnet = usable_dns_subnets.next().ok_or_else(|| { + TxnError::CustomError(ServiceError::NotEnoughIps) + })?; + let address = dns_subnet + .dns_address() + .ip(); + + let service = db::model::Service::new( + svc_id, + sled.id(), + address, + ServiceKind::InternalDNS, + ); + + // TODO: Can we insert all the services at the same time? + let svc = Self::service_upsert_sync(conn, service) + .map_err(|e| TxnError::CustomError(ServiceError::Other(e)))?; + + new_svcs.push(svc); + } + return Ok(new_svcs); + }) + .await + .map_err(|e| match e { + TxnError::CustomError(ServiceError::NotEnoughSleds) => { + Error::unavail("Not enough sleds for service allocation") + }, + TxnError::CustomError(ServiceError::NotEnoughIps) => { + Error::unavail("Not enough IP addresses for service allocation") + }, + TxnError::CustomError(ServiceError::Other(e)) => e, + TxnError::Pool(e) => public_error_from_diesel_pool(e, ErrorHandler::Server) + }) + } + + fn dns_service_list_sync( + conn: &mut DbConnection, + ) -> Result, diesel::result::Error> { use db::schema::service::dsl as svc; svc::service .filter(svc::kind.eq(ServiceKind::InternalDNS)) .select(Service::as_select()) - .get_results_async(self.pool_authorized(opctx).await?) + .get_results(conn) + } + + // TODO: Filter by rack ID + pub fn sled_zpool_and_dataset_list_sync( + conn: &mut DbConnection, + _rack_id: Uuid, + kind: DatasetKind, + ) -> Result)>, diesel::result::Error> { + use db::schema::sled::dsl as sled_dsl; + use db::schema::zpool::dsl as zpool_dsl; + use db::schema::dataset::dsl as dataset_dsl; + + db::schema::sled::table + .filter(sled_dsl::time_deleted.is_null()) + .inner_join(db::schema::zpool::table.on( + zpool_dsl::sled_id.eq(sled_dsl::id) + )) + .filter(zpool_dsl::time_deleted.is_null()) + .left_outer_join(db::schema::dataset::table.on( + dataset_dsl::pool_id.eq(zpool_dsl::id) + )) + .filter(dataset_dsl::kind.eq(kind)) + .select(<(Sled, Zpool, Option)>::as_select()) + .get_results(conn) + } + + pub async fn ensure_rack_dataset( + &self, + opctx: &OpContext, + rack_id: Uuid, + kind: DatasetKind, + redundancy: DatasetRedundancy, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + + #[derive(Debug)] + enum DatasetError { + NotEnoughZpools, + Other(Error), + } + type TxnError = TransactionError; + + self.pool() + .transaction(move |conn| { + let sleds_zpools_and_maybe_datasets = Self::sled_zpool_and_dataset_list_sync( + conn, + rack_id, + kind.clone(), + )?; + + // Split the set of returned zpools into "those with" and "those + // without" the requested dataset. + let (zpools_with_dataset, zpools_without_dataset): (Vec<_>, Vec<_>) = + sleds_zpools_and_maybe_datasets + .into_iter() + .partition(|(_, _, maybe_dataset)| { + maybe_dataset.is_some() + }); + let mut zpools_without_dataset = zpools_without_dataset.into_iter() + .map(|(sled, zpool, _)| (sled, zpool)) + .peekable(); + let existing_count = zpools_with_dataset.len(); + + // Add services to zpools, in-order, until we've met a + // number sufficient for our redundancy. + // + // The selection of "which zpools run this service" is completely + // arbitrary. 
+ let mut new_datasets = vec![]; + + loop { + match redundancy { + DatasetRedundancy::OnAll => { + if zpools_without_dataset.peek().is_none() { + break; + } + }, + DatasetRedundancy::PerRack(count) => { + if (count as usize) >= existing_count + new_datasets.len() { + break; + } + }, + }; + + let (sled, zpool) = zpools_without_dataset.next().ok_or_else(|| { + TxnError::CustomError(DatasetError::NotEnoughZpools) + })?; + let dataset_id = Uuid::new_v4(); + let address = Self::next_ipv6_address_sync(conn, sled.id()) + .map_err(|e| TxnError::CustomError(DatasetError::Other(e))) + .map(|ip| SocketAddr::V6(SocketAddrV6::new(ip, kind.port(), 0, 0)))?; + + let dataset = db::model::Dataset::new( + dataset_id, + zpool.id(), + address, + kind.clone() + ); + + // TODO: Can we insert all the datasets at the same time? + let dataset = Self::dataset_upsert_sync(conn, dataset) + .map_err(|e| TxnError::CustomError(DatasetError::Other(e)))?; + new_datasets.push((sled, zpool, dataset)); + } + + return Ok(new_datasets); + }) .await - .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + .map_err(|e| match e { + TxnError::CustomError(DatasetError::NotEnoughZpools) => { + Error::unavail("Not enough sleds for dataset allocation") + }, + TxnError::CustomError(DatasetError::Other(e)) => e, + TxnError::Pool(e) => public_error_from_diesel_pool(e, ErrorHandler::Server) + }) } /// Stores a new zpool in the database. @@ -531,6 +819,44 @@ impl DataStore { }) } + /// Stores a new dataset in the database. + pub fn dataset_upsert_sync( + conn: &mut DbConnection, + dataset: Dataset, + ) -> CreateResult { + use db::schema::dataset::dsl; + + let zpool_id = dataset.pool_id; + Zpool::insert_resource( + zpool_id, + diesel::insert_into(dsl::dataset) + .values(dataset.clone()) + .on_conflict(dsl::id) + .do_update() + .set(( + dsl::time_modified.eq(Utc::now()), + dsl::pool_id.eq(excluded(dsl::pool_id)), + dsl::ip.eq(excluded(dsl::ip)), + dsl::port.eq(excluded(dsl::port)), + dsl::kind.eq(excluded(dsl::kind)), + )), + ) + .insert_and_get_result(conn) + .map_err(|e| match e { + SyncInsertError::CollectionNotFound => Error::ObjectNotFound { + type_name: ResourceType::Zpool, + lookup_type: LookupType::ById(zpool_id), + }, + SyncInsertError::DatabaseError(e) => { + public_error_from_diesel_create( + e, + ResourceType::Dataset, + &dataset.id().to_string(), + ) + } + }) + } + /// Stores a new service in the database. 
pub async fn service_upsert( &self, @@ -572,6 +898,42 @@ impl DataStore { }) } + pub fn service_upsert_sync( + conn: &mut DbConnection, + service: Service, + ) -> CreateResult { + use db::schema::service::dsl; + + let sled_id = service.sled_id; + Sled::insert_resource( + sled_id, + diesel::insert_into(dsl::service) + .values(service.clone()) + .on_conflict(dsl::id) + .do_update() + .set(( + dsl::time_modified.eq(Utc::now()), + dsl::sled_id.eq(excluded(dsl::sled_id)), + dsl::ip.eq(excluded(dsl::ip)), + dsl::kind.eq(excluded(dsl::kind)), + )), + ) + .insert_and_get_result(conn) + .map_err(|e| match e { + SyncInsertError::CollectionNotFound => Error::ObjectNotFound { + type_name: ResourceType::Sled, + lookup_type: LookupType::ById(sled_id), + }, + SyncInsertError::DatabaseError(e) => { + public_error_from_diesel_create( + e, + ResourceType::Service, + &service.id().to_string(), + ) + } + }) + } + fn get_allocated_regions_query( volume_id: Uuid, ) -> impl RunnableQuery<(Dataset, Region)> { @@ -3578,6 +3940,36 @@ impl DataStore { } } + /// Return the next available IPv6 address for an Oxide service running on + /// the provided sled. + pub fn next_ipv6_address_sync( + conn: &mut DbConnection, + sled_id: Uuid, + ) -> Result { + use db::schema::sled::dsl; + let net = diesel::update( + dsl::sled.find(sled_id).filter(dsl::time_deleted.is_null()), + ) + .set(dsl::last_used_address.eq(dsl::last_used_address + 1)) + .returning(dsl::last_used_address) + .get_result(conn) + .map_err(|e| { + public_error_from_diesel_lookup( + e, + ResourceType::Sled, + &LookupType::ById(sled_id), + ) + })?; + + // TODO-correctness: We could ensure that this address is actually + // within the sled's underlay prefix, once that's included in the + // database record. + match net { + ipnetwork::IpNetwork::V6(net) => Ok(net.ip()), + _ => panic!("Sled IP must be IPv6"), + } + } + pub async fn global_image_list_images( &self, opctx: &OpContext, diff --git a/nexus/src/db/model/dataset.rs b/nexus/src/db/model/dataset.rs index fd4d24eee40..fa4e238fb47 100644 --- a/nexus/src/db/model/dataset.rs +++ b/nexus/src/db/model/dataset.rs @@ -38,11 +38,12 @@ pub struct Dataset { ip: ipnetwork::IpNetwork, port: SqlU16, - kind: DatasetKind, + pub kind: DatasetKind, pub size_used: Option, } impl Dataset { + // TODO: Only operate on SocketAddrV6 pub fn new( id: Uuid, pool_id: Uuid, diff --git a/nexus/src/db/model/dataset_kind.rs b/nexus/src/db/model/dataset_kind.rs index c760a12f53c..effced69522 100644 --- a/nexus/src/db/model/dataset_kind.rs +++ b/nexus/src/db/model/dataset_kind.rs @@ -6,6 +6,7 @@ use super::impl_enum_type; use crate::internal_api; use serde::{Deserialize, Serialize}; use std::io::Write; +use omicron_common::address::{COCKROACH_PORT, CRUCIBLE_PORT, CLICKHOUSE_PORT}; impl_enum_type!( #[derive(SqlType, Debug, QueryId)] @@ -22,6 +23,16 @@ impl_enum_type!( Clickhouse => b"clickhouse" ); +impl DatasetKind { + pub fn port(&self) -> u16 { + match self { + DatasetKind::Crucible => CRUCIBLE_PORT, + DatasetKind::Cockroach => COCKROACH_PORT, + DatasetKind::Clickhouse => CLICKHOUSE_PORT, + } + } +} + impl From for DatasetKind { fn from(k: internal_api::params::DatasetKind) -> Self { match k { diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 60881613181..b5d3ccc5085 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -131,6 +131,9 @@ impl Server { // Wait until RSS handoff completes. 
let opctx = apictx.nexus.opctx_for_background(); apictx.nexus.await_rack_initialization(&opctx).await; + apictx.nexus + .start_background_tasks() + .map_err(|e| e.to_string())?; // TODO: What triggers background tasks to execute? // From 95a5873b1e7bbc919a7be7258d68fe60548cf30f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 14:27:52 -0400 Subject: [PATCH 17/88] Add indices, add tests, fix bugs --- common/src/address.rs | 2 +- common/src/sql/dbinit.sql | 27 +- nexus/src/app/background/mod.rs | 10 +- nexus/src/app/background/services.rs | 183 +++--- nexus/src/app/mod.rs | 10 +- nexus/src/app/rack.rs | 6 +- nexus/src/app/sled.rs | 2 +- nexus/src/db/datastore.rs | 803 +++++++++++++++++++++++---- nexus/src/db/model/dataset_kind.rs | 4 +- nexus/src/db/model/service.rs | 2 +- nexus/src/db/model/service_kind.rs | 2 +- nexus/src/db/model/sled.rs | 7 +- nexus/src/db/model/zpool.rs | 2 +- nexus/src/db/schema.rs | 1 + nexus/src/lib.rs | 20 +- sled-agent/src/storage_manager.rs | 20 +- 16 files changed, 858 insertions(+), 243 deletions(-) diff --git a/common/src/address.rs b/common/src/address.rs index bd5d03da2e6..7284ba4cc64 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -178,7 +178,7 @@ mod test { // Observe the first DNS subnet within this reserved rack subnet. let dns_subnets = rack_subnet.get_dns_subnets(); - assert_eq!(DNS_REDUNDANCY, dns_subnets.len()); + assert_eq!(DNS_REDUNDANCY, dns_subnets.len() as u32); // The DNS address and GZ address should be only differing by one. assert_eq!( diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index d13d3ba2b04..d568cc833b8 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -75,6 +75,9 @@ CREATE TABLE omicron.public.sled ( time_deleted TIMESTAMPTZ, rcgen INT NOT NULL, + /* FK into the Rack table */ + rack_id UUID NOT NULL, + /* The IP address and bound port of the sled agent server. 
*/ ip INET NOT NULL, port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, @@ -83,6 +86,12 @@ CREATE TABLE omicron.public.sled ( last_used_address INET NOT NULL ); +/* Add an index which lets us look up the sleds on a rack */ +CREATE INDEX ON omicron.public.sled ( + rack_id +) WHERE + time_deleted IS NULL; + /* * Services */ @@ -109,7 +118,13 @@ CREATE TABLE omicron.public.service ( /* Add an index which lets us look up the services on a sled */ CREATE INDEX ON omicron.public.service ( - sled_id + sled_id, + kind +); + +/* Add an index which lets us look up services of a particular kind on a sled */ +CREATE INDEX ON omicron.public.service ( + kind ); /* @@ -132,6 +147,11 @@ CREATE TABLE omicron.public.Zpool ( total_size INT NOT NULL ); +/* Create an index which allows looking up all zpools on a sled */ +CREATE INDEX on omicron.public.Zpool ( + sled_id +) WHERE time_deleted IS NULL; + CREATE TYPE omicron.public.dataset_kind AS ENUM ( 'crucible', 'cockroach', @@ -162,6 +182,11 @@ CREATE TABLE omicron.public.Dataset ( size_used INT ); +/* Create an index which allows looking up all datasets in a pool */ +CREATE INDEX on omicron.public.Dataset ( + pool_id +) WHERE time_deleted IS NULL; + /* Create an index on the size usage for Crucible's allocation */ CREATE INDEX on omicron.public.Dataset ( size_used diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index d091fdd7858..82e08c2b680 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -20,21 +20,21 @@ impl TaskRunner { pub fn new(nexus: Arc) -> Self { let handle = spawn(async move { let log = nexus.log.new(o!("component" => "BackgroundTaskRunner")); - let service_balancer = services::ServiceBalancer::new(log.clone(), nexus.clone()); + let service_balancer = + services::ServiceBalancer::new(log.clone(), nexus.clone()); loop { // TODO: We may want triggers to exist here, to invoke this task // more frequently (e.g., on Sled failure). let opctx = nexus.opctx_for_background(); - if let Err(e) = service_balancer.balance_services(&opctx).await { + if let Err(e) = service_balancer.balance_services(&opctx).await + { warn!(log, "Failed to balance services: {:?}", e); } tokio::time::sleep(std::time::Duration::from_secs(30)).await; } }); - Self { - _handle: handle, - } + Self { _handle: handle } } } diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index 6f60dc09904..8aee6b7f3b2 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -4,7 +4,6 @@ //! Task which ensures that expected Nexus services exist. -use crate::Nexus; use crate::context::OpContext; use crate::db::datastore::DatasetRedundancy; use crate::db::identity::Asset; @@ -14,15 +13,17 @@ use crate::db::model::Service; use crate::db::model::ServiceKind; use crate::db::model::Sled; use crate::db::model::Zpool; -use omicron_common::api::external::Error; +use crate::Nexus; use omicron_common::address::{ - DNS_REDUNDANCY, NEXUS_INTERNAL_PORT, NEXUS_EXTERNAL_PORT, DNS_SERVER_PORT, DNS_PORT + DNS_PORT, DNS_REDUNDANCY, DNS_SERVER_PORT, NEXUS_EXTERNAL_PORT, + NEXUS_INTERNAL_PORT, }; +use omicron_common::api::external::Error; use sled_agent_client::types as SledAgentTypes; use slog::Logger; use std::collections::{HashMap, HashSet}; -use std::sync::Arc; use std::net::{Ipv6Addr, SocketAddrV6}; +use std::sync::Arc; // Policy for the number of services to be provisioned. 
#[derive(Debug)] @@ -85,10 +86,7 @@ pub struct ServiceBalancer { impl ServiceBalancer { pub fn new(log: Logger, nexus: Arc) -> Self { - Self { - log, - nexus, - } + Self { log, nexus } } // Reaches out to all sled agents implied in "services", and @@ -96,7 +94,7 @@ impl ServiceBalancer { async fn instantiate_services( &self, opctx: &OpContext, - services: Vec + services: Vec, ) -> Result<(), Error> { let mut sled_ids = HashSet::new(); for svc in &services { @@ -110,23 +108,33 @@ impl ServiceBalancer { // only insert the *new* services. // // Inserting the old ones too is costing us an extra query. - let services = self.nexus.datastore().service_list(opctx, *sled_id).await?; + let services = + self.nexus.datastore().service_list(opctx, *sled_id).await?; let sled_client = self.nexus.sled_client(sled_id).await?; - sled_client.services_put(&SledAgentTypes::ServiceEnsureBody { - services: services.iter().map(|s| { - let address = Ipv6Addr::from(s.ip); - let (name, service_type) = Self::get_service_name_and_type(address, s.kind.clone()); + sled_client + .services_put(&SledAgentTypes::ServiceEnsureBody { + services: services + .iter() + .map(|s| { + let address = Ipv6Addr::from(s.ip); + let (name, service_type) = + Self::get_service_name_and_type( + address, + s.kind.clone(), + ); - SledAgentTypes::ServiceRequest { - id: s.id(), - name: name.to_string(), - addresses: vec![address], - gz_addresses: vec![], - service_type, - } - }).collect() - }).await?; + SledAgentTypes::ServiceRequest { + id: s.id(), + name: name.to_string(), + addresses: vec![address], + gz_addresses: vec![], + service_type, + } + }) + .collect(), + }) + .await?; } Ok(()) } @@ -134,33 +142,45 @@ impl ServiceBalancer { // Translates (address, db kind) to Sled Agent client types. fn get_service_name_and_type( address: Ipv6Addr, - kind: ServiceKind + kind: ServiceKind, ) -> (String, SledAgentTypes::ServiceType) { match kind { - ServiceKind::Nexus => { - ( - "nexus".to_string(), - SledAgentTypes::ServiceType::Nexus { - internal_address: SocketAddrV6::new(address, NEXUS_INTERNAL_PORT, 0, 0).to_string(), - external_address: SocketAddrV6::new(address, NEXUS_EXTERNAL_PORT, 0, 0).to_string(), - } - ) - }, - ServiceKind::InternalDNS => { - ( - "internal-dns".to_string(), - SledAgentTypes::ServiceType::InternalDns { - server_address: SocketAddrV6::new(address, DNS_SERVER_PORT, 0, 0).to_string(), - dns_address: SocketAddrV6::new(address, DNS_PORT, 0, 0).to_string(), - }, - ) - }, + ServiceKind::Nexus => ( + "nexus".to_string(), + SledAgentTypes::ServiceType::Nexus { + internal_address: SocketAddrV6::new( + address, + NEXUS_INTERNAL_PORT, + 0, + 0, + ) + .to_string(), + external_address: SocketAddrV6::new( + address, + NEXUS_EXTERNAL_PORT, + 0, + 0, + ) + .to_string(), + }, + ), + ServiceKind::InternalDNS => ( + "internal-dns".to_string(), + SledAgentTypes::ServiceType::InternalDns { + server_address: SocketAddrV6::new( + address, + DNS_SERVER_PORT, + 0, + 0, + ) + .to_string(), + dns_address: SocketAddrV6::new(address, DNS_PORT, 0, 0) + .to_string(), + }, + ), ServiceKind::Oximeter => { - ( - "oximeter".to_string(), - SledAgentTypes::ServiceType::Oximeter, - ) - }, + ("oximeter".to_string(), SledAgentTypes::ServiceType::Oximeter) + } } } @@ -171,14 +191,10 @@ impl ServiceBalancer { desired_count: u32, ) -> Result<(), Error> { // Provision the services within the database. 
- let new_services = self.nexus + let new_services = self + .nexus .datastore() - .ensure_rack_service( - opctx, - self.nexus.rack_id, - kind, - desired_count, - ) + .ensure_rack_service(opctx, self.nexus.rack_id, kind, desired_count) .await?; // Actually instantiate those services. @@ -191,7 +207,8 @@ impl ServiceBalancer { desired_count: u32, ) -> Result<(), Error> { // Provision the services within the database. - let new_services = self.nexus + let new_services = self + .nexus .datastore() .ensure_dns_service(opctx, self.nexus.rack_subnet, desired_count) .await?; @@ -208,12 +225,20 @@ impl ServiceBalancer { &self, opctx: &OpContext, ) -> Result<(), Error> { + // NOTE: If any sleds host DNS + other redudant services, we send + // redundant requests. We could propagate the service list up to a + // higher level, and do instantiation after all services complete? for expected_svc in &EXPECTED_SERVICES { info!(self.log, "Ensuring service {:?} exists", expected_svc); match expected_svc.redundancy { ServiceRedundancy::PerRack(desired_count) => { - self.ensure_rack_service(opctx, expected_svc.kind.clone(), desired_count).await?; - }, + self.ensure_rack_service( + opctx, + expected_svc.kind.clone(), + desired_count, + ) + .await?; + } ServiceRedundancy::DnsPerAz(desired_count) => { self.ensure_dns_service(opctx, desired_count).await?; } @@ -229,14 +254,10 @@ impl ServiceBalancer { redundancy: DatasetRedundancy, ) -> Result<(), Error> { // Provision the datasets within the database. - let new_datasets = self.nexus + let new_datasets = self + .nexus .datastore() - .ensure_rack_dataset( - opctx, - self.nexus.rack_id, - kind, - redundancy, - ) + .ensure_rack_dataset(opctx, self.nexus.rack_id, kind, redundancy) .await?; // Actually instantiate those datasets. @@ -247,7 +268,7 @@ impl ServiceBalancer { // requests that the desired services are executing. async fn instantiate_datasets( &self, - datasets: Vec<(Sled, Zpool, Dataset)> + datasets: Vec<(Sled, Zpool, Dataset)>, ) -> Result<(), Error> { let mut sled_clients = HashMap::new(); @@ -256,7 +277,8 @@ impl ServiceBalancer { match sled_clients.get(&sled.id()) { Some(client) => client, None => { - let sled_client = self.nexus.sled_client(&sled.id()).await?; + let sled_client = + self.nexus.sled_client(&sled.id()).await?; sled_clients.insert(sled.id(), sled_client); sled_clients.get(&sled.id()).unwrap() } @@ -266,18 +288,24 @@ impl ServiceBalancer { let dataset_kind = match dataset.kind { // TODO: This set of "all addresses" isn't right. // TODO: ... should we even be using "all addresses" to contact CRDB? - DatasetKind::Cockroach => SledAgentTypes::DatasetKind::CockroachDb(vec![]), + DatasetKind::Cockroach => { + SledAgentTypes::DatasetKind::CockroachDb(vec![]) + } DatasetKind::Crucible => SledAgentTypes::DatasetKind::Crucible, - DatasetKind::Clickhouse => SledAgentTypes::DatasetKind::Clickhouse, + DatasetKind::Clickhouse => { + SledAgentTypes::DatasetKind::Clickhouse + } }; // Instantiate each dataset. 
- sled_client.filesystem_put(&SledAgentTypes::DatasetEnsureBody { - id: dataset.id(), - zpool_id: zpool.id(), - dataset_kind, - address: dataset.address().to_string(), - }).await?; + sled_client + .filesystem_put(&SledAgentTypes::DatasetEnsureBody { + id: dataset.id(), + zpool_id: zpool.id(), + dataset_kind, + address: dataset.address().to_string(), + }) + .await?; } Ok(()) @@ -289,7 +317,12 @@ impl ServiceBalancer { ) -> Result<(), Error> { for expected_dataset in &EXPECTED_DATASETS { info!(self.log, "Ensuring dataset {:?} exists", expected_dataset); - self.ensure_rack_dataset(opctx, expected_dataset.kind.clone(), expected_dataset.redundancy).await? + self.ensure_rack_dataset( + opctx, + expected_dataset.kind.clone(), + expected_dataset.redundancy, + ) + .await? } Ok(()) } diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 61a4fbf7d22..0abf1d4abfd 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -232,11 +232,13 @@ impl Nexus { } } - pub fn start_background_tasks(self: &Arc) -> Result<(), anyhow::Error> { + pub fn start_background_tasks( + self: &Arc, + ) -> Result<(), anyhow::Error> { let nexus = self.clone(); - self.background_task_runner.set( - background::TaskRunner::new(nexus) - ).map_err(|error| anyhow!(error.to_string())) + self.background_task_runner + .set(background::TaskRunner::new(nexus)) + .map_err(|error| anyhow!(error.to_string())) } /// Returns an [`OpContext`] used for authenticating external requests diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index f180395d4e1..5abf7b4ea99 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -102,10 +102,7 @@ impl super::Nexus { /// initialized. /// /// See RFD 278 for additional context. - pub async fn await_rack_initialization( - &self, - opctx: &OpContext - ) { + pub async fn await_rack_initialization(&self, opctx: &OpContext) { loop { let result = self.rack_lookup(&opctx, &self.rack_id).await; match result { @@ -124,6 +121,5 @@ impl super::Nexus { } tokio::time::sleep(std::time::Duration::from_secs(2)).await; } - } } diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 0150cbec148..e4fc616f095 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -31,7 +31,7 @@ impl super::Nexus { address: SocketAddrV6, ) -> Result<(), Error> { info!(self.log, "registered sled agent"; "sled_uuid" => id.to_string()); - let sled = db::model::Sled::new(id, address); + let sled = db::model::Sled::new(id, address, self.rack_id); self.db_datastore.sled_upsert(sled).await?; Ok(()) } diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 23b66b0bb4f..81afcc25fdc 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -75,7 +75,7 @@ use diesel::query_dsl::methods::LoadQuery; use diesel::upsert::excluded; use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; use omicron_common::address::{ - RACK_PREFIX, Ipv6Subnet, ReservedRackSubnet, + Ipv6Subnet, ReservedRackSubnet, DNS_REDUNDANCY, RACK_PREFIX, }; use omicron_common::api; use omicron_common::api::external; @@ -409,7 +409,7 @@ impl DataStore { .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) } - pub fn sled_list_with_limit_sync( + fn sled_list_with_limit_sync( conn: &mut DbConnection, limit: u32, ) -> Result, diesel::result::Error> { @@ -436,12 +436,9 @@ impl DataStore { .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) } - // TODO-correctness: Filter the sleds by rack ID! 
- // This filtering will feasible when Sleds store a FK for - // the rack on which they're stored. - pub fn sled_and_service_list_sync( + fn sled_and_service_list_sync( conn: &mut DbConnection, - _rack_id: Uuid, + rack_id: Uuid, kind: ServiceKind, ) -> Result)>, diesel::result::Error> { use db::schema::service::dsl as svc_dsl; @@ -449,10 +446,12 @@ impl DataStore { db::schema::sled::table .filter(sled_dsl::time_deleted.is_null()) - .left_outer_join(db::schema::service::table.on( - svc_dsl::sled_id.eq(sled_dsl::id) - )) - .filter(svc_dsl::kind.eq(kind)) + .filter(sled_dsl::rack_id.eq(rack_id)) + .left_outer_join( + db::schema::service::table.on(svc_dsl::sled_id + .eq(sled_dsl::id) + .and(svc_dsl::kind.eq(kind.clone()))), + ) .select(<(Sled, Option)>::as_select()) .get_results(conn) } @@ -476,59 +475,80 @@ impl DataStore { self.pool() .transaction(move |conn| { let sleds_and_maybe_svcs = Self::sled_and_service_list_sync( - conn, - rack_id, - kind.clone(), - )?; + conn, + rack_id, + kind.clone(), + )?; + + eprintln!( + "Observed sleds/services: {:?}", + sleds_and_maybe_svcs + ); // Split the set of returned sleds into "those with" and "those // without" the requested service. let (sleds_with_svc, sleds_without_svc): (Vec<_>, Vec<_>) = sleds_and_maybe_svcs - .iter() - .partition(|(_, maybe_svc)| { - maybe_svc.is_some() - }); - let mut sleds_without_svc = sleds_without_svc.into_iter() - .map(|(sled, _)| sled); - let existing_count = sleds_with_svc.len(); + .into_iter() + .partition(|(_, maybe_svc)| maybe_svc.is_some()); + // Identify sleds without services (targets for future + // allocation). + let mut sleds_without_svc = + sleds_without_svc.into_iter().map(|(sled, _)| sled); + + // Identify sleds with services (part of output). + let mut svcs: Vec<_> = sleds_with_svc + .into_iter() + .map(|(_, maybe_svc)| { + maybe_svc.expect( + "Should have filtered by sleds with the service", + ) + }) + .collect(); + + eprintln!("Observed services: {:?}", svcs); // Add services to sleds, in-order, until we've met a // number sufficient for our redundancy. // // The selection of "which sleds run this service" is completely // arbitrary. - let mut new_svcs = vec![]; - while (redundancy as usize) < existing_count + new_svcs.len() { + while svcs.len() < (redundancy as usize) { let sled = sleds_without_svc.next().ok_or_else(|| { TxnError::CustomError(ServiceError::NotEnoughSleds) })?; let svc_id = Uuid::new_v4(); let address = Self::next_ipv6_address_sync(conn, sled.id()) - .map_err(|e| TxnError::CustomError(ServiceError::Other(e)))?; + .map_err(|e| { + TxnError::CustomError(ServiceError::Other(e)) + })?; let service = db::model::Service::new( svc_id, sled.id(), address, - kind.clone() + kind.clone(), ); // TODO: Can we insert all the services at the same time? 
let svc = Self::service_upsert_sync(conn, service) - .map_err(|e| TxnError::CustomError(ServiceError::Other(e)))?; - new_svcs.push(svc); + .map_err(|e| { + TxnError::CustomError(ServiceError::Other(e)) + })?; + svcs.push(svc); } - return Ok(new_svcs); + return Ok(svcs); }) .await .map_err(|e| match e { TxnError::CustomError(ServiceError::NotEnoughSleds) => { Error::unavail("Not enough sleds for service allocation") - }, + } TxnError::CustomError(ServiceError::Other(e)) => e, - TxnError::Pool(e) => public_error_from_diesel_pool(e, ErrorHandler::Server) + TxnError::Pool(e) => { + public_error_from_diesel_pool(e, ErrorHandler::Server) + } }) } @@ -550,44 +570,51 @@ impl DataStore { self.pool() .transaction(move |conn| { - let existing_services = Self::dns_service_list_sync(conn)?; - let existing_count = existing_services.len(); + let mut svcs = Self::dns_service_list_sync(conn)?; + eprintln!("Observed DNS services: {:?}", svcs); // Get all subnets not allocated to existing services. let mut usable_dns_subnets = ReservedRackSubnet(rack_subnet) .get_dns_subnets() .into_iter() .filter(|subnet| { - // This address is only usable if none of the existing - // DNS services are using it. - existing_services.iter() - .all(|svc| Ipv6Addr::from(svc.ip) != subnet.dns_address().ip()) - }); - + // If any existing services are using this address, + // skip it. + !svcs.iter().any(|svc| { + Ipv6Addr::from(svc.ip) == subnet.dns_address().ip() + }) + }) + .collect::>() + .into_iter(); + eprintln!("Usable DNS services: {:?}", usable_dns_subnets); // Get all sleds which aren't already running DNS services. - let mut target_sleds = Self::sled_list_with_limit_sync(conn, redundancy)? - .into_iter() - .filter(|sled| { - // The target sleds are only considered if they aren't already - // running a DNS service. - existing_services.iter() - .all(|svc| svc.sled_id != sled.id()) - }); + let mut target_sleds = + Self::sled_list_with_limit_sync(conn, redundancy)? + .into_iter() + .filter(|sled| { + // The target sleds are only considered if they aren't already + // running a DNS service. + svcs.iter().all(|svc| svc.sled_id != sled.id()) + }) + .collect::>() + .into_iter(); - let mut new_svcs = vec![]; - while (redundancy as usize) < existing_count + new_svcs.len() { + while svcs.len() < (redundancy as usize) { let sled = target_sleds.next().ok_or_else(|| { - TxnError::CustomError(ServiceError::NotEnoughSleds) - })?; + TxnError::CustomError(ServiceError::NotEnoughSleds) + })?; let svc_id = Uuid::new_v4(); - let dns_subnet = usable_dns_subnets.next().ok_or_else(|| { + let dns_subnet = + usable_dns_subnets.next().ok_or_else(|| { TxnError::CustomError(ServiceError::NotEnoughIps) })?; - let address = dns_subnet - .dns_address() - .ip(); + let address = dns_subnet.dns_address().ip(); + // TODO: How are we tracking the GZ address that must be + // allocated? They're tracked by the "DnsSubnet" object + // in address.rs, but I don't think they're getting + // propagated out of here. let service = db::model::Service::new( svc_id, sled.id(), @@ -597,22 +624,28 @@ impl DataStore { // TODO: Can we insert all the services at the same time? 
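Regarding the batch-insert TODO above, a rough sketch using plain Diesel is shown here; it assumes Service derives Insertable and ignores the Sled::insert_resource collection wrapper that service_upsert_sync relies on, so treat it as illustrative only:

    // Insert all newly-generated service rows in one statement and return
    // them as stored. Unlike the real upsert path, conflicting ids are left
    // untouched rather than having time_modified/ip/kind refreshed.
    fn service_insert_batch_sync(
        conn: &mut DbConnection,
        new_services: &[Service],
    ) -> Result<Vec<Service>, diesel::result::Error> {
        use db::schema::service::dsl;
        diesel::insert_into(dsl::service)
            .values(new_services)
            .on_conflict(dsl::id)
            .do_nothing()
            .get_results(conn)
    }

A follow-up cleanup could have ensure_rack_service and ensure_dns_service collect their new rows and issue one such call per transaction.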
let svc = Self::service_upsert_sync(conn, service) - .map_err(|e| TxnError::CustomError(ServiceError::Other(e)))?; + .map_err(|e| { + TxnError::CustomError(ServiceError::Other(e)) + })?; - new_svcs.push(svc); + svcs.push(svc); } - return Ok(new_svcs); + return Ok(svcs); }) .await .map_err(|e| match e { TxnError::CustomError(ServiceError::NotEnoughSleds) => { Error::unavail("Not enough sleds for service allocation") - }, + } TxnError::CustomError(ServiceError::NotEnoughIps) => { - Error::unavail("Not enough IP addresses for service allocation") - }, + Error::unavail( + "Not enough IP addresses for service allocation", + ) + } TxnError::CustomError(ServiceError::Other(e)) => e, - TxnError::Pool(e) => public_error_from_diesel_pool(e, ErrorHandler::Server) + TxnError::Pool(e) => { + public_error_from_diesel_pool(e, ErrorHandler::Server) + } }) } @@ -623,30 +656,35 @@ impl DataStore { svc::service .filter(svc::kind.eq(ServiceKind::InternalDNS)) + .limit(DNS_REDUNDANCY.into()) .select(Service::as_select()) .get_results(conn) } - // TODO: Filter by rack ID - pub fn sled_zpool_and_dataset_list_sync( + fn sled_zpool_and_dataset_list_sync( conn: &mut DbConnection, - _rack_id: Uuid, + rack_id: Uuid, kind: DatasetKind, - ) -> Result)>, diesel::result::Error> { + ) -> Result)>, diesel::result::Error> + { + use db::schema::dataset::dsl as dataset_dsl; use db::schema::sled::dsl as sled_dsl; use db::schema::zpool::dsl as zpool_dsl; - use db::schema::dataset::dsl as dataset_dsl; db::schema::sled::table .filter(sled_dsl::time_deleted.is_null()) - .inner_join(db::schema::zpool::table.on( - zpool_dsl::sled_id.eq(sled_dsl::id) - )) - .filter(zpool_dsl::time_deleted.is_null()) - .left_outer_join(db::schema::dataset::table.on( - dataset_dsl::pool_id.eq(zpool_dsl::id) - )) - .filter(dataset_dsl::kind.eq(kind)) + .filter(sled_dsl::rack_id.eq(rack_id)) + .inner_join( + db::schema::zpool::table.on(zpool_dsl::sled_id + .eq(sled_dsl::id) + .and(zpool_dsl::time_deleted.is_null())), + ) + .left_outer_join( + db::schema::dataset::table.on(dataset_dsl::pool_id + .eq(zpool_dsl::id) + .and(dataset_dsl::kind.eq(kind.clone())) + .and(dataset_dsl::time_deleted.is_null())), + ) .select(<(Sled, Zpool, Option)>::as_select()) .get_results(conn) } @@ -669,76 +707,108 @@ impl DataStore { self.pool() .transaction(move |conn| { - let sleds_zpools_and_maybe_datasets = Self::sled_zpool_and_dataset_list_sync( + let sleds_zpools_and_maybe_datasets = + Self::sled_zpool_and_dataset_list_sync( conn, rack_id, kind.clone(), )?; + eprintln!( + "Observed datasets: {:?}", + sleds_zpools_and_maybe_datasets + ); + // Split the set of returned zpools into "those with" and "those // without" the requested dataset. 
- let (zpools_with_dataset, zpools_without_dataset): (Vec<_>, Vec<_>) = - sleds_zpools_and_maybe_datasets + let (zpools_with_dataset, zpools_without_dataset): ( + Vec<_>, + Vec<_>, + ) = sleds_zpools_and_maybe_datasets + .into_iter() + .partition(|(_, _, maybe_dataset)| maybe_dataset.is_some()); + let mut zpools_without_dataset = zpools_without_dataset .into_iter() - .partition(|(_, _, maybe_dataset)| { - maybe_dataset.is_some() - }); - let mut zpools_without_dataset = zpools_without_dataset.into_iter() .map(|(sled, zpool, _)| (sled, zpool)) .peekable(); - let existing_count = zpools_with_dataset.len(); - // Add services to zpools, in-order, until we've met a + eprintln!("Dataset targets: {:?}", zpools_without_dataset); + + let mut datasets: Vec<_> = zpools_with_dataset + .into_iter() + .map(|(sled, zpool, maybe_dataset)| { + ( + sled, + zpool, + maybe_dataset.expect("Dataset should exist"), + ) + }) + .collect(); + eprintln!("Existing datasets: {:?}", datasets); + + // Add datasets to zpools, in-order, until we've met a // number sufficient for our redundancy. // - // The selection of "which zpools run this service" is completely + // The selection of "which zpools contain this dataset" is completely // arbitrary. - let mut new_datasets = vec![]; - loop { match redundancy { DatasetRedundancy::OnAll => { if zpools_without_dataset.peek().is_none() { break; } - }, - DatasetRedundancy::PerRack(count) => { - if (count as usize) >= existing_count + new_datasets.len() { + } + DatasetRedundancy::PerRack(desired) => { + if datasets.len() >= (desired as usize) { break; } - }, + } }; - let (sled, zpool) = zpools_without_dataset.next().ok_or_else(|| { - TxnError::CustomError(DatasetError::NotEnoughZpools) - })?; + let (sled, zpool) = + zpools_without_dataset.next().ok_or_else(|| { + TxnError::CustomError(DatasetError::NotEnoughZpools) + })?; let dataset_id = Uuid::new_v4(); let address = Self::next_ipv6_address_sync(conn, sled.id()) - .map_err(|e| TxnError::CustomError(DatasetError::Other(e))) - .map(|ip| SocketAddr::V6(SocketAddrV6::new(ip, kind.port(), 0, 0)))?; + .map_err(|e| { + TxnError::CustomError(DatasetError::Other(e)) + }) + .map(|ip| { + SocketAddr::V6(SocketAddrV6::new( + ip, + kind.port(), + 0, + 0, + )) + })?; let dataset = db::model::Dataset::new( dataset_id, zpool.id(), address, - kind.clone() + kind.clone(), ); // TODO: Can we insert all the datasets at the same time? let dataset = Self::dataset_upsert_sync(conn, dataset) - .map_err(|e| TxnError::CustomError(DatasetError::Other(e)))?; - new_datasets.push((sled, zpool, dataset)); + .map_err(|e| { + TxnError::CustomError(DatasetError::Other(e)) + })?; + datasets.push((sled, zpool, dataset)); } - return Ok(new_datasets); + return Ok(datasets); }) .await .map_err(|e| match e { TxnError::CustomError(DatasetError::NotEnoughZpools) => { - Error::unavail("Not enough sleds for dataset allocation") - }, + Error::unavail("Not enough zpools for dataset allocation") + } TxnError::CustomError(DatasetError::Other(e)) => e, - TxnError::Pool(e) => public_error_from_diesel_pool(e, ErrorHandler::Server) + TxnError::Pool(e) => { + public_error_from_diesel_pool(e, ErrorHandler::Server) + } }) } @@ -778,6 +848,25 @@ impl DataStore { }) } + // NOTE: This doesn't need to be test-only, it just happens to be test-only + // to avoid unused warnings. 
+ #[cfg(test)] + async fn dataset_list( + &self, + opctx: &OpContext, + zpool_id: Uuid, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::dataset::dsl; + dsl::dataset + .filter(dsl::time_deleted.is_null()) + .filter(dsl::pool_id.eq(zpool_id)) + .select(Dataset::as_select()) + .load_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + /// Stores a new dataset in the database. pub async fn dataset_upsert( &self, @@ -820,7 +909,7 @@ impl DataStore { } /// Stores a new dataset in the database. - pub fn dataset_upsert_sync( + fn dataset_upsert_sync( conn: &mut DbConnection, dataset: Dataset, ) -> CreateResult { @@ -898,7 +987,7 @@ impl DataStore { }) } - pub fn service_upsert_sync( + fn service_upsert_sync( conn: &mut DbConnection, service: Service, ) -> CreateResult { @@ -3942,7 +4031,7 @@ impl DataStore { /// Return the next available IPv6 address for an Oxide service running on /// the provided sled. - pub fn next_ipv6_address_sync( + fn next_ipv6_address_sync( conn: &mut DbConnection, sled_id: Uuid, ) -> Result { @@ -4265,6 +4354,7 @@ mod test { use crate::authz; use crate::db::explain::ExplainableAsync; use crate::db::fixed_data::silo::SILO_ID; + use crate::db::identity::Asset; use crate::db::identity::Resource; use crate::db::lookup::LookupPath; use crate::db::model::{ConsoleSession, DatasetKind, Project, ServiceKind}; @@ -4275,7 +4365,7 @@ mod test { ByteCount, Error, IdentityMetadataCreateParams, LookupType, Name, }; use omicron_test_utils::dev; - use std::collections::HashSet; + use std::collections::{HashMap, HashSet}; use std::net::Ipv6Addr; use std::net::SocketAddrV6; use std::net::{IpAddr, Ipv4Addr, SocketAddr}; @@ -4436,7 +4526,8 @@ mod test { 0, ); let sled_id = Uuid::new_v4(); - let sled = Sled::new(sled_id, bogus_addr.clone()); + let rack_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, bogus_addr.clone(), rack_id); datastore.sled_upsert(sled).await.unwrap(); sled_id } @@ -4791,15 +4882,16 @@ mod test { let datastore = Arc::new(DataStore::new(Arc::clone(&pool))); let opctx = OpContext::for_tests(logctx.log.new(o!()), datastore.clone()); + let rack_id = Uuid::new_v4(); let addr1 = "[fd00:1de::1]:12345".parse().unwrap(); let sled1_id = "0de4b299-e0b4-46f0-d528-85de81a7095f".parse().unwrap(); - let sled1 = db::model::Sled::new(sled1_id, addr1); + let sled1 = db::model::Sled::new(sled1_id, addr1, rack_id); datastore.sled_upsert(sled1).await.unwrap(); let addr2 = "[fd00:1df::1]:12345".parse().unwrap(); let sled2_id = "66285c18-0c79-43e0-e54f-95271f271314".parse().unwrap(); - let sled2 = db::model::Sled::new(sled2_id, addr2); + let sled2 = db::model::Sled::new(sled2_id, addr2, rack_id); datastore.sled_upsert(sled2).await.unwrap(); let ip = datastore.next_ipv6_address(&opctx, sled1_id).await.unwrap(); @@ -4939,6 +5031,497 @@ mod test { logctx.cleanup_successful(); } + #[tokio::test] + async fn test_ensure_rack_service() { + let logctx = dev::test_setup_log("test_ensure_rack_service"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the service should exist. 
+ let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + + // Ensure a service exists on the rack. + let services = datastore + .ensure_rack_service(&opctx, rack_id, ServiceKind::Nexus, 1) + .await + .expect("Should have allocated service"); + + // Only a single service was allocated, with the type / address we + // expect. + assert_eq!(1, services.len()); + assert_eq!(ServiceKind::Nexus, services[0].kind); + assert_eq!(sled_id, services[0].sled_id); + + // Listing services only shows this one. + let observed_services = datastore + .service_list(&opctx, sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(1, observed_services.len()); + assert_eq!(services[0].id(), observed_services[0].id()); + + // Test that ensuring services is idempotent. + let services_again = datastore + .ensure_rack_service(&opctx, rack_id, ServiceKind::Nexus, 1) + .await + .expect("Should have allocated service"); + assert_eq!(services_again, services); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_service_multi_sled() { + let logctx = dev::test_setup_log("test_ensure_rack_service_multi_sled"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let rack_id = Uuid::new_v4(); + + // Create sleds with distinct underlay subnets. + const SLED_COUNT: usize = 3; + let mut sleds = HashMap::new(); + for i in 0..SLED_COUNT { + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, i.try_into().unwrap(), 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled.clone()) + .await + .expect("Should be able to upsert sled"); + sleds.insert(sled.id(), sled); + } + + // Ensure a service exists on the rack, with some redundancy. + const NEXUS_COUNT: u32 = 3; + let mut services = datastore + .ensure_rack_service( + &opctx, + rack_id, + ServiceKind::Nexus, + NEXUS_COUNT, + ) + .await + .expect("Should have allocated service"); + services.sort_by(|a, b| a.id().cmp(&b.id())); + + assert_eq!(NEXUS_COUNT, services.len() as u32); + for svc in &services { + assert_eq!(ServiceKind::Nexus, svc.kind); + + // Each service should have been provisioned to a distinct sled. + let observed_services = datastore + .service_list(&opctx, svc.sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(1, observed_services.len()); + assert_eq!(svc.id(), observed_services[0].id()); + } + + // Test that ensuring services is idempotent. + let mut services_again = datastore + .ensure_rack_service( + &opctx, + rack_id, + ServiceKind::Nexus, + NEXUS_COUNT, + ) + .await + .expect("Should have allocated service"); + services_again.sort_by(|a, b| a.id().cmp(&b.id())); + assert_eq!(services_again, services); + + // Ask for a different service type on the rack. 
+ let oximeter_services = datastore + .ensure_rack_service(&opctx, rack_id, ServiceKind::Oximeter, 1) + .await + .expect("Should have allocated service"); + + // This should only return a single service + assert_eq!(1, oximeter_services.len()); + + // The target sled should contain both the nexus and oximeter services + let observed_services = datastore + .service_list(&opctx, oximeter_services[0].sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(2, observed_services.len()); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_service_not_enough_sleds() { + let logctx = + dev::test_setup_log("test_ensure_rack_service_not_enough_sleds"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the service should exist. + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + + // Try to request a redundancy which is larger than the number of sleds. + let err = datastore + .ensure_rack_service(&opctx, rack_id, ServiceKind::Nexus, 2) + .await + .expect_err("Should have failed to allocate service"); + + assert!( + matches!(err, Error::ServiceUnavailable { .. }), + "Error should have been ServiceUnavailable: {:?}", + err + ); + assert!( + err.to_string().contains("Not enough sleds"), + "Error should have identified 'Not enough sleds' as the cause: {:?}", err + ); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_dns_service() { + let logctx = dev::test_setup_log("test_ensure_dns_service"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the service should exist. + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + + let rack_subnet = Ipv6Subnet::::new(*sled_addr.ip()); + + // Ensure a service exists on the rack. + let services = datastore + .ensure_dns_service(&opctx, rack_subnet, 1) + .await + .expect("Should have allocated service"); + + // Only a single service was allocated, with the type / address we + // expect. + assert_eq!(1, services.len()); + assert_eq!(ServiceKind::InternalDNS, services[0].kind); + assert_eq!(sled_id, services[0].sled_id); + + // Listing services only shows this one. + let observed_services = datastore + .service_list(&opctx, sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(1, observed_services.len()); + assert_eq!(services[0].id(), observed_services[0].id()); + + // Test that ensuring services is idempotent. 
+ let services_again = datastore + .ensure_dns_service(&opctx, rack_subnet, 1) + .await + .expect("Should have allocated service"); + assert_eq!(services_again, services); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_dns_service_multi_sled() { + let logctx = dev::test_setup_log("test_ensure_dns_service_multi_sled"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let rack_id = Uuid::new_v4(); + + // Create sleds with distinct underlay subnets. + const SLED_COUNT: u32 = DNS_REDUNDANCY; + let mut sleds = HashMap::new(); + for i in 0..SLED_COUNT { + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, i.try_into().unwrap(), 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled.clone()) + .await + .expect("Should be able to upsert sled"); + sleds.insert(sled.id(), sled); + } + let rack_subnet = Ipv6Subnet::::new(Ipv6Addr::from( + sleds.values().next().unwrap().ip, + )); + + for sled in sleds.values() { + assert_eq!( + rack_subnet, + Ipv6Subnet::::new(Ipv6Addr::from(sled.ip)), + "Test pre-condition violated: All sleds must belong to the same rack" + ); + } + + // Ensure a service exists on the rack. + const DNS_COUNT: u32 = DNS_REDUNDANCY; + let mut services = datastore + .ensure_dns_service(&opctx, rack_subnet, DNS_COUNT) + .await + .expect("Should have allocated service"); + services.sort_by(|a, b| a.id().cmp(&b.id())); + + assert_eq!(DNS_COUNT, services.len() as u32); + for svc in &services { + assert_eq!(ServiceKind::InternalDNS, svc.kind); + + // Each service should have been provisioned to a distinct sled. + let observed_services = datastore + .service_list(&opctx, svc.sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(1, observed_services.len()); + assert_eq!(svc.id(), observed_services[0].id()); + } + + // Test for idempotency + let mut services_again = datastore + .ensure_dns_service(&opctx, rack_subnet, DNS_COUNT) + .await + .expect("Should have allocated service"); + services_again.sort_by(|a, b| a.id().cmp(&b.id())); + assert_eq!(services_again, services); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_dataset() { + let logctx = dev::test_setup_log("test_ensure_rack_dataset"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the dataset should exist. + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + let zpool_id = create_test_zpool(&datastore, sled_id).await; + + // Ensure a dataset exists on the rack. + let output = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::PerRack(1), + ) + .await + .expect("Should have allocated dataset"); + + // Observe that only a single dataset was allocated + assert_eq!(1, output.len()); + let (_, _, output_dataset) = &output[0]; + assert_eq!(DatasetKind::Crucible, output_dataset.kind); + assert_eq!(zpool_id, output_dataset.pool_id); + + // Listing datasets only shows this one. 
+ let observed_datasets = datastore + .dataset_list(&opctx, zpool_id) + .await + .expect("Should be able to list datasets"); + assert_eq!(1, observed_datasets.len()); + assert_eq!(output_dataset.id(), observed_datasets[0].id()); + + // Test that ensuring datasets is idempotent. + let output_again = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::PerRack(1), + ) + .await + .expect("Should have allocated dataset"); + let (_, _, output_dataset_again) = &output_again[0]; + assert_eq!(output_dataset_again, output_dataset); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_dataset_not_enough_zpools() { + let logctx = + dev::test_setup_log("test_ensure_rack_dataset_not_enough_zpools"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the dataset should exist. + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + + // Attempt to allocate a dataset on a rack without zpools. + let err = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::PerRack(1), + ) + .await + .expect_err("Should not have allocated dataset"); + + assert!( + matches!(err, Error::ServiceUnavailable { .. }), + "Error should have been ServiceUnavailable: {:?}", + err + ); + assert!( + err.to_string().contains("Not enough zpools"), + "Error should have identified 'Not enough zpools' as the cause: {:?}", err + ); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_dataset_multi_sled() { + let logctx = dev::test_setup_log("test_ensure_rack_dataset_multi_sled"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let rack_id = Uuid::new_v4(); + + // Create sleds with distinct underlay subnets. + const SLED_COUNT: usize = 3; + let mut sleds = HashMap::new(); + for i in 0..SLED_COUNT { + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, i.try_into().unwrap(), 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled.clone()) + .await + .expect("Should be able to upsert sled"); + sleds.insert(sled.id(), sled); + create_test_zpool(&datastore, sled_id).await; + } + + // Ensure datasets exist on the rack. + let output = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::OnAll, + ) + .await + .expect("Should have allocated dataset"); + assert_eq!(SLED_COUNT, output.len()); + for (sled, zpool, dataset) in &output { + assert_eq!(DatasetKind::Crucible, dataset.kind); + assert_eq!(zpool.id(), dataset.pool_id); + assert_eq!(sled.id(), zpool.sled_id); + + let observed_datasets = datastore + .dataset_list(&opctx, zpool.id()) + .await + .expect("Should be able to list datasets"); + assert_eq!(1, observed_datasets.len()); + assert_eq!(dataset.id(), observed_datasets[0].id()) + } + + // Test that ensuring datasets is idempotent. 
+ let output_again = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::OnAll, + ) + .await + .expect("Should have allocated dataset"); + + let mut output: Vec<_> = + output.into_iter().map(|(_, _, dataset)| dataset).collect(); + output.sort_by(|a, b| a.id().cmp(&b.id())); + let mut output_again: Vec<_> = + output_again.into_iter().map(|(_, _, dataset)| dataset).collect(); + output_again.sort_by(|a, b| a.id().cmp(&b.id())); + assert_eq!(output, output_again); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + #[tokio::test] async fn test_rack_initialize_is_idempotent() { let logctx = dev::test_setup_log("test_rack_initialize_is_idempotent"); diff --git a/nexus/src/db/model/dataset_kind.rs b/nexus/src/db/model/dataset_kind.rs index effced69522..bd85972b3dc 100644 --- a/nexus/src/db/model/dataset_kind.rs +++ b/nexus/src/db/model/dataset_kind.rs @@ -4,12 +4,12 @@ use super::impl_enum_type; use crate::internal_api; +use omicron_common::address::{CLICKHOUSE_PORT, COCKROACH_PORT, CRUCIBLE_PORT}; use serde::{Deserialize, Serialize}; use std::io::Write; -use omicron_common::address::{COCKROACH_PORT, CRUCIBLE_PORT, CLICKHOUSE_PORT}; impl_enum_type!( - #[derive(SqlType, Debug, QueryId)] + #[derive(Clone, SqlType, Debug, QueryId)] #[diesel(postgres_type(name = "dataset_kind"))] pub struct DatasetKindEnum; diff --git a/nexus/src/db/model/service.rs b/nexus/src/db/model/service.rs index 3501337e42b..0762db538b1 100644 --- a/nexus/src/db/model/service.rs +++ b/nexus/src/db/model/service.rs @@ -10,7 +10,7 @@ use std::net::Ipv6Addr; use uuid::Uuid; /// Representation of services which may run on Sleds. -#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset)] +#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset, PartialEq)] #[diesel(table_name = service)] pub struct Service { #[diesel(embed)] diff --git a/nexus/src/db/model/service_kind.rs b/nexus/src/db/model/service_kind.rs index f66532e64c0..b9a3e96c6f6 100644 --- a/nexus/src/db/model/service_kind.rs +++ b/nexus/src/db/model/service_kind.rs @@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize}; use std::io::Write; impl_enum_type!( - #[derive(SqlType, Debug, QueryId)] + #[derive(Clone, SqlType, Debug, QueryId)] #[diesel(postgres_type(name = "service_kind"))] pub struct ServiceKindEnum; diff --git a/nexus/src/db/model/sled.rs b/nexus/src/db/model/sled.rs index ad756c3473f..84882679087 100644 --- a/nexus/src/db/model/sled.rs +++ b/nexus/src/db/model/sled.rs @@ -13,7 +13,7 @@ use std::net::SocketAddrV6; use uuid::Uuid; /// Database representation of a Sled. -#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset)] +#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset, PartialEq)] #[diesel(table_name = sled)] pub struct Sled { #[diesel(embed)] @@ -21,6 +21,8 @@ pub struct Sled { time_deleted: Option>, rcgen: Generation, + pub rack_id: Uuid, + // ServiceAddress (Sled Agent). 
pub ip: ipv6::Ipv6Addr, pub port: SqlU16, @@ -30,7 +32,7 @@ pub struct Sled { } impl Sled { - pub fn new(id: Uuid, addr: SocketAddrV6) -> Self { + pub fn new(id: Uuid, addr: SocketAddrV6, rack_id: Uuid) -> Self { let last_used_address = { let mut segments = addr.ip().segments(); segments[7] += omicron_common::address::RSS_RESERVED_ADDRESSES; @@ -40,6 +42,7 @@ impl Sled { identity: SledIdentity::new(id), time_deleted: None, rcgen: Generation::new(), + rack_id, ip: ipv6::Ipv6Addr::from(addr.ip()), port: addr.port().into(), last_used_address, diff --git a/nexus/src/db/model/zpool.rs b/nexus/src/db/model/zpool.rs index 511312a3382..475fc7bf0ee 100644 --- a/nexus/src/db/model/zpool.rs +++ b/nexus/src/db/model/zpool.rs @@ -14,7 +14,7 @@ use uuid::Uuid; /// /// A zpool represents a ZFS storage pool, allocated on a single /// physical sled. -#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset)] +#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset, PartialEq)] #[diesel(table_name = zpool)] pub struct Zpool { #[diesel(embed)] diff --git a/nexus/src/db/schema.rs b/nexus/src/db/schema.rs index a6d281d987e..41c8c3527b9 100644 --- a/nexus/src/db/schema.rs +++ b/nexus/src/db/schema.rs @@ -297,6 +297,7 @@ table! { time_deleted -> Nullable, rcgen -> Int8, + rack_id -> Uuid, ip -> Inet, port -> Int4, last_used_address -> Inet, diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index b5d3ccc5085..d9aa20adb48 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -131,25 +131,7 @@ impl Server { // Wait until RSS handoff completes. let opctx = apictx.nexus.opctx_for_background(); apictx.nexus.await_rack_initialization(&opctx).await; - apictx.nexus - .start_background_tasks() - .map_err(|e| e.to_string())?; - - // TODO: What triggers background tasks to execute? - // - // - Perhaps the API is exposed to tests? - // - Perhaps the invocation of that API is controlled by config - // options? - // - // TODO: services we need to start: - // - // Datasets: - // - Crucible (as a dataset on each unique zpool) - // - Clickhouse (as a dataset on a zpool) - // - CRDB (prolly just check it exists, period) - // - // - Oximeter (as a service) - // - Nexus (again, maybe just check it exists at all) + apictx.nexus.start_background_tasks().map_err(|e| e.to_string())?; let http_server_starter_external = dropshot::HttpServerStarter::new( &config.runtime.dropshot_external, diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 5b82396b6fd..24bf14ccfa1 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -15,7 +15,7 @@ use crate::params::DatasetKind; use futures::stream::FuturesOrdered; use futures::FutureExt; use futures::StreamExt; -use nexus_client::types::{DatasetPutRequest, ZpoolPutRequest}; +use nexus_client::types::ZpoolPutRequest; use omicron_common::api::external::{ByteCount, ByteCountRangeError}; use omicron_common::backoff; use schemars::JsonSchema; @@ -667,6 +667,7 @@ impl StorageWorker { // Adds a "notification to nexus" to `nexus_notifications`, // informing it about the addition of `datasets` to `pool_id`. + /* fn add_datasets_notify( &self, nexus_notifications: &mut FuturesOrdered>>, @@ -715,6 +716,7 @@ impl StorageWorker { .boxed(), ); } + */ // TODO: a lot of these functions act on the `FuturesOrdered` - should // that just be a part of the "worker" struct? @@ -722,7 +724,6 @@ impl StorageWorker { // Attempts to add a dataset within a zpool, according to `request`. 
async fn add_dataset( &self, - nexus_notifications: &mut FuturesOrdered>>, request: &NewFilesystemRequest, ) -> Result<(), Error> { info!(self.log, "add_dataset: {:?}", request); @@ -769,12 +770,6 @@ impl StorageWorker { err, })?; - self.add_datasets_notify( - nexus_notifications, - vec![(id, dataset_info.address, dataset_info.kind)], - pool.id(), - ); - Ok(()) } @@ -868,21 +863,16 @@ impl StorageWorker { } } - // Notify Nexus of the zpool and all datasets within. + // Notify Nexus of the zpool. self.add_zpool_notify( &mut nexus_notifications, pool.id(), size, ); - self.add_datasets_notify( - &mut nexus_notifications, - datasets, - pool.id(), - ); }, Some(request) = self.new_filesystems_rx.recv() => { - let result = self.add_dataset(&mut nexus_notifications, &request).await; + let result = self.add_dataset(&request).await; let _ = request.responder.send(result); } } From 2a28eb99e66bb7295a38e5f957f00238af976502 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 15:12:23 -0400 Subject: [PATCH 18/88] It's hacky, but it's working. I'm seeing services be re-balanced correctly --- nexus/src/app/background/services.rs | 26 +++++++++++++++++++++----- nexus/src/db/datastore.rs | 17 ----------------- sled-agent/src/services.rs | 2 +- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index 8aee6b7f3b2..53628f5905c 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -112,6 +112,8 @@ impl ServiceBalancer { self.nexus.datastore().service_list(opctx, *sled_id).await?; let sled_client = self.nexus.sled_client(sled_id).await?; + info!(self.log, "instantiate_services: {:?}", services); + sled_client .services_put(&SledAgentTypes::ServiceEnsureBody { services: services @@ -124,11 +126,24 @@ impl ServiceBalancer { s.kind.clone(), ); + // TODO: This is hacky, specifically to inject + // global zone addresses in the DNS service. + let gz_addresses = match &s.kind { + ServiceKind::InternalDNS => { + let mut octets = address.octets(); + octets[15] = octets[15] + 1; + vec![Ipv6Addr::from(octets)] + } + _ => vec![], + }; + + // TODO: this is wrong for DNS service; needs the gz + // addreess SledAgentTypes::ServiceRequest { id: s.id(), name: name.to_string(), addresses: vec![address], - gz_addresses: vec![], + gz_addresses, service_type, } }) @@ -191,14 +206,14 @@ impl ServiceBalancer { desired_count: u32, ) -> Result<(), Error> { // Provision the services within the database. - let new_services = self + let services = self .nexus .datastore() .ensure_rack_service(opctx, self.nexus.rack_id, kind, desired_count) .await?; // Actually instantiate those services. - self.instantiate_services(opctx, new_services).await + self.instantiate_services(opctx, services).await } async fn ensure_dns_service( @@ -207,14 +222,14 @@ impl ServiceBalancer { desired_count: u32, ) -> Result<(), Error> { // Provision the services within the database. - let new_services = self + let services = self .nexus .datastore() .ensure_dns_service(opctx, self.nexus.rack_subnet, desired_count) .await?; // Actually instantiate those services. - self.instantiate_services(opctx, new_services).await + self.instantiate_services(opctx, services).await } // TODO: Consider using sagas to ensure the rollout of services happens. 
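A minimal sketch (not part of the patch series) of what the "hacky" global-zone address derivation in the hunk above computes: the InternalDNS arm takes the service's underlay address and bumps the final octet by one. The function name and addresses here are illustrative only.

use std::net::Ipv6Addr;

// Mirrors the ServiceKind::InternalDNS arm above: the GZ address is the
// service address with the last octet incremented.
fn derive_gz_address(service_addr: Ipv6Addr) -> Ipv6Addr {
    let mut octets = service_addr.octets();
    octets[15] += 1;
    Ipv6Addr::from(octets)
}

fn main() {
    let svc: Ipv6Addr = "fd00:1122:3344:0101::5".parse().unwrap();
    assert_eq!(
        derive_gz_address(svc),
        "fd00:1122:3344:0101::6".parse::<Ipv6Addr>().unwrap()
    );
}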
@@ -272,6 +287,7 @@ impl ServiceBalancer { ) -> Result<(), Error> { let mut sled_clients = HashMap::new(); + // TODO: We could issue these requests concurrently for (sled, zpool, dataset) in &datasets { let sled_client = { match sled_clients.get(&sled.id()) { diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 81afcc25fdc..53ad724560d 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -480,11 +480,6 @@ impl DataStore { kind.clone(), )?; - eprintln!( - "Observed sleds/services: {:?}", - sleds_and_maybe_svcs - ); - // Split the set of returned sleds into "those with" and "those // without" the requested service. let (sleds_with_svc, sleds_without_svc): (Vec<_>, Vec<_>) = @@ -506,8 +501,6 @@ impl DataStore { }) .collect(); - eprintln!("Observed services: {:?}", svcs); - // Add services to sleds, in-order, until we've met a // number sufficient for our redundancy. // @@ -572,7 +565,6 @@ impl DataStore { .transaction(move |conn| { let mut svcs = Self::dns_service_list_sync(conn)?; - eprintln!("Observed DNS services: {:?}", svcs); // Get all subnets not allocated to existing services. let mut usable_dns_subnets = ReservedRackSubnet(rack_subnet) .get_dns_subnets() @@ -586,7 +578,6 @@ impl DataStore { }) .collect::>() .into_iter(); - eprintln!("Usable DNS services: {:?}", usable_dns_subnets); // Get all sleds which aren't already running DNS services. let mut target_sleds = @@ -714,11 +705,6 @@ impl DataStore { kind.clone(), )?; - eprintln!( - "Observed datasets: {:?}", - sleds_zpools_and_maybe_datasets - ); - // Split the set of returned zpools into "those with" and "those // without" the requested dataset. let (zpools_with_dataset, zpools_without_dataset): ( @@ -732,8 +718,6 @@ impl DataStore { .map(|(sled, zpool, _)| (sled, zpool)) .peekable(); - eprintln!("Dataset targets: {:?}", zpools_without_dataset); - let mut datasets: Vec<_> = zpools_with_dataset .into_iter() .map(|(sled, zpool, maybe_dataset)| { @@ -744,7 +728,6 @@ impl DataStore { ) }) .collect(); - eprintln!("Existing datasets: {:?}", datasets); // Add datasets to zpools, in-order, until we've met a // number sufficient for our redundancy. diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index e7f71810fdf..3bae6521a4c 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -486,7 +486,7 @@ impl ServiceManager { // that removal implicitly. 
warn!( self.log, - "Cannot request services on this sled, differing configurations: {:?}", + "Cannot request services on this sled, differing configurations: {:#?}", known_set.symmetric_difference(&requested_set) ); return Err(Error::ServicesAlreadyConfigured); From dd04a67f04b29af80b5e182a9c99ac62889c7778 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 16:39:09 -0400 Subject: [PATCH 19/88] s/runtime/deployment --- common/src/nexus_config.rs | 8 +-- nexus/examples/config.toml | 10 ++-- nexus/src/app/mod.rs | 4 +- nexus/src/config.rs | 54 +++++++++++---------- nexus/src/context.rs | 10 ++-- nexus/src/lib.rs | 6 +-- nexus/test-utils/src/lib.rs | 4 +- nexus/tests/config.test.toml | 10 ++-- nexus/tests/integration_tests/authn_http.rs | 2 +- nexus/tests/integration_tests/commands.rs | 2 +- sled-agent/src/services.rs | 13 +++-- 11 files changed, 65 insertions(+), 58 deletions(-) diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index f1325ae336d..2b34108643d 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -3,7 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. //! Configuration parameters to Nexus that are usually only known -//! at runtime. +//! at deployment time. use super::address::{Ipv6Subnet, RACK_PREFIX}; use super::postgres_config::PostgresConfigWithUrl; @@ -99,7 +99,7 @@ pub enum Database { } #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] -pub struct RuntimeConfig { +pub struct DeploymentConfig { /// Uuid of the Nexus instance pub id: Uuid, /// Dropshot configuration for external API server @@ -112,8 +112,8 @@ pub struct RuntimeConfig { pub database: Database, } -impl RuntimeConfig { - /// Load a `RuntimeConfig` from the given TOML file +impl DeploymentConfig { + /// Load a `DeploymentConfig` from the given TOML file /// /// This config object can then be used to create a new `Nexus`. /// The format is described in the README. 
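A minimal sketch (not part of the patch series) of how the renamed DeploymentConfig is expected to be read from the [deployment] table shown in the example config below. The structs are trimmed stand-ins so the sketch compiles on its own; the real type also carries dropshot, subnet, and database settings.

use serde::Deserialize;
use uuid::Uuid;

// Trimmed stand-in for DeploymentConfig: just the Nexus instance id.
#[derive(Debug, Deserialize)]
struct DeploymentStandIn {
    id: Uuid,
}

// Trimmed stand-in for the top-level Config, which nests the
// deployment-time parameters under a `deployment` table.
#[derive(Debug, Deserialize)]
struct ConfigStandIn {
    deployment: DeploymentStandIn,
}

fn main() {
    let raw = r#"
        [deployment]
        id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c"
    "#;
    let parsed: ConfigStandIn = toml::from_str(raw).unwrap();
    assert_eq!(
        parsed.deployment.id.to_string(),
        "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c"
    );
}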
diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 22889ab1be9..c841a12ac1c 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -33,25 +33,25 @@ mode = "stderr-terminal" [timeseries_db] address = "[::1]:8123" -[runtime] +[deployment] # Identifier for this instance of Nexus id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" -[runtime.dropshot_external] +[deployment.dropshot_external] # IP address and TCP port on which to listen for the external API bind_address = "127.0.0.1:12220" # Allow larger request bodies (1MiB) to accomodate firewall endpoints (one # rule is ~500 bytes) request_body_max_bytes = 1048576 -[runtime.dropshot_internal] +[deployment.dropshot_internal] # IP address and TCP port on which to listen for the internal API bind_address = "127.0.0.1:12221" -[runtime.subnet] +[deployment.subnet] net = "fd00:1122:3344:0100::/56" -[runtime.database] +[deployment.database] # URL for connecting to the database type = "from_url" url = "postgresql://root@127.0.0.1:32221/omicron?sslmode=disable" diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 1c3620de7e7..1cb1f6b6ff7 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -112,7 +112,7 @@ impl Nexus { authz: Arc, ) -> Arc { let pool = Arc::new(pool); - let my_sec_id = db::SecId::from(config.runtime.id); + let my_sec_id = db::SecId::from(config.deployment.id); let db_datastore = Arc::new(db::DataStore::new(Arc::clone(&pool))); let sec_store = Arc::new(db::CockroachDbSecStore::new( my_sec_id, @@ -143,7 +143,7 @@ impl Nexus { populate_start(populate_ctx, Arc::clone(&db_datastore)); let nexus = Nexus { - id: config.runtime.id, + id: config.deployment.id, rack_id, log: log.new(o!()), api_rack_identity: db::model::RackIdentity::new(rack_id), diff --git a/nexus/src/config.rs b/nexus/src/config.rs index a6034a7eea3..83be56fd335 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -7,7 +7,9 @@ use anyhow::anyhow; use dropshot::ConfigLogging; -use omicron_common::nexus_config::{InvalidTunable, LoadError, RuntimeConfig}; +use omicron_common::nexus_config::{ + DeploymentConfig, InvalidTunable, LoadError, +}; use serde::Deserialize; use serde::Serialize; use serde_with::DeserializeFromStr; @@ -147,8 +149,8 @@ pub struct Config { #[serde(flatten)] pub pkg: PackageConfig, - /// A variety of configuration parameters only known at runtime. - pub runtime: RuntimeConfig, + /// A variety of configuration parameters only known at deployment time. 
+ pub deployment: DeploymentConfig, } impl Config { @@ -214,7 +216,7 @@ mod test { use libc; use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; use omicron_common::nexus_config::{ - Database, LoadErrorKind, RuntimeConfig, + Database, DeploymentConfig, LoadErrorKind, }; use std::fs; use std::net::{Ipv6Addr, SocketAddr}; @@ -288,7 +290,7 @@ mod test { let error = read_config("empty", "").expect_err("expected failure"); if let LoadErrorKind::Parse(error) = &error.kind { assert_eq!(error.line_col(), None); - assert_eq!(error.to_string(), "missing field `runtime`"); + assert_eq!(error.to_string(), "missing field `deployment`"); } else { panic!( "Got an unexpected error, expected Parse but got {:?}", @@ -325,17 +327,17 @@ mod test { default_base_url = "http://example.invalid/" [tunables] max_vpc_ipv4_subnet_prefix = 27 - [runtime] + [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" - [runtime.dropshot_external] + [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 - [runtime.dropshot_internal] + [deployment.dropshot_internal] bind_address = "10.1.2.3:4568" request_body_max_bytes = 1024 - [runtime.subnet] + [deployment.subnet] net = "::/56" - [runtime.database] + [deployment.database] type = "from_dns" "##, ) @@ -344,7 +346,7 @@ mod test { assert_eq!( config, Config { - runtime: RuntimeConfig { + deployment: DeploymentConfig { id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), dropshot_external: ConfigDropshot { bind_address: "10.1.2.3:4567" @@ -403,17 +405,17 @@ mod test { if_exists = "fail" [timeseries_db] address = "[::1]:8123" - [runtime] + [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" - [runtime.dropshot_external] + [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 - [runtime.dropshot_internal] + [deployment.dropshot_internal] bind_address = "10.1.2.3:4568" request_body_max_bytes = 1024 - [runtime.subnet] + [deployment.subnet] net = "::/56" - [runtime.database] + [deployment.database] type = "from_dns" "##, ) @@ -444,17 +446,17 @@ mod test { if_exists = "fail" [timeseries_db] address = "[::1]:8123" - [runtime] + [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" - [runtime.dropshot_external] + [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 - [runtime.dropshot_internal] + [deployment.dropshot_internal] bind_address = "10.1.2.3:4568" request_body_max_bytes = 1024 - [runtime.subnet] + [deployment.subnet] net = "::/56" - [runtime.database] + [deployment.database] type = "from_dns" "##, ) @@ -499,17 +501,17 @@ mod test { default_base_url = "http://example.invalid/" [tunables] max_vpc_ipv4_subnet_prefix = 100 - [runtime] + [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" - [runtime.dropshot_external] + [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 - [runtime.dropshot_internal] + [deployment.dropshot_internal] bind_address = "10.1.2.3:4568" request_body_max_bytes = 1024 - [runtime.subnet] + [deployment.subnet] net = "::/56" - [runtime.database] + [deployment.database] type = "from_dns" "##, ) diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 2ad6a93553a..e940bef6d10 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -91,8 +91,10 @@ impl ServerContext { let internal_authn = Arc::new(authn::Context::internal_api()); let authz = Arc::new(authz::Authz::new(&log)); let create_tracker = |name: &str| { - let target = - HttpService { name: name.to_string(), id: 
config.runtime.id }; + let target = HttpService { + name: name.to_string(), + id: config.deployment.id, + }; const START_LATENCY_DECADE: i8 = -6; const END_LATENCY_DECADE: i8 = 3; LatencyTracker::with_latency_decades( @@ -104,7 +106,7 @@ impl ServerContext { }; let internal_latencies = create_tracker("nexus-internal"); let external_latencies = create_tracker("nexus-external"); - let producer_registry = ProducerRegistry::with_id(config.runtime.id); + let producer_registry = ProducerRegistry::with_id(config.deployment.id); producer_registry .register_producer(internal_latencies.clone()) .unwrap(); @@ -135,7 +137,7 @@ impl ServerContext { // nexus in dev for everyone // Set up DB pool - let url = match &config.runtime.database { + let url = match &config.deployment.database { nexus_config::Database::FromUrl { url } => url.clone(), nexus_config::Database::FromDns => { todo!("Not yet implemented"); diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index c13fc3de3c8..79f8a2cd838 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -85,7 +85,7 @@ impl Server { rack_id: Uuid, log: &Logger, ) -> Result { - let log = log.new(o!("name" => config.runtime.id.to_string())); + let log = log.new(o!("name" => config.deployment.id.to_string())); info!(log, "setting up nexus server"); let ctxlog = log.new(o!("component" => "ServerContext")); @@ -93,7 +93,7 @@ impl Server { let apictx = ServerContext::new(rack_id, ctxlog, &config)?; let http_server_starter_external = dropshot::HttpServerStarter::new( - &config.runtime.dropshot_external, + &config.deployment.dropshot_external, external_api(), Arc::clone(&apictx), &log.new(o!("component" => "dropshot_external")), @@ -101,7 +101,7 @@ impl Server { .map_err(|error| format!("initializing external server: {}", error))?; let http_server_starter_internal = dropshot::HttpServerStarter::new( - &config.runtime.dropshot_internal, + &config.deployment.dropshot_internal, internal_api(), Arc::clone(&apictx), &log.new(o!("component" => "dropshot_internal")), diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index e4eb744e2fa..02b9a0d7b7d 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -76,7 +76,7 @@ pub fn load_test_config() -> omicron_nexus::Config { let config_file_path = Path::new("tests/config.test.toml"); let mut config = omicron_nexus::Config::from_file(config_file_path) .expect("failed to load config.test.toml"); - config.runtime.id = Uuid::new_v4(); + config.deployment.id = Uuid::new_v4(); config } @@ -100,7 +100,7 @@ pub async fn test_setup_with_config( let clickhouse = dev::clickhouse::ClickHouseInstance::new(0).await.unwrap(); // Store actual address/port information for the databases after they start. - config.runtime.database = + config.deployment.database = nexus_config::Database::FromUrl { url: database.pg_config().clone() }; config.pkg.timeseries_db.address.set_port(clickhouse.port()); diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 2fc4ddba192..0a8789893a1 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -35,7 +35,7 @@ address = "[::1]:0" # Allow small subnets, so we can test IP address exhaustion easily / quickly max_vpc_ipv4_subnet_prefix = 29 -[runtime] +[deployment] # Identifier for this instance of Nexus. # NOTE: The test suite always overrides this. id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" @@ -45,22 +45,22 @@ id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" # available port) because the test suite will be running many servers # concurrently. 
# -[runtime.dropshot_external] +[deployment.dropshot_external] bind_address = "127.0.0.1:0" request_body_max_bytes = 1048576 # port must be 0. see above -[runtime.dropshot_internal] +[deployment.dropshot_internal] bind_address = "127.0.0.1:0" request_body_max_bytes = 1048576 -[runtime.subnet] +[deployment.subnet] net = "fd00:1122:3344:0100::/56" # # NOTE: for the test suite, the database URL will be replaced with one # appropriate for the database that's started by the test runner. # -[runtime.database] +[deployment.database] type = "from_url" url = "postgresql://root@127.0.0.1:0/omicron?sslmode=disable" diff --git a/nexus/tests/integration_tests/authn_http.rs b/nexus/tests/integration_tests/authn_http.rs index e0234da1b97..99f25f91539 100644 --- a/nexus/tests/integration_tests/authn_http.rs +++ b/nexus/tests/integration_tests/authn_http.rs @@ -299,7 +299,7 @@ async fn start_whoami_server( TestContext::new( whoami_api, server_state, - &config.runtime.dropshot_external, + &config.deployment.dropshot_external, Some(logctx), log, ) diff --git a/nexus/tests/integration_tests/commands.rs b/nexus/tests/integration_tests/commands.rs index 561e5fc478c..e28e313ff31 100644 --- a/nexus/tests/integration_tests/commands.rs +++ b/nexus/tests/integration_tests/commands.rs @@ -76,7 +76,7 @@ fn test_nexus_invalid_config() { assert_eq!( stderr_text, format!( - "nexus: parse \"{}\": missing field `runtime`\n", + "nexus: parse \"{}\": missing field `deployment`\n", config_path.display() ), ); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 4edd18a3fa7..3f617aaf399 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -13,7 +13,9 @@ use crate::params::{ServiceEnsureBody, ServiceRequest, ServiceType}; use crate::zone::Zones; use dropshot::ConfigDropshot; use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; -use omicron_common::nexus_config::{self, RuntimeConfig as NexusRuntimeConfig}; +use omicron_common::nexus_config::{ + self, DeploymentConfig as NexusDeploymentConfig, +}; use omicron_common::postgres_config::PostgresConfigWithUrl; use slog::Logger; use std::collections::HashSet; @@ -312,7 +314,7 @@ impl ServiceManager { // Nexus takes a separate config file for parameters which // cannot be known at packaging time. - let runtime_config = NexusRuntimeConfig { + let deployment_config = NexusDeploymentConfig { id: service.id, dropshot_external: ConfigDropshot { bind_address: SocketAddr::V6(external_address), @@ -351,10 +353,11 @@ impl ServiceManager { })?; // Serialize the configuration and append it into the file. 
- let serialized_cfg = toml::Value::try_from(&runtime_config) - .expect("Cannot serialize config"); + let serialized_cfg = + toml::Value::try_from(&deployment_config) + .expect("Cannot serialize config"); let mut map = toml::map::Map::new(); - map.insert("runtime".to_string(), serialized_cfg); + map.insert("deployment".to_string(), serialized_cfg); let config_str = toml::to_string(&map).map_err(|err| { Error::TomlSerialize { path: config_path.clone(), err } })?; From b07322c2a5132574097691ce47e5937558b24bd0 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 18:00:48 -0400 Subject: [PATCH 20/88] clippy, fmt --- nexus/src/app/background/services.rs | 11 ++++------- nexus/src/db/datastore.rs | 25 +++++++++---------------- nexus/src/db/model/dataset_kind.rs | 2 +- nexus/src/db/model/service_kind.rs | 2 +- nexus/src/lib.rs | 4 +--- sled-agent/src/server.rs | 2 +- sled-agent/src/storage_manager.rs | 4 +--- 7 files changed, 18 insertions(+), 32 deletions(-) diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index 53628f5905c..16ef56fe77b 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -122,8 +122,7 @@ impl ServiceBalancer { let address = Ipv6Addr::from(s.ip); let (name, service_type) = Self::get_service_name_and_type( - address, - s.kind.clone(), + address, s.kind, ); // TODO: This is hacky, specifically to inject @@ -137,11 +136,9 @@ impl ServiceBalancer { _ => vec![], }; - // TODO: this is wrong for DNS service; needs the gz - // addreess SledAgentTypes::ServiceRequest { id: s.id(), - name: name.to_string(), + name, addresses: vec![address], gz_addresses, service_type, @@ -249,7 +246,7 @@ impl ServiceBalancer { ServiceRedundancy::PerRack(desired_count) => { self.ensure_rack_service( opctx, - expected_svc.kind.clone(), + expected_svc.kind, desired_count, ) .await?; @@ -335,7 +332,7 @@ impl ServiceBalancer { info!(self.log, "Ensuring dataset {:?} exists", expected_dataset); self.ensure_rack_dataset( opctx, - expected_dataset.kind.clone(), + expected_dataset.kind, expected_dataset.redundancy, ) .await? diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 6249c5975b0..54985c276a9 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -449,11 +449,9 @@ impl DataStore { db::schema::sled::table .filter(sled_dsl::time_deleted.is_null()) .filter(sled_dsl::rack_id.eq(rack_id)) - .left_outer_join( - db::schema::service::table.on(svc_dsl::sled_id - .eq(sled_dsl::id) - .and(svc_dsl::kind.eq(kind.clone()))), - ) + .left_outer_join(db::schema::service::table.on( + svc_dsl::sled_id.eq(sled_dsl::id).and(svc_dsl::kind.eq(kind)), + )) .select(<(Sled, Option)>::as_select()) .get_results(conn) } @@ -476,11 +474,8 @@ impl DataStore { self.pool() .transaction(move |conn| { - let sleds_and_maybe_svcs = Self::sled_and_service_list_sync( - conn, - rack_id, - kind.clone(), - )?; + let sleds_and_maybe_svcs = + Self::sled_and_service_list_sync(conn, rack_id, kind)?; // Split the set of returned sleds into "those with" and "those // without" the requested service. @@ -522,7 +517,7 @@ impl DataStore { svc_id, sled.id(), address, - kind.clone(), + kind, ); // TODO: Can we insert all the services at the same time? 
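A minimal sketch (not part of the patch series) of the allocation policy behind ensure_rack_service above, reduced to plain data: sleds already running the service count toward the redundancy target, additional candidate sleds are consumed in order, and running out of candidates maps to the "Not enough sleds" error. Names here are illustrative.

#[derive(Debug, PartialEq)]
enum AllocError {
    NotEnoughSleds,
}

fn pick_new_placements(
    sleds_with_svc: usize,
    mut sleds_without_svc: Vec<&'static str>,
    desired_count: usize,
) -> Result<Vec<&'static str>, AllocError> {
    let mut new_placements = Vec::new();
    // Keep adding placements until the per-rack redundancy is satisfied.
    while sleds_with_svc + new_placements.len() < desired_count {
        let sled = sleds_without_svc.pop().ok_or(AllocError::NotEnoughSleds)?;
        new_placements.push(sled);
    }
    Ok(new_placements)
}

fn main() {
    // One sled already runs the service, two candidates remain, three wanted.
    assert_eq!(
        pick_new_placements(1, vec!["sled-b", "sled-c"], 3).unwrap().len(),
        2
    );
    // Asking for more redundancy than there are sleds fails, mirroring the
    // ServiceUnavailable error surfaced by the datastore.
    assert_eq!(
        pick_new_placements(1, vec![], 2),
        Err(AllocError::NotEnoughSleds)
    );
}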
@@ -675,7 +670,7 @@ impl DataStore { .left_outer_join( db::schema::dataset::table.on(dataset_dsl::pool_id .eq(zpool_dsl::id) - .and(dataset_dsl::kind.eq(kind.clone())) + .and(dataset_dsl::kind.eq(kind)) .and(dataset_dsl::time_deleted.is_null())), ) .select(<(Sled, Zpool, Option)>::as_select()) @@ -702,9 +697,7 @@ impl DataStore { .transaction(move |conn| { let sleds_zpools_and_maybe_datasets = Self::sled_zpool_and_dataset_list_sync( - conn, - rack_id, - kind.clone(), + conn, rack_id, kind, )?; // Split the set of returned zpools into "those with" and "those @@ -772,7 +765,7 @@ impl DataStore { dataset_id, zpool.id(), address, - kind.clone(), + kind, ); // TODO: Can we insert all the datasets at the same time? diff --git a/nexus/src/db/model/dataset_kind.rs b/nexus/src/db/model/dataset_kind.rs index bd85972b3dc..ef004bef9bf 100644 --- a/nexus/src/db/model/dataset_kind.rs +++ b/nexus/src/db/model/dataset_kind.rs @@ -13,7 +13,7 @@ impl_enum_type!( #[diesel(postgres_type(name = "dataset_kind"))] pub struct DatasetKindEnum; - #[derive(Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] #[diesel(sql_type = DatasetKindEnum)] pub enum DatasetKind; diff --git a/nexus/src/db/model/service_kind.rs b/nexus/src/db/model/service_kind.rs index b9a3e96c6f6..0cbb0d0f658 100644 --- a/nexus/src/db/model/service_kind.rs +++ b/nexus/src/db/model/service_kind.rs @@ -12,7 +12,7 @@ impl_enum_type!( #[diesel(postgres_type(name = "service_kind"))] pub struct ServiceKindEnum; - #[derive(Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] #[diesel(sql_type = ServiceKindEnum)] pub enum ServiceKind; diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 1f11a323cc5..1a461a61559 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -121,9 +121,7 @@ pub struct Server { } impl Server { - pub async fn start<'a>( - internal: InternalServer<'a>, - ) -> Result { + pub async fn start(internal: InternalServer<'_>) -> Result { let apictx = internal.apictx; let http_server_internal = internal.http_server_internal; let log = internal.log; diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 6273e1f2a2f..97920925789 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -86,7 +86,7 @@ impl Server { let nexus_client = lazy_nexus_client .get() .await - .map_err(|err| BackoffError::transient(err.to_string()))?; + .map_err(|err| BackoffError::transient(err))?; nexus_client .cpapi_sled_agents_post( &sled_id, diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 24bf14ccfa1..d559ee70589 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -637,9 +637,7 @@ impl StorageWorker { lazy_nexus_client .get() .await - .map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })? + .map_err(|e| backoff::BackoffError::transient(e))? 
.zpool_put(&sled_id, &pool_id, &zpool_request) .await .map_err(|e| { From e1dc94188da4827e6ef1a11c671d85ff234af148 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 18:36:05 -0400 Subject: [PATCH 21/88] [nexus][sled-agent] Generate rack ID in RSS, plumb it through Nexus --- common/src/nexus_config.rs | 2 ++ common/src/sql/dbinit.sql | 8 ++++++++ nexus/examples/config.toml | 1 + nexus/src/app/sled.rs | 2 +- nexus/src/config.rs | 7 +++++++ nexus/src/db/datastore.rs | 8 +++++--- nexus/src/db/model/sled.rs | 5 ++++- nexus/src/db/schema.rs | 1 + nexus/src/lib.rs | 8 +++----- nexus/test-utils/src/lib.rs | 6 ++---- sled-agent/src/bootstrap/agent.rs | 1 + sled-agent/src/bootstrap/params.rs | 7 +++++++ sled-agent/src/rack_setup/service.rs | 3 +++ sled-agent/src/server.rs | 14 ++++++++++---- sled-agent/src/services.rs | 11 +++++++++++ sled-agent/src/sled_agent.rs | 2 ++ 16 files changed, 68 insertions(+), 18 deletions(-) diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index 2b34108643d..a18454e02d0 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -102,6 +102,8 @@ pub enum Database { pub struct DeploymentConfig { /// Uuid of the Nexus instance pub id: Uuid, + /// Uuid of the Rack where Nexus is executing. + pub rack_id: Uuid, /// Dropshot configuration for external API server pub dropshot_external: ConfigDropshot, /// Dropshot configuration for internal API server diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index 3944b3fd46f..e358c9a227e 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -75,6 +75,9 @@ CREATE TABLE omicron.public.sled ( time_deleted TIMESTAMPTZ, rcgen INT NOT NULL, + /* FK into the Rack table */ + rack_id UUID NOT NULL, + /* The IP address and bound port of the sled agent server. 
*/ ip INET NOT NULL, port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, @@ -83,6 +86,11 @@ CREATE TABLE omicron.public.sled ( last_used_address INET NOT NULL ); +/* Add an index which lets us look up sleds on a rack */ +CREATE INDEX ON omicron.public.sled ( + rack_id +) WHERE time_deleted IS NULL; + /* * Services */ diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index c841a12ac1c..727055490e8 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -36,6 +36,7 @@ address = "[::1]:8123" [deployment] # Identifier for this instance of Nexus id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" +rack_id = "c19a698f-c6f9-4a17-ae30-20d711b8f7dc" [deployment.dropshot_external] # IP address and TCP port on which to listen for the external API diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 0150cbec148..e4fc616f095 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -31,7 +31,7 @@ impl super::Nexus { address: SocketAddrV6, ) -> Result<(), Error> { info!(self.log, "registered sled agent"; "sled_uuid" => id.to_string()); - let sled = db::model::Sled::new(id, address); + let sled = db::model::Sled::new(id, address, self.rack_id); self.db_datastore.sled_upsert(sled).await?; Ok(()) } diff --git a/nexus/src/config.rs b/nexus/src/config.rs index 83be56fd335..98cbf0169cf 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -329,6 +329,7 @@ mod test { max_vpc_ipv4_subnet_prefix = 27 [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 @@ -348,6 +349,9 @@ mod test { Config { deployment: DeploymentConfig { id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), + rack_id: "38b90dc4-c22a-65ba-f49a-f051fe01208f" + .parse() + .unwrap(), dropshot_external: ConfigDropshot { bind_address: "10.1.2.3:4567" .parse::() @@ -407,6 +411,7 @@ mod test { address = "[::1]:8123" [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 @@ -448,6 +453,7 @@ mod test { address = "[::1]:8123" [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 @@ -503,6 +509,7 @@ mod test { max_vpc_ipv4_subnet_prefix = 100 [deployment] id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" [deployment.dropshot_external] bind_address = "10.1.2.3:4567" request_body_max_bytes = 1024 diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 499eee458bc..6c28185ce7d 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -4034,8 +4034,9 @@ mod test { 0, 0, ); + let rack_id = Uuid::new_v4(); let sled_id = Uuid::new_v4(); - let sled = Sled::new(sled_id, bogus_addr.clone()); + let sled = Sled::new(sled_id, bogus_addr.clone(), rack_id); datastore.sled_upsert(sled).await.unwrap(); sled_id } @@ -4391,14 +4392,15 @@ mod test { let opctx = OpContext::for_tests(logctx.log.new(o!()), datastore.clone()); + let rack_id = Uuid::new_v4(); let addr1 = "[fd00:1de::1]:12345".parse().unwrap(); let sled1_id = "0de4b299-e0b4-46f0-d528-85de81a7095f".parse().unwrap(); - let sled1 = db::model::Sled::new(sled1_id, addr1); + let sled1 = db::model::Sled::new(sled1_id, addr1, rack_id); 
datastore.sled_upsert(sled1).await.unwrap(); let addr2 = "[fd00:1df::1]:12345".parse().unwrap(); let sled2_id = "66285c18-0c79-43e0-e54f-95271f271314".parse().unwrap(); - let sled2 = db::model::Sled::new(sled2_id, addr2); + let sled2 = db::model::Sled::new(sled2_id, addr2, rack_id); datastore.sled_upsert(sled2).await.unwrap(); let ip = datastore.next_ipv6_address(&opctx, sled1_id).await.unwrap(); diff --git a/nexus/src/db/model/sled.rs b/nexus/src/db/model/sled.rs index ad756c3473f..ebe492c7459 100644 --- a/nexus/src/db/model/sled.rs +++ b/nexus/src/db/model/sled.rs @@ -21,6 +21,8 @@ pub struct Sled { time_deleted: Option>, rcgen: Generation, + pub rack_id: Uuid, + // ServiceAddress (Sled Agent). pub ip: ipv6::Ipv6Addr, pub port: SqlU16, @@ -30,7 +32,7 @@ pub struct Sled { } impl Sled { - pub fn new(id: Uuid, addr: SocketAddrV6) -> Self { + pub fn new(id: Uuid, addr: SocketAddrV6, rack_id: Uuid) -> Self { let last_used_address = { let mut segments = addr.ip().segments(); segments[7] += omicron_common::address::RSS_RESERVED_ADDRESSES; @@ -40,6 +42,7 @@ impl Sled { identity: SledIdentity::new(id), time_deleted: None, rcgen: Generation::new(), + rack_id, ip: ipv6::Ipv6Addr::from(addr.ip()), port: addr.port().into(), last_used_address, diff --git a/nexus/src/db/schema.rs b/nexus/src/db/schema.rs index a6d281d987e..41c8c3527b9 100644 --- a/nexus/src/db/schema.rs +++ b/nexus/src/db/schema.rs @@ -297,6 +297,7 @@ table! { time_deleted -> Nullable, rcgen -> Int8, + rack_id -> Uuid, ip -> Inet, port -> Int4, last_used_address -> Inet, diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 79f8a2cd838..f0d5210930b 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -36,7 +36,6 @@ use external_api::http_entrypoints::external_api; use internal_api::http_entrypoints::internal_api; use slog::Logger; use std::sync::Arc; -use uuid::Uuid; #[macro_use] extern crate slog; @@ -82,7 +81,6 @@ impl Server { /// Start a nexus server. pub async fn start( config: &Config, - rack_id: Uuid, log: &Logger, ) -> Result { let log = log.new(o!("name" => config.deployment.id.to_string())); @@ -90,7 +88,8 @@ impl Server { let ctxlog = log.new(o!("component" => "ServerContext")); - let apictx = ServerContext::new(rack_id, ctxlog, &config)?; + let apictx = + ServerContext::new(config.deployment.rack_id, ctxlog, &config)?; let http_server_starter_external = dropshot::HttpServerStarter::new( &config.deployment.dropshot_external, @@ -167,8 +166,7 @@ pub async fn run_server(config: &Config) -> Result<(), String> { } else { debug!(log, "registered DTrace probes"); } - let rack_id = Uuid::new_v4(); - let server = Server::start(config, rack_id, &log).await?; + let server = Server::start(config, &log).await?; server.register_as_producer().await; server.wait_for_finish().await } diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 02b9a0d7b7d..ed056f48d8d 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -90,7 +90,6 @@ pub async fn test_setup_with_config( config: &mut omicron_nexus::Config, ) -> ControlPlaneTestContext { let logctx = LogContext::new(test_name, &config.pkg.log); - let rack_id = Uuid::parse_str(RACK_UUID).unwrap(); let log = &logctx.log; // Start up CockroachDB. 
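A minimal sketch (not part of the patch series) of how Sled::new above seeds last_used_address: a reserved-address count is added to the final segment of the sled's underlay address, so addresses handed out by RSS are skipped by later next_ipv6_address calls. The constant below is an assumed placeholder for omicron_common::address::RSS_RESERVED_ADDRESSES.

use std::net::Ipv6Addr;

// Assumed placeholder value, for illustration only; the real constant lives
// in omicron_common::address::RSS_RESERVED_ADDRESSES.
const RESERVED_ADDRESSES_PLACEHOLDER: u16 = 32;

fn seed_last_used_address(sled_addr: Ipv6Addr) -> Ipv6Addr {
    let mut segments = sled_addr.segments();
    // Mirrors Sled::new: bump the last 16-bit segment past the reserved range.
    segments[7] += RESERVED_ADDRESSES_PLACEHOLDER;
    Ipv6Addr::from(segments)
}

fn main() {
    let sled: Ipv6Addr = "fd00:1122:3344:0101::1".parse().unwrap();
    // ::1 plus 32 reserved addresses lands at ::21 (0x21 == 33).
    assert_eq!(
        seed_last_used_address(sled),
        "fd00:1122:3344:0101::21".parse::<Ipv6Addr>().unwrap()
    );
}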
@@ -104,9 +103,8 @@ pub async fn test_setup_with_config( nexus_config::Database::FromUrl { url: database.pg_config().clone() }; config.pkg.timeseries_db.address.set_port(clickhouse.port()); - let server = omicron_nexus::Server::start(&config, rack_id, &logctx.log) - .await - .unwrap(); + let server = + omicron_nexus::Server::start(&config, &logctx.log).await.unwrap(); server .apictx .nexus diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index fc432554bfa..507d92baf91 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -245,6 +245,7 @@ impl Agent { &self.sled_config, self.parent_log.clone(), sled_address, + request.rack_id, ) .await .map_err(|e| { diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index def1f55c068..fdbbf2c4295 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -8,13 +8,20 @@ use super::trust_quorum::ShareDistribution; use omicron_common::address::{Ipv6Subnet, SLED_PREFIX}; use serde::{Deserialize, Serialize}; use std::borrow::Cow; +use uuid::Uuid; /// Configuration information for launching a Sled Agent. #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] pub struct SledAgentRequest { + /// Uuid of the Sled Agent to be created. + pub id: Uuid, + /// Portion of the IP space to be managed by the Sled Agent. pub subnet: Ipv6Subnet, + /// Uuid of the rack to which this sled agent belongs. + pub rack_id: Uuid, + /// Share of the rack secret for this Sled Agent. // TODO-cleanup This is currently optional because we don't do trust quorum // shares for single-node deployments (i.e., most dev/test environments), diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 0f8775ed932..c48a20cc4bc 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -357,6 +357,7 @@ impl ServiceInner { (request, (idx, bootstrap_addr)) }); + let rack_id = Uuid::new_v4(); let allocations = requests_and_sleds.map(|(request, sled)| { let (idx, bootstrap_addr) = sled; info!( @@ -373,7 +374,9 @@ impl ServiceInner { bootstrap_addr, SledAllocation { initialization_request: SledAgentRequest { + id: Uuid::new_v4(), subnet, + rack_id, trust_quorum_share: maybe_rack_secret_shares .as_mut() .map(|shares_iter| { diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 3b31854628e..df596db8d01 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -38,6 +38,7 @@ impl Server { config: &Config, log: Logger, addr: SocketAddrV6, + rack_id: Uuid, ) -> Result { info!(log, "setting up sled agent server"); @@ -47,10 +48,15 @@ impl Server { client_log, )); - let sled_agent = - SledAgent::new(&config, log.clone(), nexus_client.clone(), addr) - .await - .map_err(|e| e.to_string())?; + let sled_agent = SledAgent::new( + &config, + log.clone(), + nexus_client.clone(), + addr, + rack_id, + ) + .await + .map_err(|e| e.to_string())?; let mut dropshot_config = dropshot::ConfigDropshot::default(); dropshot_config.request_body_max_bytes = 1024 * 1024; diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 3f617aaf399..dde2ef47937 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -25,6 +25,7 @@ use std::path::{Path, PathBuf}; use std::str::FromStr; use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; +use uuid::Uuid; // The filename of ServiceManager's internal storage. 
const SERVICE_CONFIG_FILENAME: &str = "service.toml"; @@ -124,6 +125,7 @@ pub struct ServiceManager { vnic_allocator: VnicAllocator, underlay_vnic: EtherstubVnic, underlay_address: Ipv6Addr, + rack_id: Uuid, } impl ServiceManager { @@ -143,6 +145,7 @@ impl ServiceManager { underlay_vnic: EtherstubVnic, underlay_address: Ipv6Addr, config: Config, + rack_id: Uuid, ) -> Result { debug!(log, "Creating new ServiceManager"); let mgr = Self { @@ -152,6 +155,7 @@ impl ServiceManager { vnic_allocator: VnicAllocator::new("Service", etherstub), underlay_vnic, underlay_address, + rack_id, }; let config_path = mgr.services_config_path(); @@ -316,6 +320,7 @@ impl ServiceManager { // cannot be known at packaging time. let deployment_config = NexusDeploymentConfig { id: service.id, + rack_id: self.rack_id, dropshot_external: ConfigDropshot { bind_address: SocketAddr::V6(external_address), request_body_max_bytes: 1048576, @@ -702,6 +707,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -728,6 +734,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -756,6 +763,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -773,6 +781,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -797,6 +806,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, test_config.make_config(), + Uuid::new_v4(), ) .await .unwrap(); @@ -816,6 +826,7 @@ mod test { EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()), Ipv6Addr::LOCALHOST, config, + Uuid::new_v4(), ) .await .unwrap(); diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index e674663bac6..14e34f0d8d3 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -115,6 +115,7 @@ impl SledAgent { log: Logger, nexus_client: Arc, sled_address: SocketAddrV6, + rack_id: Uuid, ) -> Result { let id = &config.id; @@ -248,6 +249,7 @@ impl SledAgent { etherstub_vnic.clone(), *sled_address.ip(), services::Config::default(), + rack_id, ) .await?; From a4309ac5bf0b986b347be1d1583ff370a626c2ec Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 18:40:57 -0400 Subject: [PATCH 22/88] need rack_id in the test config too --- nexus/tests/config.test.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 0a8789893a1..fdfeb5effb4 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -39,6 +39,7 @@ max_vpc_ipv4_subnet_prefix = 29 # Identifier for this instance of Nexus. # NOTE: The test suite always overrides this. 
id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" +rack_id = "c19a698f-c6f9-4a17-ae30-20d711b8f7dc" # # NOTE: for the test suite, the port MUST be 0 (in order to bind to any From 7f41e42cf63fdbb4d64554a438c4fa3ed514655a Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 19:02:23 -0400 Subject: [PATCH 23/88] Strongly-typed DNS service names --- internal-dns-client/src/names.rs | 30 ++++++++++++++++++++++++++-- nexus/src/context.rs | 4 ++-- sled-agent/src/nexus.rs | 4 ++-- sled-agent/src/rack_setup/service.rs | 9 ++++----- 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/internal-dns-client/src/names.rs b/internal-dns-client/src/names.rs index 6384ec9e503..66e356f46cc 100644 --- a/internal-dns-client/src/names.rs +++ b/internal-dns-client/src/names.rs @@ -7,17 +7,43 @@ use uuid::Uuid; const DNS_ZONE: &str = "control-plane.oxide.internal"; +pub enum ServiceName { + Nexus, + Cockroach, +} + +impl fmt::Display for ServiceName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + ServiceName::Nexus => write!(f, "nexus"), + ServiceName::Cockroach => write!(f, "cockroachdb"), + } + } +} + +pub enum BackendName { + SledAgent, +} + +impl fmt::Display for BackendName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + BackendName::SledAgent => write!(f, "sledagent"), + } + } +} + pub enum SRV { /// A service identified and accessed by name, such as "nexus", "CRDB", etc. /// /// This is used in cases where services are interchangeable. - Service(String), + Service(ServiceName), /// A service identified by name and a unique identifier. /// /// This is used in cases where services are not interchangeable, such as /// for the Sled agent. - Backend(String, Uuid), + Backend(BackendName, Uuid), } impl fmt::Display for SRV { diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 636b2bde30f..2677df0c3c9 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -18,7 +18,7 @@ use authn::external::session_cookie::HttpAuthnSessionCookie; use authn::external::spoof::HttpAuthnSpoof; use authn::external::HttpAuthnScheme; use chrono::{DateTime, Duration, Utc}; -use internal_dns_client::names::SRV; +use internal_dns_client::names::{ServiceName, SRV}; use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, COCKROACH_PORT}; use omicron_common::api::external::Error; use omicron_common::nexus_config; @@ -155,7 +155,7 @@ impl ServerContext { info!(log, "Accessing DB url from DNS"); let response = resolver .lookup_ip( - &SRV::Service("cockroachdb".to_string()).to_string(), + &SRV::Service(ServiceName::Cockroach).to_string(), ) .await .map_err(|e| format!("Failed to lookup IP: {}", e))?; diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 00e87fd6a1b..3dc40369219 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -7,7 +7,7 @@ pub use crate::mocks::MockNexusClient as NexusClient; #[cfg(not(test))] pub use nexus_client::Client as NexusClient; -use internal_dns_client::names::SRV; +use internal_dns_client::names::{ServiceName, SRV}; use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, NEXUS_INTERNAL_PORT}; use slog::Logger; use std::net::Ipv6Addr; @@ -49,7 +49,7 @@ impl LazyNexusClient { internal_dns_client::multiclient::create_resolver(az_subnet) .map_err(|e| format!("Failed to create DNS resolver: {}", e))?; let response = resolver - .lookup_ip(&SRV::Service("nexus".to_string()).to_string()) + .lookup_ip(&SRV::Service(ServiceName::Nexus).to_string()) .await .map_err(|e| format!("Failed to lookup 
Nexus IP: {}", e))?; let address = response.iter().next().ok_or_else(|| { diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 30b0e867b79..802fac5f304 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -15,7 +15,7 @@ use crate::rack_setup::plan::service::{ use crate::rack_setup::plan::sled::{ Plan as SledPlan, PlanError as SledPlanError, }; -use internal_dns_client::names::{AAAA, SRV}; +use internal_dns_client::names::{ServiceName, AAAA, SRV}; use nexus_client::{ types as NexusTypes, Client as NexusClient, Error as NexusError, }; @@ -225,8 +225,7 @@ impl ServiceInner { .iter() .map(|dataset| (AAAA::Zone(dataset.id), dataset.address)) .collect::>(); - let srv_key = SRV::Service("cockroachdb".into()); - + let srv_key = SRV::Service(ServiceName::Cockroach); self.dns_servers .get() .expect("DNS servers must be initialized first") @@ -304,7 +303,7 @@ impl ServiceInner { ) }) .collect::>(); - let srv_key = SRV::Service("nexus".into()); + let srv_key = SRV::Service(ServiceName::Nexus); self.dns_servers .get() .expect("DNS servers must be initialized first") @@ -370,7 +369,7 @@ impl ServiceInner { ) .expect("Failed to create DNS resolver"); let response = resolver - .lookup_ip(&SRV::Service("nexus".to_string()).to_string()) + .lookup_ip(&SRV::Service(ServiceName::Nexus).to_string()) .await .expect("Failed to lookup IP"); From a68de334aa30e39ce3ccb3510c4cdeb5d9666d08 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 22:58:34 -0400 Subject: [PATCH 24/88] Populate DNS records --- common/src/address.rs | 1 + internal-dns-client/src/multiclient.rs | 56 ++++++++++++++++++-- internal-dns-client/src/names.rs | 15 +++++- nexus/src/app/background/services.rs | 41 +++++++++++++-- nexus/src/app/mod.rs | 6 ++- nexus/src/app/sled.rs | 4 +- nexus/src/db/datastore.rs | 6 +-- nexus/src/db/ipv6.rs | 2 + nexus/src/db/model/dataset.rs | 34 +++++++++--- nexus/src/db/model/service.rs | 28 +++++++++- nexus/src/internal_api/params.rs | 2 +- sled-agent/src/params.rs | 61 +++++++++++++++++++--- sled-agent/src/rack_setup/service.rs | 72 +++++--------------------- 13 files changed, 237 insertions(+), 91 deletions(-) diff --git a/common/src/address.rs b/common/src/address.rs index 7284ba4cc64..32fef1c3994 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -36,6 +36,7 @@ pub const PROPOLIS_PORT: u16 = 12400; pub const COCKROACH_PORT: u16 = 32221; pub const CRUCIBLE_PORT: u16 = 32345; pub const CLICKHOUSE_PORT: u16 = 8123; +pub const OXIMETER_PORT: u16 = 12223; pub const NEXUS_EXTERNAL_PORT: u16 = 12220; pub const NEXUS_INTERNAL_PORT: u16 = 12221; diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 24c8817c274..de14055197b 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -21,9 +21,16 @@ type DnsError = crate::Error; /// A connection used to update multiple DNS servers. pub struct Updater { + log: Logger, clients: Vec, } +pub trait Service { + fn aaaa(&self) -> crate::names::AAAA; + fn srv(&self) -> crate::names::SRV; + fn address(&self) -> SocketAddrV6; +} + impl Updater { /// Creates a new "Updater", capable of communicating with all /// DNS servers within the AZ. @@ -41,15 +48,56 @@ impl Updater { }) .collect::>(); - Self { clients } + Self { log, clients } + } + + /// Inserts all service records into the DNS server. + /// + /// This method is most efficient when records are sorted by + /// SRV key. 
+ pub async fn insert_dns_records( + &self, + records: &Vec, + ) -> Result<(), DnsError> { + let mut records = records.iter().peekable(); + + while let Some(record) = records.next() { + let srv = record.srv(); + + match &srv { + &crate::names::SRV::Service(_) => { + let mut aaaa = vec![]; + while let Some(record) = records.peek() { + if record.srv() == srv { + let record = records.next().unwrap(); + aaaa.push((record.aaaa(), record.address())); + } else { + break; + } + } + + self.insert_dns_records_internal( + aaaa, + srv, + ).await?; + }, + &crate::names::SRV::Backend(_, _) => { + let aaaa = vec![(record.aaaa(), record.address())]; + self.insert_dns_records_internal( + aaaa, + record.srv(), + ).await?; + }, + }; + } + Ok(()) } /// Utility function to insert: /// - A set of uniquely-named AAAA records, each corresponding to an address /// - An SRV record, pointing to each of the AAAA records. - pub async fn insert_dns_records( + async fn insert_dns_records_internal( &self, - log: &Logger, aaaa: Vec<(crate::names::AAAA, SocketAddrV6)>, srv_key: crate::names::SRV, ) -> Result<(), DnsError> { @@ -84,7 +132,7 @@ impl Updater { Ok::<(), BackoffError>(()) }; let log_failure = |error, _| { - warn!(log, "Failed to set DNS records"; "error" => ?error); + warn!(self.log, "Failed to set DNS records"; "error" => ?error); }; retry_notify(internal_service_policy(), set_record, log_failure) diff --git a/internal-dns-client/src/names.rs b/internal-dns-client/src/names.rs index 66e356f46cc..53c1504d168 100644 --- a/internal-dns-client/src/names.rs +++ b/internal-dns-client/src/names.rs @@ -7,32 +7,43 @@ use uuid::Uuid; const DNS_ZONE: &str = "control-plane.oxide.internal"; +#[derive(Debug, PartialEq, PartialOrd)] pub enum ServiceName { - Nexus, + Clickhouse, Cockroach, + InternalDNS, + Nexus, + Oximeter, } impl fmt::Display for ServiceName { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match &self { + ServiceName::Clickhouse => write!(f, "clickhouse"), + ServiceName::Cockroach => write!(f, "cockroach"), + ServiceName::InternalDNS => write!(f, "internalDNS"), ServiceName::Nexus => write!(f, "nexus"), - ServiceName::Cockroach => write!(f, "cockroachdb"), + ServiceName::Oximeter => write!(f, "oximeter"), } } } +#[derive(Debug, PartialEq, PartialOrd)] pub enum BackendName { + Crucible, SledAgent, } impl fmt::Display for BackendName { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match &self { + BackendName::Crucible => write!(f, "crucible"), BackendName::SledAgent => write!(f, "sledagent"), } } } +#[derive(Debug, PartialEq, PartialOrd)] pub enum SRV { /// A service identified and accessed by name, such as "nexus", "CRDB", etc. 
/// diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index 16ef56fe77b..33835fa7087 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -14,6 +14,10 @@ use crate::db::model::ServiceKind; use crate::db::model::Sled; use crate::db::model::Zpool; use crate::Nexus; +use internal_dns_client::multiclient::{ + Service as DnsService, + Updater as DnsUpdater +}; use omicron_common::address::{ DNS_PORT, DNS_REDUNDANCY, DNS_SERVER_PORT, NEXUS_EXTERNAL_PORT, NEXUS_INTERNAL_PORT, @@ -82,11 +86,21 @@ const EXPECTED_DATASETS: [ExpectedDataset; 3] = [ pub struct ServiceBalancer { log: Logger, nexus: Arc, + dns_updater: DnsUpdater, } impl ServiceBalancer { pub fn new(log: Logger, nexus: Arc) -> Self { - Self { log, nexus } + let dns_updater = DnsUpdater::new( + nexus.az_subnet(), + log.new(o!("component" => "DNS Updater")), + ); + + Self { + log, + nexus, + dns_updater, + } } // Reaches out to all sled agents implied in "services", and @@ -94,7 +108,7 @@ impl ServiceBalancer { async fn instantiate_services( &self, opctx: &OpContext, - services: Vec, + mut services: Vec, ) -> Result<(), Error> { let mut sled_ids = HashSet::new(); for svc in &services { @@ -148,6 +162,15 @@ impl ServiceBalancer { }) .await?; } + + // Putting records of the same SRV right next to each other isn't + // strictly necessary, but doing so makes the record insertion more + // efficient. + services.sort_by(|a, b| a.srv().partial_cmp(&b.srv()).unwrap()); + self.dns_updater.insert_dns_records( + &services + ).await.map_err(|e| Error::internal_error(&e.to_string()))?; + Ok(()) } @@ -273,7 +296,7 @@ impl ServiceBalancer { .await?; // Actually instantiate those datasets. - self.instantiate_datasets(new_datasets).await + self.instantiate_datasets(new_datasets, kind).await } // Reaches out to all sled agents implied in "services", and @@ -281,7 +304,12 @@ impl ServiceBalancer { async fn instantiate_datasets( &self, datasets: Vec<(Sled, Zpool, Dataset)>, + kind: DatasetKind, ) -> Result<(), Error> { + if datasets.is_empty() { + return Ok(()); + } + let mut sled_clients = HashMap::new(); // TODO: We could issue these requests concurrently @@ -298,9 +326,10 @@ impl ServiceBalancer { } }; - let dataset_kind = match dataset.kind { + let dataset_kind = match kind { // TODO: This set of "all addresses" isn't right. // TODO: ... should we even be using "all addresses" to contact CRDB? + // Can it just rely on DNS, somehow? DatasetKind::Cockroach => { SledAgentTypes::DatasetKind::CockroachDb(vec![]) } @@ -321,6 +350,10 @@ impl ServiceBalancer { .await?; } + self.dns_updater.insert_dns_records( + &datasets.into_iter().map(|(_, _, dataset)| dataset).collect() + ).await.map_err(|e| Error::internal_error(&e.to_string()))?; + Ok(()) } diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 2f12e461b73..8b6cc606802 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -14,7 +14,7 @@ use crate::populate::PopulateArgs; use crate::populate::PopulateStatus; use crate::saga_interface::SagaContext; use anyhow::anyhow; -use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; +use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, RACK_PREFIX}; use omicron_common::api::external::Error; use slog::Logger; use std::sync::Arc; @@ -210,6 +210,10 @@ impl Nexus { nexus } + pub fn az_subnet(&self) -> Ipv6Subnet { + Ipv6Subnet::::new(self.rack_subnet.net().ip()) + } + /// Return the tunable configuration parameters, e.g. for use in tests. 
pub fn tunables(&self) -> &config::Tunables { &self.tunables diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index e4fc616f095..0e01112c532 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -16,7 +16,7 @@ use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use sled_agent_client::Client as SledAgentClient; -use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::net::{Ipv6Addr, SocketAddrV6}; use std::sync::Arc; use uuid::Uuid; @@ -135,7 +135,7 @@ impl super::Nexus { &self, id: Uuid, zpool_id: Uuid, - address: SocketAddr, + address: SocketAddrV6, kind: DatasetKind, ) -> Result<(), Error> { info!(self.log, "upserting dataset"; "zpool_id" => zpool_id.to_string(), "dataset_id" => id.to_string(), "address" => address.to_string()); diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 54985c276a9..6485cfd3d42 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -95,7 +95,7 @@ use omicron_common::api::external::{ use omicron_common::bail_unless; use sled_agent_client::types as sled_client_types; use std::convert::{TryFrom, TryInto}; -use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::net::{Ipv6Addr, SocketAddrV6}; use std::sync::Arc; use uuid::Uuid; @@ -753,12 +753,12 @@ impl DataStore { TxnError::CustomError(DatasetError::Other(e)) }) .map(|ip| { - SocketAddr::V6(SocketAddrV6::new( + SocketAddrV6::new( ip, kind.port(), 0, 0, - )) + ) })?; let dataset = db::model::Dataset::new( diff --git a/nexus/src/db/ipv6.rs b/nexus/src/db/ipv6.rs index 2b494100825..41855e0da0c 100644 --- a/nexus/src/db/ipv6.rs +++ b/nexus/src/db/ipv6.rs @@ -16,9 +16,11 @@ use diesel::sql_types::Inet; use ipnetwork::IpNetwork; use ipnetwork::Ipv6Network; use omicron_common::api::external::Error; +use serde::{Deserialize, Serialize}; #[derive( Clone, Copy, AsExpression, FromSqlRow, PartialEq, Ord, PartialOrd, Eq, + Deserialize, Serialize, )] #[diesel(sql_type = Inet)] pub struct Ipv6Addr(std::net::Ipv6Addr); diff --git a/nexus/src/db/model/dataset.rs b/nexus/src/db/model/dataset.rs index fa4e238fb47..652cd7ede0b 100644 --- a/nexus/src/db/model/dataset.rs +++ b/nexus/src/db/model/dataset.rs @@ -3,12 +3,15 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use super::{DatasetKind, Generation, Region, SqlU16}; +use crate::db::identity::Asset; use crate::db::collection_insert::DatastoreCollection; +use crate::db::ipv6; use crate::db::schema::{dataset, region}; use chrono::{DateTime, Utc}; use db_macros::Asset; +use internal_dns_client::names::{AAAA, SRV, ServiceName, BackendName}; use serde::{Deserialize, Serialize}; -use std::net::SocketAddr; +use std::net::{Ipv6Addr, SocketAddrV6}; use uuid::Uuid; /// Database representation of a Dataset. 
@@ -35,7 +38,7 @@ pub struct Dataset { pub pool_id: Uuid, - ip: ipnetwork::IpNetwork, + ip: ipv6::Ipv6Addr, port: SqlU16, pub kind: DatasetKind, @@ -43,11 +46,10 @@ pub struct Dataset { } impl Dataset { - // TODO: Only operate on SocketAddrV6 pub fn new( id: Uuid, pool_id: Uuid, - addr: SocketAddr, + addr: SocketAddrV6, kind: DatasetKind, ) -> Self { let size_used = match kind { @@ -66,12 +68,30 @@ impl Dataset { } } - pub fn address(&self) -> SocketAddr { + pub fn address(&self) -> SocketAddrV6 { self.address_with_port(self.port.into()) } - pub fn address_with_port(&self, port: u16) -> SocketAddr { - SocketAddr::new(self.ip.ip(), port) + pub fn address_with_port(&self, port: u16) -> SocketAddrV6 { + SocketAddrV6::new(Ipv6Addr::from(self.ip), port, 0, 0) + } +} + +impl internal_dns_client::multiclient::Service for Dataset { + fn aaaa(&self) -> AAAA { + AAAA::Zone(self.id()) + } + + fn srv(&self) -> SRV { + match self.kind { + DatasetKind::Crucible => SRV::Backend(BackendName::Crucible, self.id()), + DatasetKind::Clickhouse => SRV::Service(ServiceName::Clickhouse), + DatasetKind::Cockroach => SRV::Service(ServiceName::Cockroach), + } + } + + fn address(&self) -> SocketAddrV6 { + self.address() } } diff --git a/nexus/src/db/model/service.rs b/nexus/src/db/model/service.rs index 0762db538b1..0ff7987c464 100644 --- a/nexus/src/db/model/service.rs +++ b/nexus/src/db/model/service.rs @@ -4,9 +4,12 @@ use super::ServiceKind; use crate::db::ipv6; +use crate::db::identity::Asset; use crate::db::schema::service; use db_macros::Asset; -use std::net::Ipv6Addr; +use internal_dns_client::names::{AAAA, SRV, ServiceName}; +use omicron_common::address::{DNS_SERVER_PORT, NEXUS_INTERNAL_PORT, OXIMETER_PORT}; +use std::net::{Ipv6Addr, SocketAddrV6}; use uuid::Uuid; /// Representation of services which may run on Sleds. @@ -36,3 +39,26 @@ impl Service { } } } + +impl internal_dns_client::multiclient::Service for Service { + fn aaaa(&self) -> AAAA { + AAAA::Zone(self.id()) + } + + fn srv(&self) -> SRV { + match self.kind { + ServiceKind::InternalDNS => SRV::Service(ServiceName::InternalDNS), + ServiceKind::Nexus => SRV::Service(ServiceName::Nexus), + ServiceKind::Oximeter => SRV::Service(ServiceName::Oximeter), + } + } + + fn address(&self) -> SocketAddrV6 { + let port = match self.kind { + ServiceKind::InternalDNS => DNS_SERVER_PORT, + ServiceKind::Nexus => NEXUS_INTERNAL_PORT, + ServiceKind::Oximeter => OXIMETER_PORT, + }; + SocketAddrV6::new(Ipv6Addr::from(self.ip), port, 0, 0) + } +} diff --git a/nexus/src/internal_api/params.rs b/nexus/src/internal_api/params.rs index 7dda7610573..32199a08505 100644 --- a/nexus/src/internal_api/params.rs +++ b/nexus/src/internal_api/params.rs @@ -74,7 +74,7 @@ impl FromStr for DatasetKind { pub struct DatasetPutRequest { /// Address on which a service is responding to requests for the /// dataset. - pub address: SocketAddr, + pub address: SocketAddrV6, /// Type of dataset being inserted. pub kind: DatasetKind, diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 32fd6ab6248..261ec9ecb16 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+use internal_dns_client::names::{BackendName, ServiceName, AAAA, SRV}; +use omicron_common::address::OXIMETER_PORT; use omicron_common::api::external; use omicron_common::api::internal::nexus::{ DiskRuntimeState, InstanceRuntimeState, @@ -252,14 +254,28 @@ pub struct DatasetEnsureBody { pub dataset_kind: DatasetKind, // The address on which the zone will listen for requests. pub address: SocketAddrV6, - // NOTE: We could insert a UUID here, if we want that to be set by the - // caller explicitly? Currently, the lack of a UUID implies that - // "at most one dataset type" exists within a zpool. - // - // It's unclear if this is actually necessary - making this change - // would also require the RSS to query existing datasets before - // requesting new ones (after all, we generally wouldn't want to - // create two CRDB datasets with different UUIDs on the same zpool). +} + +impl internal_dns_client::multiclient::Service for DatasetEnsureBody { + fn aaaa(&self) -> AAAA { + AAAA::Zone(self.id) + } + + fn srv(&self) -> SRV { + match self.dataset_kind { + DatasetKind::Crucible => { + SRV::Backend(BackendName::Crucible, self.id) + } + DatasetKind::Clickhouse => SRV::Service(ServiceName::Clickhouse), + DatasetKind::CockroachDb { .. } => { + SRV::Service(ServiceName::Cockroach) + } + } + } + + fn address(&self) -> SocketAddrV6 { + self.address + } } impl From for sled_agent_client::types::DatasetEnsureBody { @@ -331,6 +347,35 @@ pub struct ServiceRequest { pub service_type: ServiceType, } +impl internal_dns_client::multiclient::Service for ServiceRequest { + fn aaaa(&self) -> AAAA { + AAAA::Zone(self.id) + } + + fn srv(&self) -> SRV { + match self.service_type { + ServiceType::InternalDns { .. } => { + SRV::Service(ServiceName::InternalDNS) + } + ServiceType::Nexus { .. } => SRV::Service(ServiceName::Nexus), + ServiceType::Oximeter => SRV::Service(ServiceName::Oximeter), + } + } + + fn address(&self) -> SocketAddrV6 { + match self.service_type { + ServiceType::InternalDns { server_address, .. } => server_address, + ServiceType::Nexus { internal_address, .. } => internal_address, + ServiceType::Oximeter => SocketAddrV6::new( + Ipv6Addr::from(self.addresses[0]), + OXIMETER_PORT, + 0, + 0, + ), + } + } +} + impl From for sled_agent_client::types::ServiceRequest { fn from(s: ServiceRequest) -> Self { Self { diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 802fac5f304..3b2f7c4aa4d 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -15,7 +15,7 @@ use crate::rack_setup::plan::service::{ use crate::rack_setup::plan::sled::{ Plan as SledPlan, PlanError as SledPlanError, }; -use internal_dns_client::names::{ServiceName, AAAA, SRV}; +use internal_dns_client::names::{ServiceName, SRV}; use nexus_client::{ types as NexusTypes, Client as NexusClient, Error as NexusError, }; @@ -168,22 +168,11 @@ impl ServiceInner { } } - async fn initialize_crdb( + async fn initialize_datasets( &self, sled_address: SocketAddrV6, datasets: &Vec, ) -> Result<(), SetupServiceError> { - if datasets.iter().any(|dataset| { - !matches!( - dataset.dataset_kind, - crate::params::DatasetKind::CockroachDb { .. } - ) - }) { - return Err(SetupServiceError::BadConfig( - "RSS should only initialize CRDB services".into(), - )); - } - let dur = std::time::Duration::from_secs(60); let client = reqwest::ClientBuilder::new() .connect_timeout(dur) @@ -218,18 +207,10 @@ impl ServiceInner { } // Initialize DNS records for these datasets. 
- // - // CRDB is treated as a service, since they are interchangeable. - - let aaaa = datasets - .iter() - .map(|dataset| (AAAA::Zone(dataset.id), dataset.address)) - .collect::>(); - let srv_key = SRV::Service(ServiceName::Cockroach); self.dns_servers .get() .expect("DNS servers must be initialized first") - .insert_dns_records(&self.log, aaaa, srv_key) + .insert_dns_records(datasets) .await?; Ok(()) @@ -272,42 +253,11 @@ impl ServiceInner { retry_notify(internal_service_policy(), services_put, log_failure) .await?; - // Initialize DNS records for the Nexus service. - let services: Vec<_> = services - .iter() - .filter(|svc| { - matches!( - svc.service_type, - crate::params::ServiceType::Nexus { .. } - ) - }) - .collect(); - - // Early-exit for non-Nexus case - if services.is_empty() { - return Ok(()); - } - - // Otherwise, insert DNS records for Nexus - let aaaa = services - .iter() - .map(|service| { - ( - AAAA::Zone(service.id), - SocketAddrV6::new( - service.addresses[0], - NEXUS_INTERNAL_PORT, - 0, - 0, - ), - ) - }) - .collect::>(); - let srv_key = SRV::Service(ServiceName::Nexus); + // Insert DNS records self.dns_servers .get() .expect("DNS servers must be initialized first") - .insert_dns_records(&self.log, aaaa, srv_key) + .insert_dns_records(services) .await?; Ok(()) @@ -609,8 +559,11 @@ impl ServiceInner { // Issue the crdb initialization requests to all sleds. futures::future::join_all(service_plan.services.iter().map( |(sled_address, services_request)| async move { - self.initialize_crdb(*sled_address, &services_request.datasets) - .await?; + self.initialize_datasets( + *sled_address, + &services_request.datasets, + ) + .await?; Ok(()) }, )) @@ -622,9 +575,12 @@ impl ServiceInner { // Issue service initialization requests. // - // Note that this must happen *after* the dataset initialization, + // NOTE: This must happen *after* the dataset initialization, // to ensure that CockroachDB has been initialized before Nexus // starts. + // + // If Nexus was more resilient to concurrent initialization + // of CRDB, this requirement could be relaxed. 
futures::future::join_all(service_plan.services.iter().map( |(sled_address, services_request)| async move { // With the current implementation of "initialize_services", From 746114bdb04e7476184f3360aaded2a3cef174d9 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 15 Jun 2022 23:55:23 -0400 Subject: [PATCH 25/88] Fix dns client bug, start shortening timeouts --- internal-dns-client/src/multiclient.rs | 18 +++++++----------- nexus/src/app/background/services.rs | 25 ++++++++++++------------- nexus/src/db/datastore.rs | 9 +-------- nexus/src/db/ipv6.rs | 12 ++++++++++-- nexus/src/db/model/dataset.rs | 8 +++++--- nexus/src/db/model/service.rs | 8 +++++--- sled-agent/src/rack_setup/service.rs | 10 ++++------ sled-agent/src/storage_manager.rs | 4 +++- 8 files changed, 47 insertions(+), 47 deletions(-) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index de14055197b..d91a58d4d73 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -63,10 +63,11 @@ impl Updater { while let Some(record) = records.next() { let srv = record.srv(); + info!(self.log, "Inserting DNS record: {:?}", srv); match &srv { &crate::names::SRV::Service(_) => { - let mut aaaa = vec![]; + let mut aaaa = vec![(record.aaaa(), record.address())]; while let Some(record) = records.peek() { if record.srv() == srv { let record = records.next().unwrap(); @@ -76,18 +77,13 @@ impl Updater { } } - self.insert_dns_records_internal( - aaaa, - srv, - ).await?; - }, + self.insert_dns_records_internal(aaaa, srv).await?; + } &crate::names::SRV::Backend(_, _) => { let aaaa = vec![(record.aaaa(), record.address())]; - self.insert_dns_records_internal( - aaaa, - record.srv(), - ).await?; - }, + self.insert_dns_records_internal(aaaa, record.srv()) + .await?; + } }; } Ok(()) diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index 33835fa7087..eca4d336607 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -15,8 +15,7 @@ use crate::db::model::Sled; use crate::db::model::Zpool; use crate::Nexus; use internal_dns_client::multiclient::{ - Service as DnsService, - Updater as DnsUpdater + Service as DnsService, Updater as DnsUpdater, }; use omicron_common::address::{ DNS_PORT, DNS_REDUNDANCY, DNS_SERVER_PORT, NEXUS_EXTERNAL_PORT, @@ -96,11 +95,7 @@ impl ServiceBalancer { log.new(o!("component" => "DNS Updater")), ); - Self { - log, - nexus, - dns_updater, - } + Self { log, nexus, dns_updater } } // Reaches out to all sled agents implied in "services", and @@ -167,9 +162,10 @@ impl ServiceBalancer { // strictly necessary, but doing so makes the record insertion more // efficient. 
services.sort_by(|a, b| a.srv().partial_cmp(&b.srv()).unwrap()); - self.dns_updater.insert_dns_records( - &services - ).await.map_err(|e| Error::internal_error(&e.to_string()))?; + self.dns_updater + .insert_dns_records(&services) + .await + .map_err(|e| Error::internal_error(&e.to_string()))?; Ok(()) } @@ -350,9 +346,12 @@ impl ServiceBalancer { .await?; } - self.dns_updater.insert_dns_records( - &datasets.into_iter().map(|(_, _, dataset)| dataset).collect() - ).await.map_err(|e| Error::internal_error(&e.to_string()))?; + self.dns_updater + .insert_dns_records( + &datasets.into_iter().map(|(_, _, dataset)| dataset).collect(), + ) + .await + .map_err(|e| Error::internal_error(&e.to_string()))?; Ok(()) } diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 6485cfd3d42..3ea27b32c7b 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -752,14 +752,7 @@ impl DataStore { .map_err(|e| { TxnError::CustomError(DatasetError::Other(e)) }) - .map(|ip| { - SocketAddrV6::new( - ip, - kind.port(), - 0, - 0, - ) - })?; + .map(|ip| SocketAddrV6::new(ip, kind.port(), 0, 0))?; let dataset = db::model::Dataset::new( dataset_id, diff --git a/nexus/src/db/ipv6.rs b/nexus/src/db/ipv6.rs index 41855e0da0c..60f7c0558c6 100644 --- a/nexus/src/db/ipv6.rs +++ b/nexus/src/db/ipv6.rs @@ -19,8 +19,16 @@ use omicron_common::api::external::Error; use serde::{Deserialize, Serialize}; #[derive( - Clone, Copy, AsExpression, FromSqlRow, PartialEq, Ord, PartialOrd, Eq, - Deserialize, Serialize, + Clone, + Copy, + AsExpression, + FromSqlRow, + PartialEq, + Ord, + PartialOrd, + Eq, + Deserialize, + Serialize, )] #[diesel(sql_type = Inet)] pub struct Ipv6Addr(std::net::Ipv6Addr); diff --git a/nexus/src/db/model/dataset.rs b/nexus/src/db/model/dataset.rs index 652cd7ede0b..4b2b294542a 100644 --- a/nexus/src/db/model/dataset.rs +++ b/nexus/src/db/model/dataset.rs @@ -3,13 +3,13 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use super::{DatasetKind, Generation, Region, SqlU16}; -use crate::db::identity::Asset; use crate::db::collection_insert::DatastoreCollection; +use crate::db::identity::Asset; use crate::db::ipv6; use crate::db::schema::{dataset, region}; use chrono::{DateTime, Utc}; use db_macros::Asset; -use internal_dns_client::names::{AAAA, SRV, ServiceName, BackendName}; +use internal_dns_client::names::{BackendName, ServiceName, AAAA, SRV}; use serde::{Deserialize, Serialize}; use std::net::{Ipv6Addr, SocketAddrV6}; use uuid::Uuid; @@ -84,7 +84,9 @@ impl internal_dns_client::multiclient::Service for Dataset { fn srv(&self) -> SRV { match self.kind { - DatasetKind::Crucible => SRV::Backend(BackendName::Crucible, self.id()), + DatasetKind::Crucible => { + SRV::Backend(BackendName::Crucible, self.id()) + } DatasetKind::Clickhouse => SRV::Service(ServiceName::Clickhouse), DatasetKind::Cockroach => SRV::Service(ServiceName::Cockroach), } diff --git a/nexus/src/db/model/service.rs b/nexus/src/db/model/service.rs index 0ff7987c464..6f05011d415 100644 --- a/nexus/src/db/model/service.rs +++ b/nexus/src/db/model/service.rs @@ -3,12 +3,14 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use super::ServiceKind; -use crate::db::ipv6; use crate::db::identity::Asset; +use crate::db::ipv6; use crate::db::schema::service; use db_macros::Asset; -use internal_dns_client::names::{AAAA, SRV, ServiceName}; -use omicron_common::address::{DNS_SERVER_PORT, NEXUS_INTERNAL_PORT, OXIMETER_PORT}; +use internal_dns_client::names::{ServiceName, AAAA, SRV}; +use omicron_common::address::{ + DNS_SERVER_PORT, NEXUS_INTERNAL_PORT, OXIMETER_PORT, +}; use std::net::{Ipv6Addr, SocketAddrV6}; use uuid::Uuid; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 3b2f7c4aa4d..862c0e05e21 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -253,12 +253,10 @@ impl ServiceInner { retry_notify(internal_service_policy(), services_put, log_failure) .await?; - // Insert DNS records - self.dns_servers - .get() - .expect("DNS servers must be initialized first") - .insert_dns_records(services) - .await?; + // Insert DNS records, if the DNS servers have been initialized + if let Some(dns_servers) = self.dns_servers.get() { + dns_servers.insert_dns_records(services).await?; + } Ok(()) } diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index d559ee70589..d010a9bfff5 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -655,7 +655,9 @@ impl StorageWorker { }; nexus_notifications.push( backoff::retry_notify( - backoff::internal_service_policy(), + backoff::internal_service_policy_with_max( + std::time::Duration::from_secs(1), + ), notify_nexus, log_post_failure, ) From 1b019b1586347f3c42cdd82de3afa3e7f03860eb Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 16 Jun 2022 00:07:05 -0400 Subject: [PATCH 26/88] clippy --- sled-agent/src/params.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 261ec9ecb16..bd74b1ca324 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -366,12 +366,9 @@ impl internal_dns_client::multiclient::Service for ServiceRequest { match self.service_type { ServiceType::InternalDns { server_address, .. } => server_address, ServiceType::Nexus { internal_address, .. 
} => internal_address, - ServiceType::Oximeter => SocketAddrV6::new( - Ipv6Addr::from(self.addresses[0]), - OXIMETER_PORT, - 0, - 0, - ), + ServiceType::Oximeter => { + SocketAddrV6::new(self.addresses[0], OXIMETER_PORT, 0, 0) + } } } } From 94b4b46723e5906f7340cbe45791b8cf5aec46d2 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 16 Jun 2022 01:28:46 -0400 Subject: [PATCH 27/88] Concurrent provisioning --- nexus/src/app/background/services.rs | 259 ++++++++++++---------- nexus/src/db/datastore.rs | 16 +- nexus/tests/integration_tests/datasets.rs | 8 +- sled-agent/src/rack_setup/service.rs | 11 +- sled-agent/src/server.rs | 4 +- sled-agent/src/services.rs | 4 +- sled-agent/src/sled_agent.rs | 2 +- sled-agent/src/storage_manager.rs | 62 +----- 8 files changed, 167 insertions(+), 199 deletions(-) diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index eca4d336607..7178ae7930b 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -14,6 +14,7 @@ use crate::db::model::ServiceKind; use crate::db::model::Sled; use crate::db::model::Zpool; use crate::Nexus; +use futures::stream::{self, StreamExt, TryStreamExt}; use internal_dns_client::multiclient::{ Service as DnsService, Updater as DnsUpdater, }; @@ -112,51 +113,58 @@ impl ServiceBalancer { // For all sleds requiring an update, request all services be // instantiated. - for sled_id in &sled_ids { - // TODO: This interface kinda sucks; ideally we would - // only insert the *new* services. - // - // Inserting the old ones too is costing us an extra query. - let services = - self.nexus.datastore().service_list(opctx, *sled_id).await?; - let sled_client = self.nexus.sled_client(sled_id).await?; - - info!(self.log, "instantiate_services: {:?}", services); - - sled_client - .services_put(&SledAgentTypes::ServiceEnsureBody { - services: services - .iter() - .map(|s| { - let address = Ipv6Addr::from(s.ip); - let (name, service_type) = - Self::get_service_name_and_type( - address, s.kind, - ); - - // TODO: This is hacky, specifically to inject - // global zone addresses in the DNS service. - let gz_addresses = match &s.kind { - ServiceKind::InternalDNS => { - let mut octets = address.octets(); - octets[15] = octets[15] + 1; - vec![Ipv6Addr::from(octets)] + stream::iter(&sled_ids) + .map(Ok::<_, Error>) + .try_for_each_concurrent(None, |sled_id| async { + // TODO: This interface kinda sucks; ideally we would + // only insert the *new* services. + // + // Inserting the old ones too is costing us an extra query. + let services = self + .nexus + .datastore() + .service_list(opctx, *sled_id) + .await?; + let sled_client = self.nexus.sled_client(sled_id).await?; + + info!(self.log, "instantiate_services: {:?}", services); + + sled_client + .services_put(&SledAgentTypes::ServiceEnsureBody { + services: services + .iter() + .map(|s| { + let address = Ipv6Addr::from(s.ip); + let (name, service_type) = + Self::get_service_name_and_type( + address, s.kind, + ); + + // TODO: This is hacky, specifically to inject + // global zone addresses in the DNS service. 
+ let gz_addresses = match &s.kind { + ServiceKind::InternalDNS => { + let mut octets = address.octets(); + octets[15] = octets[15] + 1; + vec![Ipv6Addr::from(octets)] + } + _ => vec![], + }; + + SledAgentTypes::ServiceRequest { + id: s.id(), + name, + addresses: vec![address], + gz_addresses, + service_type, } - _ => vec![], - }; - - SledAgentTypes::ServiceRequest { - id: s.id(), - name, - addresses: vec![address], - gz_addresses, - service_type, - } - }) - .collect(), - }) - .await?; - } + }) + .collect(), + }) + .await?; + Ok(()) + }) + .await?; // Putting records of the same SRV right next to each other isn't // strictly necessary, but doing so makes the record insertion more @@ -215,40 +223,33 @@ impl ServiceBalancer { } } - async fn ensure_rack_service( + // Provision the services within the database. + async fn provision_rack_service( &self, opctx: &OpContext, kind: ServiceKind, desired_count: u32, - ) -> Result<(), Error> { - // Provision the services within the database. - let services = self - .nexus + ) -> Result, Error> { + self.nexus .datastore() .ensure_rack_service(opctx, self.nexus.rack_id, kind, desired_count) - .await?; - - // Actually instantiate those services. - self.instantiate_services(opctx, services).await + .await } - async fn ensure_dns_service( + // Provision the services within the database. + async fn provision_dns_service( &self, opctx: &OpContext, desired_count: u32, - ) -> Result<(), Error> { - // Provision the services within the database. - let services = self - .nexus + ) -> Result, Error> { + self.nexus .datastore() .ensure_dns_service(opctx, self.nexus.rack_subnet, desired_count) - .await?; - - // Actually instantiate those services. - self.instantiate_services(opctx, services).await + .await } - // TODO: Consider using sagas to ensure the rollout of services happens. + // TODO: Consider using sagas to ensure the rollout of services. + // // Not using sagas *happens* to be fine because these operations are // re-tried periodically, but that's kind forcing a dependency on the // caller. @@ -256,25 +257,34 @@ impl ServiceBalancer { &self, opctx: &OpContext, ) -> Result<(), Error> { - // NOTE: If any sleds host DNS + other redudant services, we send - // redundant requests. We could propagate the service list up to a - // higher level, and do instantiation after all services complete? + // Provision services within the database. + let mut svcs = vec![]; for expected_svc in &EXPECTED_SERVICES { info!(self.log, "Ensuring service {:?} exists", expected_svc); match expected_svc.redundancy { ServiceRedundancy::PerRack(desired_count) => { - self.ensure_rack_service( - opctx, - expected_svc.kind, - desired_count, - ) - .await?; + svcs.extend_from_slice( + &self + .provision_rack_service( + opctx, + expected_svc.kind, + desired_count, + ) + .await?, + ); } ServiceRedundancy::DnsPerAz(desired_count) => { - self.ensure_dns_service(opctx, desired_count).await?; + svcs.extend_from_slice( + &self + .provision_dns_service(opctx, desired_count) + .await?, + ); } } } + + // Ensure services exist on the target sleds. + self.instantiate_services(opctx, svcs).await?; Ok(()) } @@ -306,46 +316,50 @@ impl ServiceBalancer { return Ok(()); } + // Ensure that there is one connection per sled. 
let mut sled_clients = HashMap::new(); + for (sled, _, _) in &datasets { + if sled_clients.get(&sled.id()).is_none() { + let sled_client = self.nexus.sled_client(&sled.id()).await?; + sled_clients.insert(sled.id(), sled_client); + } + } - // TODO: We could issue these requests concurrently - for (sled, zpool, dataset) in &datasets { - let sled_client = { - match sled_clients.get(&sled.id()) { - Some(client) => client, - None => { - let sled_client = - self.nexus.sled_client(&sled.id()).await?; - sled_clients.insert(sled.id(), sled_client); - sled_clients.get(&sled.id()).unwrap() + // Issue all dataset instantiation requests concurrently. + stream::iter(&datasets) + .map(Ok::<_, Error>) + .try_for_each_concurrent(None, |(sled, zpool, dataset)| async { + let sled_client = sled_clients.get(&sled.id()).unwrap(); + + let dataset_kind = match kind { + // TODO: This set of "all addresses" isn't right. + // TODO: ... should we even be using "all addresses" to contact CRDB? + // Can it just rely on DNS, somehow? + DatasetKind::Cockroach => { + SledAgentTypes::DatasetKind::CockroachDb(vec![]) } - } - }; - - let dataset_kind = match kind { - // TODO: This set of "all addresses" isn't right. - // TODO: ... should we even be using "all addresses" to contact CRDB? - // Can it just rely on DNS, somehow? - DatasetKind::Cockroach => { - SledAgentTypes::DatasetKind::CockroachDb(vec![]) - } - DatasetKind::Crucible => SledAgentTypes::DatasetKind::Crucible, - DatasetKind::Clickhouse => { - SledAgentTypes::DatasetKind::Clickhouse - } - }; - - // Instantiate each dataset. - sled_client - .filesystem_put(&SledAgentTypes::DatasetEnsureBody { - id: dataset.id(), - zpool_id: zpool.id(), - dataset_kind, - address: dataset.address().to_string(), - }) - .await?; - } + DatasetKind::Crucible => { + SledAgentTypes::DatasetKind::Crucible + } + DatasetKind::Clickhouse => { + SledAgentTypes::DatasetKind::Clickhouse + } + }; + + // Instantiate each dataset. + sled_client + .filesystem_put(&SledAgentTypes::DatasetEnsureBody { + id: dataset.id(), + zpool_id: zpool.id(), + dataset_kind, + address: dataset.address().to_string(), + }) + .await?; + Ok(()) + }) + .await?; + // Ensure all DNS records are updated for the created datasets. self.dns_updater .insert_dns_records( &datasets.into_iter().map(|(_, _, dataset)| dataset).collect(), @@ -360,16 +374,23 @@ impl ServiceBalancer { &self, opctx: &OpContext, ) -> Result<(), Error> { - for expected_dataset in &EXPECTED_DATASETS { - info!(self.log, "Ensuring dataset {:?} exists", expected_dataset); - self.ensure_rack_dataset( - opctx, - expected_dataset.kind, - expected_dataset.redundancy, - ) - .await? - } - Ok(()) + // Provision all dataset types concurrently. 
+ stream::iter(&EXPECTED_DATASETS) + .map(Ok::<_, Error>) + .try_for_each_concurrent(None, |expected_dataset| async move { + info!( + self.log, + "Ensuring dataset {:?} exists", expected_dataset + ); + self.ensure_rack_dataset( + opctx, + expected_dataset.kind, + expected_dataset.redundancy, + ) + .await?; + Ok(()) + }) + .await } // Provides a single point-in-time evaluation and adjustment of diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 3ea27b32c7b..a21895938f3 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -4459,9 +4459,7 @@ mod test { }; use omicron_test_utils::dev; use std::collections::{HashMap, HashSet}; - use std::net::Ipv6Addr; - use std::net::SocketAddrV6; - use std::net::{IpAddr, Ipv4Addr, SocketAddr}; + use std::net::{Ipv6Addr, SocketAddrV6}; use std::sync::Arc; use uuid::Uuid; @@ -4677,8 +4675,7 @@ mod test { // ... and datasets within that zpool. let dataset_count = REGION_REDUNDANCY_THRESHOLD * 2; - let bogus_addr = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let dataset_ids: Vec = (0..dataset_count).map(|_| Uuid::new_v4()).collect(); for id in &dataset_ids { @@ -4759,8 +4756,7 @@ mod test { // ... and datasets within that zpool. let dataset_count = REGION_REDUNDANCY_THRESHOLD; - let bogus_addr = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let dataset_ids: Vec = (0..dataset_count).map(|_| Uuid::new_v4()).collect(); for id in &dataset_ids { @@ -4826,8 +4822,7 @@ mod test { // ... and datasets within that zpool. let dataset_count = REGION_REDUNDANCY_THRESHOLD - 1; - let bogus_addr = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let dataset_ids: Vec = (0..dataset_count).map(|_| Uuid::new_v4()).collect(); for id in &dataset_ids { @@ -4878,8 +4873,7 @@ mod test { // ... and datasets within that zpool. 
let dataset_count = REGION_REDUNDANCY_THRESHOLD; - let bogus_addr = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let dataset_ids: Vec = (0..dataset_count).map(|_| Uuid::new_v4()).collect(); for id in &dataset_ids { diff --git a/nexus/tests/integration_tests/datasets.rs b/nexus/tests/integration_tests/datasets.rs index ebc89f71378..d65a7fa1f81 100644 --- a/nexus/tests/integration_tests/datasets.rs +++ b/nexus/tests/integration_tests/datasets.rs @@ -8,7 +8,7 @@ use omicron_common::api::external::ByteCount; use omicron_nexus::internal_api::params::{ DatasetKind, DatasetPutRequest, ZpoolPutRequest, }; -use std::net::{IpAddr, Ipv4Addr, SocketAddr}; +use std::net::{Ipv6Addr, SocketAddrV6}; use uuid::Uuid; use nexus_test_utils::{ControlPlaneTestContext, SLED_AGENT_UUID}; @@ -36,8 +36,7 @@ async fn test_dataset_put_success(cptestctx: &ControlPlaneTestContext) { .await .unwrap(); - let address = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let address = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let kind = DatasetKind::Crucible; let request = DatasetPutRequest { address, kind }; let dataset_id = Uuid::new_v4(); @@ -69,8 +68,7 @@ async fn test_dataset_put_bad_zpool_returns_not_found( let dataset_put_url = format!("/zpools/{}/dataset/{}", zpool_id, dataset_id); - let address = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let address = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let kind = DatasetKind::Crucible; let request = DatasetPutRequest { address, kind }; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 862c0e05e21..29b579484bf 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -21,7 +21,8 @@ use nexus_client::{ }; use omicron_common::address::{get_sled_address, NEXUS_INTERNAL_PORT}; use omicron_common::backoff::{ - internal_service_policy, retry_notify, BackoffError, + internal_service_policy, internal_service_policy_with_max, retry_notify, + BackoffError, }; use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, @@ -398,8 +399,12 @@ impl ServiceInner { info!(self.log, "Failed to handoff to nexus: {err}"); }; - retry_notify(internal_service_policy(), notify_nexus, log_failure) - .await?; + retry_notify( + internal_service_policy_with_max(std::time::Duration::from_secs(1)), + notify_nexus, + log_failure, + ) + .await?; info!(self.log, "Handoff to Nexus is complete"); Ok(()) diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 97920925789..6725a8351e7 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -100,12 +100,12 @@ impl Server { let log_notification_failure = |err, delay| { warn!( log, - "failed to contact nexus: {}, will retry in {:?}", err, delay; + "failed to notify nexus about sled agent: {}, will retry in {:?}", err, delay; ); }; retry_notify( internal_service_policy_with_max( - std::time::Duration::from_secs(5), + std::time::Duration::from_secs(1), ), notify_nexus, log_notification_failure, diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index fd89dc686a1..2900bf7761c 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -202,11 +202,11 @@ impl ServiceManager { existing_zones: &mut Vec, services: &Vec, ) -> Result<(), Error> { - info!(self.log, "Ensuring services are initialized: {:?}", services); // 
TODO(https://github.com/oxidecomputer/omicron/issues/726): // As long as we ensure the requests don't overlap, we could // parallelize this request. for service in services { + info!(self.log, "Ensuring service is initialized: {:?}", service); // Before we bother allocating anything for this request, check if // this service has already been created. let expected_zone_name = @@ -332,8 +332,6 @@ impl ServiceManager { subnet: Ipv6Subnet::::new( self.underlay_address, ), - // TODO: Switch to inferring this URL by DNS. - // "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" database: nexus_config::Database::FromDns, }; diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index c7e8faa391c..11212cf8e44 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -324,7 +324,7 @@ impl SledAgent { .lazy_nexus_client .get() .await - // TODO: Handle error + // TODO: Handle error... or push out lazy nexus client. .unwrap(); crate::updates::download_artifact(artifact, &nexus_client).await?; Ok(()) diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index d010a9bfff5..bb0a21930cc 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -241,6 +241,9 @@ impl DatasetInfo { address: SocketAddrV6, do_format: bool, ) -> Result<(), Error> { + // TODO: Related to + // https://github.com/oxidecomputer/omicron/pull/1124 , should we + // avoid importing these manifests? match self.kind { DatasetKind::CockroachDb { .. } => { info!(log, "start_zone: Loading CRDB manifest"); @@ -317,7 +320,9 @@ impl DatasetInfo { warn!(log, "cockroachdb not yet alive"); }; backoff::retry_notify( - backoff::internal_service_policy(), + backoff::internal_service_policy_with_max( + std::time::Duration::from_secs(1), + ), check_health, log_failure, ) @@ -650,7 +655,7 @@ impl StorageWorker { let log_post_failure = move |_, delay| { warn!( log, - "failed to notify nexus, will retry in {:?}", delay; + "failed to notify nexus about zpool, will retry in {:?}", delay; ); }; nexus_notifications.push( @@ -665,59 +670,6 @@ impl StorageWorker { ); } - // Adds a "notification to nexus" to `nexus_notifications`, - // informing it about the addition of `datasets` to `pool_id`. - /* - fn add_datasets_notify( - &self, - nexus_notifications: &mut FuturesOrdered>>, - datasets: Vec<(Uuid, SocketAddrV6, DatasetKind)>, - pool_id: Uuid, - ) { - let lazy_nexus_client = self.lazy_nexus_client.clone(); - let notify_nexus = move || { - let lazy_nexus_client = lazy_nexus_client.clone(); - let datasets = datasets.clone(); - async move { - for (id, address, kind) in datasets { - let request = DatasetPutRequest { - address: address.to_string(), - kind: kind.into(), - }; - lazy_nexus_client - .get() - .await - .map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })? - .dataset_put(&pool_id, &id, &request) - .await - .map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })?; - } - - Ok(()) - } - }; - let log = self.log.clone(); - let log_post_failure = move |_, delay| { - warn!( - log, - "failed to notify nexus about datasets, will retry in {:?}", delay; - ); - }; - nexus_notifications.push( - backoff::retry_notify( - backoff::internal_service_policy(), - notify_nexus, - log_post_failure, - ) - .boxed(), - ); - } - */ - // TODO: a lot of these functions act on the `FuturesOrdered` - should // that just be a part of the "worker" struct? 
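
The "Concurrent provisioning" patch above replaces the sequential per-sled and per-dataset loops in Nexus's service balancer with concurrent fan-out over `futures::stream` (`stream::iter(..).map(Ok).try_for_each_concurrent(None, ..)`). A minimal, self-contained sketch of that pattern follows; the `provision` helper and the plain integer IDs are stand-ins for illustration, not code from this series:

    use futures::stream::{self, StreamExt, TryStreamExt};

    // Stand-in for a per-sled call such as `sled_client.services_put(..)`.
    async fn provision(sled: u32) -> Result<(), String> {
        println!("provisioning sled {sled}");
        Ok(())
    }

    #[tokio::main]
    async fn main() -> Result<(), String> {
        let sled_ids = vec![1, 2, 3];

        // Fan out one request per sled. `None` places no limit on
        // concurrency; the first error short-circuits the remaining work.
        stream::iter(&sled_ids)
            .map(Ok::<_, String>)
            .try_for_each_concurrent(None, |sled_id| async move {
                provision(*sled_id).await
            })
            .await
    }
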
From a02e009b9a2ccbd325fe99c1fcf0b43fb7ff23d0 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 16 Jun 2022 11:25:29 -0400 Subject: [PATCH 28/88] Dynamic oximeter config --- Cargo.lock | 2 + internal-dns-client/Cargo.toml | 1 + internal-dns-client/src/multiclient.rs | 64 +++++++++++++++++++++++++- internal-dns-client/src/names.rs | 6 +-- oximeter/collector/Cargo.toml | 1 + oximeter/collector/config.toml | 7 --- oximeter/collector/src/bin/oximeter.rs | 13 +++++- oximeter/collector/src/lib.rs | 62 ++++++++++++++++--------- sled-agent/src/instance.rs | 3 +- sled-agent/src/instance_manager.rs | 6 ++- sled-agent/src/nexus.rs | 43 ++++++++--------- sled-agent/src/server.rs | 5 +- sled-agent/src/services.rs | 48 +++++++++++++++++-- sled-agent/src/storage_manager.rs | 4 +- smf/oximeter/config.toml | 8 ---- smf/oximeter/manifest.xml | 7 ++- 16 files changed, 205 insertions(+), 75 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4c672006255..25bd98d2e97 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2346,6 +2346,7 @@ dependencies = [ "serde", "serde_json", "slog", + "thiserror", "trust-dns-proto", "trust-dns-resolver", "uuid", @@ -3440,6 +3441,7 @@ dependencies = [ "clap 3.2.5", "dropshot", "expectorate", + "internal-dns-client", "nexus-client 0.1.0", "omicron-common 0.1.0", "omicron-test-utils", diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml index 3303ddfc44c..9572b53b40f 100644 --- a/internal-dns-client/Cargo.toml +++ b/internal-dns-client/Cargo.toml @@ -12,6 +12,7 @@ reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } serde = { version = "1.0", features = [ "derive" ] } serde_json = "1.0" slog = { version = "2.5.0", features = [ "max_level_trace", "release_max_level_debug" ] } +thiserror = "1.0" trust-dns-proto = "0.21" trust-dns-resolver = "0.21" uuid = { version = "1.1.0", features = [ "v4", "serde" ] } diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index d91a58d4d73..19336bd3d17 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -11,7 +11,7 @@ use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, }; use slog::{info, warn, Logger}; -use std::net::{SocketAddr, SocketAddrV6}; +use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use trust_dns_resolver::config::{ NameServerConfig, Protocol, ResolverConfig, ResolverOpts, }; @@ -197,3 +197,65 @@ pub fn create_resolver( } TokioAsyncResolver::tokio(rc, ResolverOpts::default()) } + +#[derive(Debug, Clone, thiserror::Error)] +pub enum ResolveError { + #[error(transparent)] + Resolve(#[from] trust_dns_resolver::error::ResolveError), + + #[error("Record not found for SRV key: {0}")] + NotFound(crate::names::SRV), +} + +/// A wrapper around a DNS resolver, providing a way to conveniently +/// look up IP addresses of services based on their SRV keys. +pub struct Resolver { + inner: TokioAsyncResolver, +} + +impl Resolver { + /// Creates a DNS resolver, looking up DNS server addresses based on + /// the provided subnet. + pub fn new(subnet: Ipv6Subnet) -> Result { + Ok(Self { inner: create_resolver(subnet)? }) + } + + /// Convenience wrapper for [`Resolver::new`] which determines the subnet + /// based on a provided IP address. + pub fn new_from_ip(address: Ipv6Addr) -> Result { + let subnet = Ipv6Subnet::::new(address); + + Resolver::new(subnet) + } + + /// Looks up a single [`Ipv6Addr`] based on the SRV name. + /// Returns an error if the record does not exist. 
+ // TODO: There are lots of ways this API can expand: Caching, + // actually respecting TTL, looking up ports, etc. + // + // For now, however, it serves as a very simple "get everyone using DNS" + // API that can be improved upon later. + pub async fn lookup_ipv6( + &self, + srv: crate::names::SRV, + ) -> Result { + let response = self.inner.ipv6_lookup(&srv.to_string()).await?; + let address = response + .iter() + .next() + .ok_or_else(|| ResolveError::NotFound(srv))?; + Ok(*address) + } + + pub async fn lookup_ip( + &self, + srv: crate::names::SRV, + ) -> Result { + let response = self.inner.lookup_ip(&srv.to_string()).await?; + let address = response + .iter() + .next() + .ok_or_else(|| ResolveError::NotFound(srv))?; + Ok(address) + } +} diff --git a/internal-dns-client/src/names.rs b/internal-dns-client/src/names.rs index 53c1504d168..d920ef77fbd 100644 --- a/internal-dns-client/src/names.rs +++ b/internal-dns-client/src/names.rs @@ -7,7 +7,7 @@ use uuid::Uuid; const DNS_ZONE: &str = "control-plane.oxide.internal"; -#[derive(Debug, PartialEq, PartialOrd)] +#[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum ServiceName { Clickhouse, Cockroach, @@ -28,7 +28,7 @@ impl fmt::Display for ServiceName { } } -#[derive(Debug, PartialEq, PartialOrd)] +#[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum BackendName { Crucible, SledAgent, @@ -43,7 +43,7 @@ impl fmt::Display for BackendName { } } -#[derive(Debug, PartialEq, PartialOrd)] +#[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum SRV { /// A service identified and accessed by name, such as "nexus", "CRDB", etc. /// diff --git a/oximeter/collector/Cargo.toml b/oximeter/collector/Cargo.toml index 7e36050d9af..10fe6058c0a 100644 --- a/oximeter/collector/Cargo.toml +++ b/oximeter/collector/Cargo.toml @@ -8,6 +8,7 @@ license = "MPL-2.0" [dependencies] clap = { version = "3.2", features = ["derive"] } dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } +internal-dns-client = { path = "../../internal-dns-client" } nexus-client = { path = "../../nexus-client" } omicron-common = { path = "../../common" } oximeter = { path = "../oximeter" } diff --git a/oximeter/collector/config.toml b/oximeter/collector/config.toml index 6b03a3974d2..0e8557a71bf 100644 --- a/oximeter/collector/config.toml +++ b/oximeter/collector/config.toml @@ -1,16 +1,9 @@ # Example configuration file for running an oximeter collector server -id = "1da65e5b-210c-4859-a7d7-200c1e659972" -nexus_address = "127.0.0.1:12221" - [db] -address = "[::1]:8123" batch_size = 1000 batch_interval = 5 # In seconds [log] level = "debug" mode = "stderr-terminal" - -[dropshot] -bind_address = "[::1]:12223" diff --git a/oximeter/collector/src/bin/oximeter.rs b/oximeter/collector/src/bin/oximeter.rs index 19f9b5b3da0..b9ff5e42d6b 100644 --- a/oximeter/collector/src/bin/oximeter.rs +++ b/oximeter/collector/src/bin/oximeter.rs @@ -8,8 +8,10 @@ use clap::Parser; use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; -use oximeter_collector::{oximeter_api, Config, Oximeter}; +use oximeter_collector::{oximeter_api, Config, Oximeter, OximeterArguments}; +use std::net::SocketAddrV6; use std::path::PathBuf; +use uuid::Uuid; pub fn run_openapi() -> Result<(), String> { oximeter_api() @@ -36,6 +38,12 @@ struct Args { /// Path to TOML file with configuration for the server #[clap(name = "CONFIG_FILE", action)] config_file: PathBuf, + + #[clap(short, long, action)] + id: Uuid, + + #[clap(short, long, action)] + address: 
SocketAddrV6, } #[tokio::main] @@ -51,7 +59,8 @@ async fn do_run() -> Result<(), CmdError> { if args.openapi { run_openapi().map_err(CmdError::Failure) } else { - Oximeter::new(&config) + let args = OximeterArguments { id: args.id, address: args.address }; + Oximeter::new(&config, &args) .await .unwrap() .serve_forever() diff --git a/oximeter/collector/src/lib.rs b/oximeter/collector/src/lib.rs index 4e2f6ca4fda..6f19492e83e 100644 --- a/oximeter/collector/src/lib.rs +++ b/oximeter/collector/src/lib.rs @@ -11,6 +11,11 @@ use dropshot::{ HttpResponseUpdatedNoContent, HttpServer, HttpServerStarter, RequestContext, TypedBody, }; +use internal_dns_client::{ + multiclient::{ResolveError, Resolver}, + names::{ServiceName, SRV}, +}; +use omicron_common::address::{CLICKHOUSE_PORT, NEXUS_INTERNAL_PORT}; use omicron_common::api::internal::nexus::ProducerEndpoint; use omicron_common::backoff; use oximeter::types::{ProducerResults, ProducerResultsItem}; @@ -18,7 +23,7 @@ use oximeter_db::{Client, DbWrite}; use serde::{Deserialize, Serialize}; use slog::{debug, error, info, o, trace, warn, Drain, Logger}; use std::collections::{btree_map::Entry, BTreeMap}; -use std::net::SocketAddr; +use std::net::{SocketAddr, SocketAddrV6}; use std::path::Path; use std::sync::Arc; use std::time::Duration; @@ -37,6 +42,9 @@ pub enum Error { #[error(transparent)] Database(#[from] oximeter_db::Error), + + #[error(transparent)] + ResolveError(#[from] ResolveError), } // Messages for controlling a collection task @@ -231,9 +239,6 @@ async fn results_sink( /// Configuration for interacting with the metric database. #[derive(Debug, Clone, Copy, Deserialize, Serialize)] pub struct DbConfig { - /// Address of the ClickHouse server - pub address: SocketAddr, - /// Batch size of samples at which to insert pub batch_size: usize, @@ -259,6 +264,7 @@ impl OximeterAgent { pub async fn with_id( id: Uuid, db_config: DbConfig, + resolver: &Resolver, log: &Logger, ) -> Result { let (result_sender, result_receiver) = mpsc::channel(8); @@ -267,7 +273,11 @@ impl OximeterAgent { // Construct the ClickHouse client first, propagate an error if we can't reach the // database. - let client = Client::new(db_config.address, &log); + let db_address = SocketAddr::new( + resolver.lookup_ip(SRV::Service(ServiceName::Clickhouse)).await?, + CLICKHOUSE_PORT, + ); + let client = Client::new(db_address, &log); client.init_db().await?; // Spawn the task for aggregating and inserting all metrics @@ -334,18 +344,9 @@ impl OximeterAgent { /// Configuration used to initialize an oximeter server #[derive(Clone, Debug, Deserialize, Serialize)] pub struct Config { - /// An unique ID for this oximeter server - pub id: Uuid, - - /// The address used to connect to Nexus. - pub nexus_address: SocketAddr, - /// Configuration for working with ClickHouse pub db: DbConfig, - /// The internal Dropshot HTTP server configuration - pub dropshot: ConfigDropshot, - /// Logging configuration pub log: ConfigLogging, } @@ -360,6 +361,11 @@ impl Config { } } +pub struct OximeterArguments { + pub id: Uuid, + pub address: SocketAddrV6, +} + /// A server used to collect metrics from components in the control plane. pub struct Oximeter { _agent: Arc, @@ -371,7 +377,10 @@ impl Oximeter { /// /// This starts an HTTP server used to communicate with other agents in Omicron, especially /// Nexus. It also registers itself as a new `oximeter` instance with Nexus. 
- pub async fn new(config: &Config) -> Result { + pub async fn new( + config: &Config, + args: &OximeterArguments, + ) -> Result { let (drain, registration) = slog_dtrace::with_drain( config .log @@ -388,10 +397,13 @@ impl Oximeter { } info!(log, "starting oximeter server"); + let resolver = Resolver::new_from_ip(*args.address.ip())?; + let make_agent = || async { debug!(log, "creating ClickHouse client"); Ok(Arc::new( - OximeterAgent::with_id(config.id, config.db, &log).await?, + OximeterAgent::with_id(args.id, config.db, &resolver, &log) + .await?, )) }; let log_client_failure = |error, delay| { @@ -411,7 +423,10 @@ impl Oximeter { let dropshot_log = log.new(o!("component" => "dropshot")); let server = HttpServerStarter::new( - &config.dropshot, + &ConfigDropshot { + bind_address: SocketAddr::V6(args.address), + ..Default::default() + }, oximeter_api(), Arc::clone(&agent), &dropshot_log, @@ -423,10 +438,15 @@ impl Oximeter { let client = reqwest::Client::new(); let notify_nexus = || async { debug!(log, "contacting nexus"); + let nexus_address = resolver + .lookup_ipv6(SRV::Service(ServiceName::Nexus)) + .await + .map_err(|e| backoff::BackoffError::transient(e.to_string()))?; + client .post(format!( - "http://{}/metrics/collectors", - config.nexus_address + "http://[{}]:{}/metrics/collectors", + nexus_address, NEXUS_INTERNAL_PORT, )) .json(&nexus_client::types::OximeterInfo { address: server.local_addr().to_string(), @@ -434,9 +454,9 @@ impl Oximeter { }) .send() .await - .map_err(backoff::BackoffError::transient)? + .map_err(|e| backoff::BackoffError::transient(e.to_string()))? .error_for_status() - .map_err(backoff::BackoffError::transient) + .map_err(|e| backoff::BackoffError::transient(e.to_string())) }; let log_notification_failure = |error, delay| { warn!( diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 9d3b9cc0d17..16d0fffad30 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -831,7 +831,8 @@ mod test { ); let port_allocator = OptePortAllocator::new(); let lazy_nexus_client = - LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST); + LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST) + .unwrap(); let inst = Instance::new( log.clone(), diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index b89eab6473a..37805f868fd 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -271,7 +271,8 @@ mod test { async fn ensure_instance() { let log = logger(); let lazy_nexus_client = - LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST); + LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST) + .unwrap(); // Creation of the instance manager incurs some "global" system // checks: cleanup of existing zones + vnics. @@ -355,7 +356,8 @@ mod test { async fn ensure_instance_repeatedly() { let log = logger(); let lazy_nexus_client = - LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST); + LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST) + .unwrap(); // Instance Manager creation. 
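The new Resolver in internal-dns-client gives every consumer the same lookup path for control-plane services. A minimal sketch of how a caller might use it, based only on the lookup_ip / lookup_ipv6 calls and the address constants used in this patch (the function name and the idea of resolving both services in one place are illustrative, not part of the patch):

    use internal_dns_client::{
        multiclient::{ResolveError, Resolver},
        names::{ServiceName, SRV},
    };
    use omicron_common::address::{CLICKHOUSE_PORT, NEXUS_INTERNAL_PORT};
    use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6};

    // Derive the AZ subnet from our own underlay address, then ask internal
    // DNS for the SRV records of the services we need.
    async fn find_services(
        underlay_ip: Ipv6Addr,
    ) -> Result<(SocketAddr, SocketAddrV6), ResolveError> {
        let resolver = Resolver::new_from_ip(underlay_ip)?;

        // ClickHouse: any address backing the service will do.
        let clickhouse = SocketAddr::new(
            resolver.lookup_ip(SRV::Service(ServiceName::Clickhouse)).await?,
            CLICKHOUSE_PORT,
        );

        // Nexus: look up an IPv6 address and attach the well-known
        // internal port.
        let nexus_ip =
            resolver.lookup_ipv6(SRV::Service(ServiceName::Nexus)).await?;
        let nexus = SocketAddrV6::new(nexus_ip, NEXUS_INTERNAL_PORT, 0, 0);

        Ok((clickhouse, nexus))
    }
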
diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 3dc40369219..7c252cde417 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -7,16 +7,18 @@ pub use crate::mocks::MockNexusClient as NexusClient; #[cfg(not(test))] pub use nexus_client::Client as NexusClient; -use internal_dns_client::names::{ServiceName, SRV}; -use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, NEXUS_INTERNAL_PORT}; +use internal_dns_client::{ + multiclient::{ResolveError, Resolver}, + names::{ServiceName, SRV}, +}; +use omicron_common::address::NEXUS_INTERNAL_PORT; use slog::Logger; use std::net::Ipv6Addr; use std::sync::Arc; struct Inner { log: Logger, - addr: Ipv6Addr, - // TODO: We could also totally cache the resolver / observed IP here? + resolver: Resolver, } /// Wrapper around a [`NexusClient`] object, which allows deferring @@ -35,26 +37,21 @@ pub struct LazyNexusClient { } impl LazyNexusClient { - pub fn new(log: Logger, addr: Ipv6Addr) -> Self { - Self { inner: Arc::new(Inner { log, addr }) } + pub fn new(log: Logger, addr: Ipv6Addr) -> Result { + Ok(Self { + inner: Arc::new(Inner { + log, + resolver: Resolver::new_from_ip(addr)?, + }), + }) } - pub async fn get(&self) -> Result { - // TODO: Consider refactoring this: - // - Address as input - // - Lookup "nexus" DNS record - // - Result
as output - let az_subnet = Ipv6Subnet::::new(self.inner.addr); - let resolver = - internal_dns_client::multiclient::create_resolver(az_subnet) - .map_err(|e| format!("Failed to create DNS resolver: {}", e))?; - let response = resolver - .lookup_ip(&SRV::Service(ServiceName::Nexus).to_string()) - .await - .map_err(|e| format!("Failed to lookup Nexus IP: {}", e))?; - let address = response.iter().next().ok_or_else(|| { - "no addresses returned from DNS resolver".to_string() - })?; + pub async fn get(&self) -> Result { + let address = self + .inner + .resolver + .lookup_ipv6(SRV::Service(ServiceName::Nexus)) + .await?; Ok(NexusClient::new( &format!("http://[{}]:{}", address, NEXUS_INTERNAL_PORT), @@ -70,7 +67,7 @@ impl LazyNexusClient { #[cfg(test)] mockall::mock! { pub LazyNexusClient { - pub fn new(log: Logger, addr: Ipv6Addr) -> Self; + pub fn new(log: Logger, addr: Ipv6Addr) -> Result; pub async fn get(&self) -> Result; } impl Clone for LazyNexusClient { diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 6725a8351e7..bd73c6e4169 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -44,7 +44,8 @@ impl Server { let client_log = log.new(o!("component" => "NexusClient")); - let lazy_nexus_client = LazyNexusClient::new(client_log, *addr.ip()); + let lazy_nexus_client = LazyNexusClient::new(client_log, *addr.ip()) + .map_err(|e| e.to_string())?; let sled_agent = SledAgent::new( &config, @@ -86,7 +87,7 @@ impl Server { let nexus_client = lazy_nexus_client .get() .await - .map_err(|err| BackoffError::transient(err))?; + .map_err(|err| BackoffError::transient(err.to_string()))?; nexus_client .cpapi_sled_agents_post( &sled_id, diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 2900bf7761c..ea989c3eab0 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -12,7 +12,7 @@ use crate::illumos::zone::AddressRequest; use crate::params::{ServiceEnsureBody, ServiceRequest, ServiceType}; use crate::zone::Zones; use dropshot::ConfigDropshot; -use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; +use omicron_common::address::{Ipv6Subnet, OXIMETER_PORT, RACK_PREFIX}; use omicron_common::nexus_config::{ self, DeploymentConfig as NexusDeploymentConfig, }; @@ -427,8 +427,50 @@ impl ServiceManager { ServiceType::Oximeter => { info!(self.log, "Setting up oximeter service"); - // TODO: Implement with dynamic parameters, when address is - // dynamically assigned. 
+ let address = service.addresses[0]; + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!("config/id={}", service.id), + ]) + .map_err(|err| Error::ZoneCommand { + intent: "set server ID".to_string(), + err, + })?; + + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!( + "config/address=[{}]:{}", + address, OXIMETER_PORT, + ), + ]) + .map_err(|err| Error::ZoneCommand { + intent: "set server address".to_string(), + err, + })?; + + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &default_smf_name, + "refresh", + ]) + .map_err(|err| Error::ZoneCommand { + intent: format!( + "Refresh SMF manifest {}", + default_smf_name + ), + err, + })?; } } diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index bb0a21930cc..72a3f8c4327 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -642,7 +642,9 @@ impl StorageWorker { lazy_nexus_client .get() .await - .map_err(|e| backoff::BackoffError::transient(e))? + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })? .zpool_put(&sled_id, &pool_id, &zpool_request) .await .map_err(|e| { diff --git a/smf/oximeter/config.toml b/smf/oximeter/config.toml index 4a0095fdd00..ca14fe6ec8b 100644 --- a/smf/oximeter/config.toml +++ b/smf/oximeter/config.toml @@ -1,11 +1,6 @@ # Example configuration file for running an oximeter collector server -id = "1da65e5b-210c-4859-a7d7-200c1e659972" -# Internal address of nexus -nexus_address = "[fd00:1122:3344:0101::3]:12221" - [db] -address = "[fd00:1122:3344:0101::5]:8123" batch_size = 1000 batch_interval = 5 # In seconds @@ -14,6 +9,3 @@ level = "debug" mode = "file" path = "/dev/stdout" if_exists = "append" - -[dropshot] -bind_address = "[fd00:1122:3344:0101::4]:12223" diff --git a/smf/oximeter/manifest.xml b/smf/oximeter/manifest.xml index 47e3cb254f1..5e91cbfc96a 100644 --- a/smf/oximeter/manifest.xml +++ b/smf/oximeter/manifest.xml @@ -18,10 +18,15 @@ + + + + + From a5be4d0508a65a42ae33e93819216ebe7628b04c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 17 Jun 2022 14:10:18 -0400 Subject: [PATCH 29/88] Allow oximeter to use config-provided addresses --- nexus/test-utils/src/lib.rs | 17 ++++++------ oximeter/collector/src/lib.rs | 50 ++++++++++++++++++++++++++--------- 2 files changed, 46 insertions(+), 21 deletions(-) diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 72ece48526f..d4e234b9e8b 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -18,7 +18,7 @@ use oximeter_collector::Oximeter; use oximeter_producer::Server as ProducerServer; use slog::o; use slog::Logger; -use std::net::{IpAddr, Ipv6Addr, SocketAddr}; +use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use std::path::Path; use std::time::Duration; use uuid::Uuid; @@ -224,21 +224,20 @@ pub async fn start_oximeter( id: Uuid, ) -> Result { let db = oximeter_collector::DbConfig { - address: SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db_port), + address: Some(SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db_port)), batch_size: 10, batch_interval: 1, }; let config = oximeter_collector::Config { - id, - nexus_address, + nexus_address: Some(nexus_address), db, - dropshot: ConfigDropshot { - bind_address: SocketAddr::new(Ipv6Addr::LOCALHOST.into(), 0), - ..Default::default() - }, log: ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Error }, }; - 
Oximeter::new(&config).await.map_err(|e| e.to_string()) + let args = oximeter_collector::OximeterArguments { + id, + address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), + }; + Oximeter::new(&config, &args).await.map_err(|e| e.to_string()) } #[derive(Debug, Clone, oximeter::Target)] diff --git a/oximeter/collector/src/lib.rs b/oximeter/collector/src/lib.rs index 6f19492e83e..64a2af4c96a 100644 --- a/oximeter/collector/src/lib.rs +++ b/oximeter/collector/src/lib.rs @@ -239,6 +239,12 @@ async fn results_sink( /// Configuration for interacting with the metric database. #[derive(Debug, Clone, Copy, Deserialize, Serialize)] pub struct DbConfig { + /// Optional address of the ClickHouse server. + /// + /// If "None", will be inferred from DNS. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub address: Option, + /// Batch size of samples at which to insert pub batch_size: usize, @@ -273,10 +279,16 @@ impl OximeterAgent { // Construct the ClickHouse client first, propagate an error if we can't reach the // database. - let db_address = SocketAddr::new( - resolver.lookup_ip(SRV::Service(ServiceName::Clickhouse)).await?, - CLICKHOUSE_PORT, - ); + let db_address = if let Some(address) = db_config.address { + address + } else { + SocketAddr::new( + resolver + .lookup_ip(SRV::Service(ServiceName::Clickhouse)) + .await?, + CLICKHOUSE_PORT, + ) + }; let client = Client::new(db_address, &log); client.init_db().await?; @@ -344,6 +356,12 @@ impl OximeterAgent { /// Configuration used to initialize an oximeter server #[derive(Clone, Debug, Deserialize, Serialize)] pub struct Config { + /// The address used to connect to Nexus. + /// + /// If "None", will be inferred from DNS. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub nexus_address: Option, + /// Configuration for working with ClickHouse pub db: DbConfig, @@ -438,16 +456,24 @@ impl Oximeter { let client = reqwest::Client::new(); let notify_nexus = || async { debug!(log, "contacting nexus"); - let nexus_address = resolver - .lookup_ipv6(SRV::Service(ServiceName::Nexus)) - .await - .map_err(|e| backoff::BackoffError::transient(e.to_string()))?; + let nexus_address = if let Some(address) = config.nexus_address { + address + } else { + SocketAddr::V6(SocketAddrV6::new( + resolver + .lookup_ipv6(SRV::Service(ServiceName::Nexus)) + .await + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?, + NEXUS_INTERNAL_PORT, + 0, + 0, + )) + }; client - .post(format!( - "http://[{}]:{}/metrics/collectors", - nexus_address, NEXUS_INTERNAL_PORT, - )) + .post(format!("http://{}/metrics/collectors", nexus_address,)) .json(&nexus_client::types::OximeterInfo { address: server.local_addr().to_string(), collector_id: agent.id, From 59dc38273de962f2f53e1e1d40e84fbbbb6e5965 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 17 Jun 2022 14:26:32 -0400 Subject: [PATCH 30/88] Fix command-based tests --- oximeter/collector/src/bin/oximeter.rs | 51 +++++++++---------- .../tests/output/cmd-oximeter-noargs-stderr | 14 +++-- oximeter/collector/tests/test_commands.rs | 2 +- smf/oximeter/manifest.xml | 2 +- 4 files changed, 37 insertions(+), 32 deletions(-) diff --git a/oximeter/collector/src/bin/oximeter.rs b/oximeter/collector/src/bin/oximeter.rs index b9ff5e42d6b..bf54cf33fa0 100644 --- a/oximeter/collector/src/bin/oximeter.rs +++ b/oximeter/collector/src/bin/oximeter.rs @@ -26,24 +26,22 @@ pub fn run_openapi() -> Result<(), String> { /// Run an oximeter metric collection server in the Oxide Control Plane. 
#[derive(Parser)] #[clap(name = "oximeter", about = "See README.adoc for more information")] -struct Args { - #[clap( - short = 'O', - long = "openapi", - help = "Print the external OpenAPI Spec document and exit", - action - )] - openapi: bool, +enum Args { + /// Print the external OpenAPI Spec document and exit + Openapi, - /// Path to TOML file with configuration for the server - #[clap(name = "CONFIG_FILE", action)] - config_file: PathBuf, + /// Start an Oximeter server + Run { + /// Path to TOML file with configuration for the server + #[clap(name = "CONFIG_FILE", action)] + config_file: PathBuf, - #[clap(short, long, action)] - id: Uuid, + #[clap(short, long, action)] + id: Uuid, - #[clap(short, long, action)] - address: SocketAddrV6, + #[clap(short, long, action)] + address: SocketAddrV6, + }, } #[tokio::main] @@ -55,16 +53,17 @@ async fn main() { async fn do_run() -> Result<(), CmdError> { let args = Args::parse(); - let config = Config::from_file(args.config_file).unwrap(); - if args.openapi { - run_openapi().map_err(CmdError::Failure) - } else { - let args = OximeterArguments { id: args.id, address: args.address }; - Oximeter::new(&config, &args) - .await - .unwrap() - .serve_forever() - .await - .map_err(|e| CmdError::Failure(e.to_string())) + match args { + Args::Openapi => run_openapi().map_err(CmdError::Failure), + Args::Run { config_file, id, address } => { + let config = Config::from_file(config_file).unwrap(); + let args = OximeterArguments { id, address }; + Oximeter::new(&config, &args) + .await + .unwrap() + .serve_forever() + .await + .map_err(|e| CmdError::Failure(e.to_string())) + } } } diff --git a/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr b/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr index 1398febf119..dfb062bca75 100644 --- a/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr +++ b/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr @@ -1,7 +1,13 @@ -error: The following required arguments were not provided: - +oximeter +See README.adoc for more information USAGE: - oximeter [OPTIONS] + oximeter -For more information try --help +OPTIONS: + -h, --help Print help information + +SUBCOMMANDS: + help Print this message or the help of the given subcommand(s) + openapi Print the external OpenAPI Spec document and exit + run Start an Oximeter server diff --git a/oximeter/collector/tests/test_commands.rs b/oximeter/collector/tests/test_commands.rs index 7b910a5be4a..d3d66be0580 100644 --- a/oximeter/collector/tests/test_commands.rs +++ b/oximeter/collector/tests/test_commands.rs @@ -50,7 +50,7 @@ fn test_oximeter_openapi() { // But we do know where it is at compile time, so we load it then. 
let config = include_str!("../../collector/config.toml"); let config_path = write_config(config); - let exec = Exec::cmd(path_to_oximeter()).arg(&config_path).arg("--openapi"); + let exec = Exec::cmd(path_to_oximeter()).arg("openapi"); let (exit_status, stdout_text, stderr_text) = run_command(exec); fs::remove_file(&config_path).expect("failed to remove temporary file"); assert_exit_code(exit_status, EXIT_SUCCESS, &stderr_text); diff --git a/smf/oximeter/manifest.xml b/smf/oximeter/manifest.xml index 5e91cbfc96a..d16efd90d99 100644 --- a/smf/oximeter/manifest.xml +++ b/smf/oximeter/manifest.xml @@ -18,7 +18,7 @@ From 81bf2d4846ba9d20179aa9927cfd64a7fa2c82d4 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 07:27:12 -0400 Subject: [PATCH 31/88] Nexus lazily accessing timeseries DB --- internal-dns-client/src/multiclient.rs | 1 + nexus/src/app/mod.rs | 16 +++++++-- nexus/src/app/oximeter.rs | 50 ++++++++++++++++++++++++++ nexus/src/config.rs | 11 +++--- nexus/src/context.rs | 15 ++++---- nexus/test-utils/src/lib.rs | 8 ++++- smf/nexus/config-partial.toml | 4 --- 7 files changed, 83 insertions(+), 22 deletions(-) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 19336bd3d17..4c6edba5498 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -173,6 +173,7 @@ impl Updater { } } +// TODO: not pub? /// Creates a resolver using all internal DNS name servers. pub fn create_resolver( subnet: Ipv6Subnet, diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 8b6cc606802..84408965e44 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -4,6 +4,7 @@ //! Nexus, the service that operates much of the control plane in an Oxide fleet +use crate::app::oximeter::LazyTimeseriesClient; use crate::authn; use crate::authz; use crate::config; @@ -88,7 +89,7 @@ pub struct Nexus { background_task_runner: OnceCell, /// Client to the timeseries database. - timeseries_client: oximeter_db::Client, + timeseries_client: LazyTimeseriesClient, /// Contents of the trusted root role for the TUF repository. updates_config: Option, @@ -113,9 +114,10 @@ pub struct Nexus { impl Nexus { /// Create a new Nexus instance for the given rack id `rack_id` // TODO-polish revisit rack metadata - pub fn new_with_id( + pub async fn new_with_id( rack_id: Uuid, log: Logger, + resolver: internal_dns_client::multiclient::Resolver, pool: db::Pool, config: &config::Config, authz: Arc, @@ -135,8 +137,16 @@ impl Nexus { )), sec_store, )); + + // Connect to Clickhouse - but do so lazily. + // Clickhouse may not be executing when Nexus starts. let timeseries_client = - oximeter_db::Client::new(config.pkg.timeseries_db.address, &log); + if let Some(address) = &config.pkg.timeseries_db.address { + // If an address was provided, use it instead of DNS. + LazyTimeseriesClient::new_from_address(log.clone(), *address) + } else { + LazyTimeseriesClient::new_from_dns(log.clone(), resolver) + }; // TODO-cleanup We may want a first-class subsystem for managing startup // background tasks. 
It could use a Future for each one, a status enum diff --git a/nexus/src/app/oximeter.rs b/nexus/src/app/oximeter.rs index e270868f90c..7f6fb9b6ffd 100644 --- a/nexus/src/app/oximeter.rs +++ b/nexus/src/app/oximeter.rs @@ -9,6 +9,11 @@ use crate::context::OpContext; use crate::db; use crate::db::identity::Asset; use crate::internal_api::params::OximeterInfo; +use internal_dns_client::{ + multiclient::{ResolveError, Resolver}, + names::{ServiceName, SRV}, +}; +use omicron_common::address::CLICKHOUSE_PORT; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; @@ -19,12 +24,54 @@ use oximeter_client::Client as OximeterClient; use oximeter_db::TimeseriesSchema; use oximeter_db::TimeseriesSchemaPaginationParams; use oximeter_producer::register; +use slog::Logger; use std::convert::TryInto; use std::net::SocketAddr; use std::num::NonZeroU32; use std::time::Duration; use uuid::Uuid; +/// A client which knows how to connect to Clickhouse, but does so +/// only when a request is actually made. +/// +/// This allows callers to set up the mechanism of connection (by address +/// or DNS) separately from actually making that connection. This +/// is particularly useful in situations where configurations are parsed +/// prior to Clickhouse existing. +pub struct LazyTimeseriesClient { + log: Logger, + source: ClientSource, +} + +enum ClientSource { + FromDns { resolver: Resolver }, + FromIp { address: SocketAddr }, +} + +impl LazyTimeseriesClient { + pub fn new_from_dns(log: Logger, resolver: Resolver) -> Self { + Self { log, source: ClientSource::FromDns { resolver } } + } + + pub fn new_from_address(log: Logger, address: SocketAddr) -> Self { + Self { log, source: ClientSource::FromIp { address } } + } + + pub async fn get(&self) -> Result { + let address = match &self.source { + ClientSource::FromIp { address } => *address, + ClientSource::FromDns { resolver } => SocketAddr::new( + resolver + .lookup_ip(SRV::Service(ServiceName::Clickhouse)) + .await?, + CLICKHOUSE_PORT, + ), + }; + + Ok(oximeter_db::Client::new(address, &self.log)) + } +} + impl super::Nexus { /// Insert a new record of an Oximeter collector server. pub async fn upsert_oximeter_collector( @@ -160,6 +207,9 @@ impl super::Nexus { ) -> Result, Error> { opctx.authorize(authz::Action::Read, &authz::FLEET).await?; self.timeseries_client + .get() + .await + .map_err(|e| Error::internal_error(&e.to_string()))? .timeseries_schema_list(&pag_params.page, limit) .await .map_err(|e| match e { diff --git a/nexus/src/config.rs b/nexus/src/config.rs index 98cbf0169cf..5ca452e7388 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -45,10 +45,11 @@ pub struct UpdatesConfig { pub default_base_url: String, } -/// Configuration for the timeseries database. -#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +/// Optional configuration for the timeseries database. +#[derive(Clone, Debug, Default, Deserialize, PartialEq, Serialize)] pub struct TimeseriesDbConfig { - pub address: SocketAddr, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub address: Option, } // A deserializable type that does no validation on the tunable parameters. @@ -132,7 +133,7 @@ pub struct PackageConfig { /// Authentication-related configuration pub authn: AuthnConfig, /// Timeseries database configuration. - // TODO: Should this be removed? Nexus needs to initialize it. 
+ #[serde(default)] pub timeseries_db: TimeseriesDbConfig, /// Updates-related configuration. Updates APIs return 400 Bad Request when this is /// unconfigured. @@ -381,7 +382,7 @@ mod test { path: "/nonexistent/path".to_string() }, timeseries_db: TimeseriesDbConfig { - address: "[::1]:8123".parse().unwrap() + address: Some("[::1]:8123".parse().unwrap()) }, updates: Some(UpdatesConfig { trusted_root: PathBuf::from("/path/to/root.json"), diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 2677df0c3c9..31d0af0808a 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -145,7 +145,7 @@ impl ServerContext { Ipv6Subnet::::new(config.deployment.subnet.net().ip()); info!(log, "Setting up resolver on subnet: {:?}", az_subnet); let resolver = - internal_dns_client::multiclient::create_resolver(az_subnet) + internal_dns_client::multiclient::Resolver::new(az_subnet) .map_err(|e| format!("Failed to create DNS resolver: {}", e))?; // Set up DB pool @@ -153,15 +153,10 @@ impl ServerContext { nexus_config::Database::FromUrl { url } => url.clone(), nexus_config::Database::FromDns => { info!(log, "Accessing DB url from DNS"); - let response = resolver - .lookup_ip( - &SRV::Service(ServiceName::Cockroach).to_string(), - ) + let address = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) .await .map_err(|e| format!("Failed to lookup IP: {}", e))?; - let address = response.iter().next().ok_or_else(|| { - "no addresses returned from DNS resolver".to_string() - })?; info!(log, "DB address: {}", address); PostgresConfigWithUrl::from_str(&format!( "postgresql://root@[{}]:{}/omicron?sslmode=disable", @@ -174,10 +169,12 @@ impl ServerContext { let nexus = Nexus::new_with_id( rack_id, log.new(o!("component" => "nexus")), + resolver, pool, config, Arc::clone(&authz), - ); + ) + .await; Ok(Arc::new(ServerContext { nexus, diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index d4e234b9e8b..e1c3949c57f 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -101,7 +101,13 @@ pub async fn test_setup_with_config( // Store actual address/port information for the databases after they start. config.deployment.database = nexus_config::Database::FromUrl { url: database.pg_config().clone() }; - config.pkg.timeseries_db.address.set_port(clickhouse.port()); + config + .pkg + .timeseries_db + .address + .as_mut() + .expect("Tests expect to set a port of Clickhouse") + .set_port(clickhouse.port()); // Start the Nexus internal API. 
let internal_server = diff --git a/smf/nexus/config-partial.toml b/smf/nexus/config-partial.toml index b77ffc3137f..4b759f1761c 100644 --- a/smf/nexus/config-partial.toml +++ b/smf/nexus/config-partial.toml @@ -19,7 +19,3 @@ level = "info" mode = "file" path = "/dev/stdout" if_exists = "append" - -# Configuration for interacting with the timeseries database -[timeseries_db] -address = "[fd00:1122:3344:0101::5]:8123" From aed3ba6b7f90b6877c6036b380b2be3920b50c9b Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 07:40:16 -0400 Subject: [PATCH 32/88] Cleanup TODOs --- internal-dns-client/src/multiclient.rs | 5 ++--- nexus/src/app/mod.rs | 3 --- sled-agent/src/instance.rs | 7 ++++--- sled-agent/src/rack_setup/service.rs | 17 ++++++----------- sled-agent/src/sled_agent.rs | 10 ++++------ 5 files changed, 16 insertions(+), 26 deletions(-) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 4c6edba5498..8c4313a40a4 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -173,9 +173,8 @@ impl Updater { } } -// TODO: not pub? -/// Creates a resolver using all internal DNS name servers. -pub fn create_resolver( +// Creates a resolver using all internal DNS name servers. +fn create_resolver( subnet: Ipv6Subnet, ) -> Result { let mut rc = ResolverConfig::new(); diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 84408965e44..682bb406c77 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -261,9 +261,6 @@ impl Nexus { } /// Returns an [`OpContext`] used for background tasks. - // TODO: Probably should be making a *new* opctx here? - // - // I think there should be one-per-"op", to get better metrics on bg ops. pub fn opctx_for_background(&self) -> OpContext { OpContext::for_background( self.log.new(o!("component" => "BackgroundWork")), diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 16d0fffad30..08d866a1266 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -84,6 +84,9 @@ pub enum Error { #[error("Serial console buffer: {0}")] Serial(#[from] crate::serial::Error), + + #[error("Error resolving DNS name: {0}")] + ResolveError(#[from] internal_dns_client::multiclient::ResolveError), } // Issues read-only, idempotent HTTP requests at propolis until it responds with @@ -253,9 +256,7 @@ impl InstanceInner { // Notify Nexus of the state change. self.lazy_nexus_client .get() - .await - // TODO: Handle me - .unwrap() + .await? 
.cpapi_instances_put( self.id(), &nexus_client::types::InstanceRuntimeState::from( diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 29b579484bf..42e821a379e 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -313,20 +313,15 @@ impl ServiceInner { ) -> Result<(), SetupServiceError> { info!(self.log, "Handing off control to Nexus"); - let resolver = internal_dns_client::multiclient::create_resolver( - config.az_subnet(), - ) - .expect("Failed to create DNS resolver"); - let response = resolver - .lookup_ip(&SRV::Service(ServiceName::Nexus).to_string()) + let resolver = + internal_dns_client::multiclient::Resolver::new(config.az_subnet()) + .expect("Failed to create DNS resolver"); + let ip = resolver + .lookup_ip(SRV::Service(ServiceName::Nexus)) .await .expect("Failed to lookup IP"); + let nexus_address = SocketAddr::new(ip, NEXUS_INTERNAL_PORT); - let nexus_address = response - .iter() - .next() - .map(|addr| SocketAddr::new(addr, NEXUS_INTERNAL_PORT)) - .expect("no addresses returned from DNS resolver"); info!(self.log, "Nexus address: {}", nexus_address.to_string()); let nexus_client = NexusClient::new( diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 11212cf8e44..5eba7217349 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -78,6 +78,9 @@ pub enum Error { #[error("Error managing guest networking: {0}")] Opte(#[from] crate::opte::Error), + + #[error("Error resolving DNS name: {0}")] + ResolveError(#[from] internal_dns_client::multiclient::ResolveError), } impl From for omicron_common::api::external::Error { @@ -320,12 +323,7 @@ impl SledAgent { &self, artifact: UpdateArtifact, ) -> Result<(), Error> { - let nexus_client = self - .lazy_nexus_client - .get() - .await - // TODO: Handle error... or push out lazy nexus client. - .unwrap(); + let nexus_client = self.lazy_nexus_client.get().await?; crate::updates::download_artifact(artifact, &nexus_client).await?; Ok(()) } From 8fce9a14e50952b784bf28263ac8de36b4061486 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 08:00:32 -0400 Subject: [PATCH 33/88] Box resolver to make clippy happy --- internal-dns-client/src/multiclient.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 8c4313a40a4..0c549e9931c 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -210,14 +210,14 @@ pub enum ResolveError { /// A wrapper around a DNS resolver, providing a way to conveniently /// look up IP addresses of services based on their SRV keys. pub struct Resolver { - inner: TokioAsyncResolver, + inner: Box, } impl Resolver { /// Creates a DNS resolver, looking up DNS server addresses based on /// the provided subnet. pub fn new(subnet: Ipv6Subnet) -> Result { - Ok(Self { inner: create_resolver(subnet)? }) + Ok(Self { inner: Box::new(create_resolver(subnet)?) 
}) } /// Convenience wrapper for [`Resolver::new`] which determines the subnet From d26ee1442c63e2aa95c9a15dd853f0a0f3c0a7d3 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 14:45:36 -0400 Subject: [PATCH 34/88] Internal DNS tests --- Cargo.lock | 6 + internal-dns-client/Cargo.toml | 8 + internal-dns-client/src/multiclient.rs | 522 ++++++++++++++++++++++--- internal-dns-client/src/names.rs | 62 ++- internal-dns/src/bin/dns-server.rs | 12 +- internal-dns/src/dns_server.rs | 46 ++- internal-dns/tests/basic_test.rs | 19 +- nexus/src/db/datastore.rs | 3 - 8 files changed, 601 insertions(+), 77 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 25bd98d2e97..8a7b9f25cb0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2339,14 +2339,20 @@ dependencies = [ name = "internal-dns-client" version = "0.1.0" dependencies = [ + "dropshot", "futures", + "internal-dns", "omicron-common 0.1.0", + "omicron-test-utils", "progenitor", "reqwest", "serde", "serde_json", + "sled", "slog", + "tempfile", "thiserror", + "tokio", "trust-dns-proto", "trust-dns-resolver", "uuid", diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml index 9572b53b40f..4872699610a 100644 --- a/internal-dns-client/Cargo.toml +++ b/internal-dns-client/Cargo.toml @@ -16,3 +16,11 @@ thiserror = "1.0" trust-dns-proto = "0.21" trust-dns-resolver = "0.21" uuid = { version = "1.1.0", features = [ "v4", "serde" ] } + +[dev-dependencies] +dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } +internal-dns = { path = "../internal-dns" } +omicron-test-utils = { path = "../test-utils" } +sled = "0.34" +tempfile = "3.3" +tokio = { version = "1.18", features = [ "full" ] } diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 0c549e9931c..333f0283dd8 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -19,32 +19,115 @@ use trust_dns_resolver::TokioAsyncResolver; type DnsError = crate::Error; -/// A connection used to update multiple DNS servers. -pub struct Updater { - log: Logger, - clients: Vec, +// A structure which instructs the client APIs how to access +// DNS servers. +// +// These functions exist in a separate struct for comparison +// with the test-utility, [`LocalAddressGetter`]. +struct FromReservedRackSubnet {} + +const FROM_RESERVED_RACK_SUBNET: FromReservedRackSubnet = + FromReservedRackSubnet {}; + +impl FromReservedRackSubnet { + fn subnet_to_ips( + subnet: Ipv6Subnet, + ) -> impl Iterator { + ReservedRackSubnet::new(subnet) + .get_dns_subnets() + .into_iter() + .map(|dns_subnet| IpAddr::V6(dns_subnet.dns_address().ip())) + } + + fn subnet_to_dropshot_server_addrs( + &self, + subnet: Ipv6Subnet, + ) -> impl Iterator { + Self::subnet_to_ips(subnet) + .map(|address| SocketAddr::new(address, DNS_SERVER_PORT)) + } + + fn subnet_to_dns_server_addrs( + &self, + subnet: Ipv6Subnet, + ) -> impl Iterator { + Self::subnet_to_ips(subnet) + .map(|address| SocketAddr::new(address, DNS_PORT)) + } +} + +// A test-only alternative to [`FromReservedRackSubnet`]. +// +// Rather than inferring DNS server addresses from the rack subnet, +// they may be explicitly supplied. This results in easier-to-test code. 
+#[cfg(test)] +#[derive(Default)] +struct LocalAddressGetter { + addrs: Vec<(SocketAddr, SocketAddr)>, +} + +#[cfg(test)] +impl LocalAddressGetter { + fn add_dns_server( + &mut self, + dns_address: SocketAddr, + server_address: SocketAddr, + ) { + self.addrs.push((dns_address, server_address)); + } + + fn subnet_to_dropshot_server_addrs( + &self, + ) -> impl Iterator + '_ { + self.addrs + .iter() + .map(|(_dns_address, dropshot_address)| *dropshot_address) + } + + fn subnet_to_dns_server_addrs( + &self, + ) -> impl Iterator + '_ { + self.addrs.iter().map(|(dns_address, _dropshot_address)| *dns_address) + } } +/// Describes a service which may be inserted into DNS records. pub trait Service { fn aaaa(&self) -> crate::names::AAAA; fn srv(&self) -> crate::names::SRV; fn address(&self) -> SocketAddrV6; } +/// A connection used to update multiple DNS servers. +pub struct Updater { + log: Logger, + clients: Vec, +} + impl Updater { /// Creates a new "Updater", capable of communicating with all /// DNS servers within the AZ. pub fn new(subnet: Ipv6Subnet, log: Logger) -> Self { - let clients = ReservedRackSubnet::new(subnet) - .get_dns_subnets() - .into_iter() - .map(|dns_subnet| { - let addr = dns_subnet.dns_address().ip(); + let addrs = + FROM_RESERVED_RACK_SUBNET.subnet_to_dropshot_server_addrs(subnet); + Self::new_from_addrs(addrs, log) + } + + // Creates a new updater, using test-supplied DNS servers. + #[cfg(test)] + fn new_for_test(address_getter: &LocalAddressGetter, log: Logger) -> Self { + let dns_addrs = address_getter.subnet_to_dropshot_server_addrs(); + Self::new_from_addrs(dns_addrs, log) + } + + fn new_from_addrs( + addrs: impl Iterator, + log: Logger, + ) -> Self { + let clients = addrs + .map(|addr| { info!(log, "Adding DNS server: {}", addr); - crate::Client::new( - &format!("http://[{}]:{}", addr, DNS_SERVER_PORT), - log.clone(), - ) + crate::Client::new(&format!("http://{}", addr), log.clone()) }) .collect::>(); @@ -53,8 +136,7 @@ impl Updater { /// Inserts all service records into the DNS server. /// - /// This method is most efficient when records are sorted by - /// SRV key. + /// This method is most efficient when records are sorted by SRV key. pub async fn insert_dns_records( &self, records: &Vec, @@ -89,9 +171,9 @@ impl Updater { Ok(()) } - /// Utility function to insert: - /// - A set of uniquely-named AAAA records, each corresponding to an address - /// - An SRV record, pointing to each of the AAAA records. + // Utility function to insert: + // - A set of uniquely-named AAAA records, each corresponding to an address + // - An SRV record, pointing to each of the AAAA records. async fn insert_dns_records_internal( &self, aaaa: Vec<(crate::names::AAAA, SocketAddrV6)>, @@ -173,31 +255,6 @@ impl Updater { } } -// Creates a resolver using all internal DNS name servers. 
-fn create_resolver( - subnet: Ipv6Subnet, -) -> Result { - let mut rc = ResolverConfig::new(); - let dns_ips = ReservedRackSubnet::new(subnet) - .get_dns_subnets() - .into_iter() - .map(|subnet| subnet.dns_address().ip()) - .collect::>(); - - for dns_ip in dns_ips { - rc.add_name_server(NameServerConfig { - socket_addr: SocketAddr::V6(SocketAddrV6::new( - dns_ip, DNS_PORT, 0, 0, - )), - protocol: Protocol::Udp, - tls_dns_name: None, - trust_nx_responses: false, - bind_addr: None, - }); - } - TokioAsyncResolver::tokio(rc, ResolverOpts::default()) -} - #[derive(Debug, Clone, thiserror::Error)] pub enum ResolveError { #[error(transparent)] @@ -217,7 +274,37 @@ impl Resolver { /// Creates a DNS resolver, looking up DNS server addresses based on /// the provided subnet. pub fn new(subnet: Ipv6Subnet) -> Result { - Ok(Self { inner: Box::new(create_resolver(subnet)?) }) + let dns_addrs = + FROM_RESERVED_RACK_SUBNET.subnet_to_dns_server_addrs(subnet); + Self::new_from_addrs(dns_addrs) + } + + // Creates a new resolver, using test-supplied DNS servers. + #[cfg(test)] + fn new_for_test( + address_getter: &LocalAddressGetter, + ) -> Result { + let dns_addrs = address_getter.subnet_to_dns_server_addrs(); + Self::new_from_addrs(dns_addrs) + } + + fn new_from_addrs( + dns_addrs: impl Iterator, + ) -> Result { + let mut rc = ResolverConfig::new(); + for socket_addr in dns_addrs { + rc.add_name_server(NameServerConfig { + socket_addr, + protocol: Protocol::Udp, + tls_dns_name: None, + trust_nx_responses: false, + bind_addr: None, + }); + } + let inner = + Box::new(TokioAsyncResolver::tokio(rc, ResolverOpts::default())?); + + Ok(Self { inner }) } /// Convenience wrapper for [`Resolver::new`] which determines the subnet @@ -259,3 +346,350 @@ impl Resolver { Ok(address) } } + +#[cfg(test)] +mod test { + use super::*; + use crate::names::{BackendName, ServiceName, AAAA, SRV}; + use omicron_test_utils::dev::test_setup_log; + use std::str::FromStr; + use std::sync::Arc; + use tempfile::TempDir; + use uuid::Uuid; + + struct DnsServer { + _storage: TempDir, + dns_server: internal_dns::dns_server::Server, + dropshot_server: + dropshot::HttpServer>, + } + + impl DnsServer { + async fn create(log: &Logger) -> Self { + let storage = + TempDir::new().expect("Failed to create temporary directory"); + + let db = Arc::new(sled::open(&storage.path()).unwrap()); + + let dns_server = { + let db = db.clone(); + let log = log.clone(); + let dns_config = internal_dns::dns_server::Config { + bind_address: "[::1]:0".to_string(), + zone: crate::names::DNS_ZONE.into(), + }; + + internal_dns::dns_server::run(log, db, dns_config) + .await + .unwrap() + }; + + let config = internal_dns::Config { + log: dropshot::ConfigLogging::StderrTerminal { + level: dropshot::ConfigLoggingLevel::Info, + }, + dropshot: dropshot::ConfigDropshot { + bind_address: "[::1]:0".parse().unwrap(), + request_body_max_bytes: 1024, + ..Default::default() + }, + data: internal_dns::dns_data::Config { + nmax_messages: 16, + storage_path: storage.path().to_string_lossy().into(), + }, + }; + + let dropshot_server = + internal_dns::start_server(config, log.clone(), db) + .await + .unwrap(); + + Self { _storage: storage, dns_server, dropshot_server } + } + + fn dns_server_address(&self) -> SocketAddr { + self.dns_server.address + } + + fn dropshot_server_address(&self) -> SocketAddr { + self.dropshot_server.local_addr() + } + } + + // The resolver cannot look up IPs before records have been inserted. 
+ #[tokio::test] + async fn lookup_nonexistent_record_fails() { + let logctx = test_setup_log("lookup_nonexistent_record_fails"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new_for_test(&address_getter) + .expect("Error creating localhost resolver"); + + let err = resolver + .lookup_ip(SRV::Service(ServiceName::Cockroach)) + .await + .expect_err("Looking up non-existent service should fail"); + + let dns_error = match err { + ResolveError::Resolve(err) => err, + _ => panic!("Unexpected error: {err}"), + }; + assert!( + matches!( + dns_error.kind(), + trust_dns_resolver::error::ResolveErrorKind::NoRecordsFound { .. }, + ), + "Saw error: {dns_error}", + ); + logctx.cleanup_successful(); + } + + #[derive(Clone)] + struct TestServiceRecord { + aaaa: AAAA, + srv: SRV, + addr: SocketAddrV6, + } + + impl TestServiceRecord { + fn new(aaaa: AAAA, srv: SRV, addr: SocketAddrV6) -> Self { + Self { aaaa, srv, addr } + } + } + + impl Service for TestServiceRecord { + fn aaaa(&self) -> AAAA { + self.aaaa.clone() + } + + fn srv(&self) -> SRV { + self.srv.clone() + } + + fn address(&self) -> SocketAddrV6 { + self.addr + } + } + + // Insert and retreive a single DNS record. + #[tokio::test] + async fn insert_and_lookup_one_record() { + let logctx = test_setup_log("insert_and_lookup_one_record"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new_for_test(&address_getter) + .expect("Error creating localhost resolver"); + let updater = + Updater::new_for_test(&address_getter, logctx.log.clone()); + + let record = TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 12345, + 0, + 0, + ), + ); + updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, record.addr.ip()); + + logctx.cleanup_successful(); + } + + // Insert multiple DNS records of different types. 
+ #[tokio::test] + async fn insert_and_lookup_multiple_records() { + let logctx = test_setup_log("insert_and_lookup_multiple_records"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new_for_test(&address_getter) + .expect("Error creating localhost resolver"); + let updater = + Updater::new_for_test(&address_getter, logctx.log.clone()); + + let cockroach_addrs = [ + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 1111, + 0, + 0, + ), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::02").unwrap(), + 2222, + 0, + 0, + ), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::03").unwrap(), + 3333, + 0, + 0, + ), + ]; + let clickhouse_addr = SocketAddrV6::new( + Ipv6Addr::from_str("fe::01").unwrap(), + 4444, + 0, + 0, + ); + let crucible_addr = SocketAddrV6::new( + Ipv6Addr::from_str("fd::02").unwrap(), + 5555, + 0, + 0, + ); + + let records = vec![ + // Three Cockroach services + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + cockroach_addrs[0], + ), + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + cockroach_addrs[1], + ), + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + cockroach_addrs[2], + ), + // One Clickhouse service + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Clickhouse), + clickhouse_addr, + ), + // One Backend service + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Backend(BackendName::Crucible, Uuid::new_v4()), + crucible_addr, + ), + ]; + updater.insert_dns_records(&records).await.unwrap(); + + // Look up Cockroach + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert!(cockroach_addrs.iter().any(|addr| addr.ip() == &ip)); + + // Look up Clickhouse + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Clickhouse)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, clickhouse_addr.ip()); + + // Look up Backend Service + let ip = resolver + .lookup_ipv6(records[4].srv.clone()) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, crucible_addr.ip()); + + // If we remove the AAAA records for two of the CRDB services, + // only one will remain. 
+ updater + .dns_records_delete(&vec![DnsRecordKey { + name: records[0].aaaa.to_string(), + }]) + .await + .expect("Should have been able to delete record"); + updater + .dns_records_delete(&vec![DnsRecordKey { + name: records[1].aaaa.to_string(), + }]) + .await + .expect("Should have been able to delete record"); + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, cockroach_addrs[2].ip()); + + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn update_record() { + let logctx = test_setup_log("update_record"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new_for_test(&address_getter) + .expect("Error creating localhost resolver"); + let updater = + Updater::new_for_test(&address_getter, logctx.log.clone()); + + // Insert a record, observe that it exists. + let mut record = TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 12345, + 0, + 0, + ), + ); + updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, record.addr.ip()); + + // If we insert the same record with a new address, it should be + // updated. + record.addr = SocketAddrV6::new( + Ipv6Addr::from_str("ee::02").unwrap(), + 54321, + 0, + 0, + ); + updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, record.addr.ip()); + + logctx.cleanup_successful(); + } +} diff --git a/internal-dns-client/src/names.rs b/internal-dns-client/src/names.rs index d920ef77fbd..dbcc0d9f01c 100644 --- a/internal-dns-client/src/names.rs +++ b/internal-dns-client/src/names.rs @@ -2,11 +2,14 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +//! Naming scheme for Internal DNS names (RFD 248). + use std::fmt; use uuid::Uuid; -const DNS_ZONE: &str = "control-plane.oxide.internal"; +pub(crate) const DNS_ZONE: &str = "control-plane.oxide.internal"; +/// Names for services where backends are interchangeable. #[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum ServiceName { Clickhouse, @@ -28,6 +31,7 @@ impl fmt::Display for ServiceName { } } +/// Names for services where backends are not interchangeable. #[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum BackendName { Crucible, @@ -70,6 +74,7 @@ impl fmt::Display for SRV { } } +#[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum AAAA { /// Identifies an AAAA record for a sled. 
Sled(Uuid), @@ -90,3 +95,58 @@ impl fmt::Display for AAAA { } } } + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn display_srv_service() { + assert_eq!( + SRV::Service(ServiceName::Clickhouse).to_string(), + "_clickhouse._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::Cockroach).to_string(), + "_cockroach._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::InternalDNS).to_string(), + "_internalDNS._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::Nexus).to_string(), + "_nexus._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::Oximeter).to_string(), + "_oximeter._tcp.control-plane.oxide.internal", + ); + } + + #[test] + fn display_srv_backend() { + let uuid = Uuid::nil(); + assert_eq!( + SRV::Backend(BackendName::Crucible, uuid).to_string(), + "_crucible._tcp.00000000-0000-0000-0000-000000000000.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Backend(BackendName::SledAgent, uuid).to_string(), + "_sledagent._tcp.00000000-0000-0000-0000-000000000000.control-plane.oxide.internal", + ); + } + + #[test] + fn display_aaaa() { + let uuid = Uuid::nil(); + assert_eq!( + AAAA::Sled(uuid).to_string(), + "00000000-0000-0000-0000-000000000000.sled.control-plane.oxide.internal", + ); + assert_eq!( + AAAA::Zone(uuid).to_string(), + "00000000-0000-0000-0000-000000000000.host.control-plane.oxide.internal", + ); + } +} diff --git a/internal-dns/src/bin/dns-server.rs b/internal-dns/src/bin/dns-server.rs index 96e9da6feca..12eafcc3599 100644 --- a/internal-dns/src/bin/dns-server.rs +++ b/internal-dns/src/bin/dns-server.rs @@ -54,20 +54,18 @@ async fn main() -> Result<(), anyhow::Error> { let db = Arc::new(sled::open(&config.data.storage_path)?); - { + let _dns_server = { let db = db.clone(); let log = log.clone(); let dns_config = internal_dns::dns_server::Config { bind_address: dns_address.to_string(), zone: zone.to_string(), }; - tokio::spawn(async move { - internal_dns::dns_server::run(log, db, dns_config).await - }); - } + internal_dns::dns_server::run(log, db, dns_config).await? 
+ }; - let server = internal_dns::start_server(config, log, db).await?; - server + let dropshot_server = internal_dns::start_server(config, log, db).await?; + dropshot_server .await .map_err(|error_message| anyhow!("server exiting: {}", error_message)) } diff --git a/internal-dns/src/dns_server.rs b/internal-dns/src/dns_server.rs index bffda7cc73f..ccebda582f7 100644 --- a/internal-dns/src/dns_server.rs +++ b/internal-dns/src/dns_server.rs @@ -34,23 +34,43 @@ pub struct Config { pub zone: String, } -pub async fn run(log: Logger, db: Arc, config: Config) -> Result<()> { +pub struct Server { + pub address: SocketAddr, + pub handle: tokio::task::JoinHandle>, +} + +impl Server { + pub fn close(self) { + self.handle.abort() + } +} + +pub async fn run( + log: Logger, + db: Arc, + config: Config, +) -> Result { let socket = Arc::new(UdpSocket::bind(config.bind_address).await?); + let address = socket.local_addr()?; - loop { - let mut buf = vec![0u8; 16384]; - let (n, src) = socket.recv_from(&mut buf).await?; - buf.resize(n, 0); + let handle = tokio::task::spawn(async move { + loop { + let mut buf = vec![0u8; 16384]; + let (n, src) = socket.recv_from(&mut buf).await?; + buf.resize(n, 0); - let socket = socket.clone(); - let log = log.clone(); - let db = db.clone(); - let zone = config.zone.clone(); + let socket = socket.clone(); + let log = log.clone(); + let db = db.clone(); + let zone = config.zone.clone(); - tokio::spawn(async move { - handle_req(log, db, socket, src, buf, zone).await - }); - } + tokio::spawn(async move { + handle_req(log, db, socket, src, buf, zone).await + }); + } + }); + + Ok(Server { address, handle }) } async fn respond_nxdomain( diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs index 29d358970c7..b20d4176d8a 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -280,13 +280,16 @@ pub async fn servfail() -> Result<(), anyhow::Error> { struct TestContext { client: Client, resolver: TokioAsyncResolver, - server: dropshot::HttpServer>, + dns_server: internal_dns::dns_server::Server, + dropshot_server: + dropshot::HttpServer>, tmp: tempdir::TempDir, } impl TestContext { async fn cleanup(self) { - self.server.close().await.expect("Failed to clean up server"); + self.dns_server.close(); + self.dropshot_server.close().await.expect("Failed to clean up server"); self.tmp.close().expect("Failed to clean up tmp directory"); } } @@ -326,7 +329,7 @@ async fn init_client_server( TokioAsyncResolver::tokio(rc, ResolverOpts::default()).unwrap(); // launch a dns server - { + let dns_server = { let db = db.clone(); let log = log.clone(); let dns_config = internal_dns::dns_server::Config { @@ -334,18 +337,16 @@ async fn init_client_server( zone, }; - tokio::spawn(async move { - internal_dns::dns_server::run(log, db, dns_config).await - }); - } + internal_dns::dns_server::run(log, db, dns_config).await? 
+ }; // launch a dropshot server - let server = internal_dns::start_server(config, log, db).await?; + let dropshot_server = internal_dns::start_server(config, log, db).await?; // wait for server to start tokio::time::sleep(tokio::time::Duration::from_millis(250)).await; - Ok(TestContext { client, resolver, server, tmp }) + Ok(TestContext { client, resolver, dns_server, dropshot_server, tmp }) } fn test_config( diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index a21895938f3..395954959d5 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -520,7 +520,6 @@ impl DataStore { kind, ); - // TODO: Can we insert all the services at the same time? let svc = Self::service_upsert_sync(conn, service) .map_err(|e| { TxnError::CustomError(ServiceError::Other(e)) @@ -610,7 +609,6 @@ impl DataStore { ServiceKind::InternalDNS, ); - // TODO: Can we insert all the services at the same time? let svc = Self::service_upsert_sync(conn, service) .map_err(|e| { TxnError::CustomError(ServiceError::Other(e)) @@ -761,7 +759,6 @@ impl DataStore { kind, ); - // TODO: Can we insert all the datasets at the same time? let dataset = Self::dataset_upsert_sync(conn, dataset) .map_err(|e| { TxnError::CustomError(DatasetError::Other(e)) From 4b5dab7556167c5d3781655723bb27491376f7dc Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 15:03:52 -0400 Subject: [PATCH 35/88] Clean up test code --- internal-dns-client/src/multiclient.rs | 182 +++++++++++-------------- nexus/src/app/background/services.rs | 2 +- nexus/src/context.rs | 2 +- sled-agent/src/rack_setup/service.rs | 9 +- 4 files changed, 84 insertions(+), 111 deletions(-) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 333f0283dd8..c957f78a6ec 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -19,75 +19,37 @@ use trust_dns_resolver::TokioAsyncResolver; type DnsError = crate::Error; -// A structure which instructs the client APIs how to access -// DNS servers. -// -// These functions exist in a separate struct for comparison -// with the test-utility, [`LocalAddressGetter`]. -struct FromReservedRackSubnet {} - -const FROM_RESERVED_RACK_SUBNET: FromReservedRackSubnet = - FromReservedRackSubnet {}; - -impl FromReservedRackSubnet { - fn subnet_to_ips( - subnet: Ipv6Subnet, - ) -> impl Iterator { - ReservedRackSubnet::new(subnet) - .get_dns_subnets() - .into_iter() - .map(|dns_subnet| IpAddr::V6(dns_subnet.dns_address().ip())) - } - - fn subnet_to_dropshot_server_addrs( - &self, - subnet: Ipv6Subnet, - ) -> impl Iterator { - Self::subnet_to_ips(subnet) - .map(|address| SocketAddr::new(address, DNS_SERVER_PORT)) - } - - fn subnet_to_dns_server_addrs( - &self, - subnet: Ipv6Subnet, - ) -> impl Iterator { - Self::subnet_to_ips(subnet) - .map(|address| SocketAddr::new(address, DNS_PORT)) - } +/// Describes how to find the DNS servers. +/// +/// In production code, this is nearly always [`Ipv6Subnet`], +/// but it allows a point of dependency-injection for tests to supply their +/// own address lookups. +pub trait DnsAddressLookup { + fn dropshot_server_addrs(&self) -> Vec; + + fn dns_server_addrs(&self) -> Vec; } -// A test-only alternative to [`FromReservedRackSubnet`]. -// -// Rather than inferring DNS server addresses from the rack subnet, -// they may be explicitly supplied. This results in easier-to-test code. 
-#[cfg(test)] -#[derive(Default)] -struct LocalAddressGetter { - addrs: Vec<(SocketAddr, SocketAddr)>, +fn subnet_to_ips( + subnet: Ipv6Subnet, +) -> impl Iterator { + ReservedRackSubnet::new(subnet) + .get_dns_subnets() + .into_iter() + .map(|dns_subnet| IpAddr::V6(dns_subnet.dns_address().ip())) } -#[cfg(test)] -impl LocalAddressGetter { - fn add_dns_server( - &mut self, - dns_address: SocketAddr, - server_address: SocketAddr, - ) { - self.addrs.push((dns_address, server_address)); - } - - fn subnet_to_dropshot_server_addrs( - &self, - ) -> impl Iterator + '_ { - self.addrs - .iter() - .map(|(_dns_address, dropshot_address)| *dropshot_address) +impl DnsAddressLookup for Ipv6Subnet { + fn dropshot_server_addrs(&self) -> Vec { + subnet_to_ips(*self) + .map(|address| SocketAddr::new(address, DNS_SERVER_PORT)) + .collect() } - fn subnet_to_dns_server_addrs( - &self, - ) -> impl Iterator + '_ { - self.addrs.iter().map(|(dns_address, _dropshot_address)| *dns_address) + fn dns_server_addrs(&self) -> Vec { + subnet_to_ips(*self) + .map(|address| SocketAddr::new(address, DNS_PORT)) + .collect() } } @@ -105,26 +67,14 @@ pub struct Updater { } impl Updater { - /// Creates a new "Updater", capable of communicating with all - /// DNS servers within the AZ. - pub fn new(subnet: Ipv6Subnet, log: Logger) -> Self { - let addrs = - FROM_RESERVED_RACK_SUBNET.subnet_to_dropshot_server_addrs(subnet); + pub fn new(address_getter: &impl DnsAddressLookup, log: Logger) -> Self { + let addrs = address_getter.dropshot_server_addrs(); Self::new_from_addrs(addrs, log) } - // Creates a new updater, using test-supplied DNS servers. - #[cfg(test)] - fn new_for_test(address_getter: &LocalAddressGetter, log: Logger) -> Self { - let dns_addrs = address_getter.subnet_to_dropshot_server_addrs(); - Self::new_from_addrs(dns_addrs, log) - } - - fn new_from_addrs( - addrs: impl Iterator, - log: Logger, - ) -> Self { + fn new_from_addrs(addrs: Vec, log: Logger) -> Self { let clients = addrs + .into_iter() .map(|addr| { info!(log, "Adding DNS server: {}", addr); crate::Client::new(&format!("http://{}", addr), log.clone()) @@ -271,28 +221,18 @@ pub struct Resolver { } impl Resolver { - /// Creates a DNS resolver, looking up DNS server addresses based on - /// the provided subnet. - pub fn new(subnet: Ipv6Subnet) -> Result { - let dns_addrs = - FROM_RESERVED_RACK_SUBNET.subnet_to_dns_server_addrs(subnet); - Self::new_from_addrs(dns_addrs) - } - - // Creates a new resolver, using test-supplied DNS servers. - #[cfg(test)] - fn new_for_test( - address_getter: &LocalAddressGetter, + pub fn new( + address_getter: &impl DnsAddressLookup, ) -> Result { - let dns_addrs = address_getter.subnet_to_dns_server_addrs(); + let dns_addrs = address_getter.dns_server_addrs(); Self::new_from_addrs(dns_addrs) } fn new_from_addrs( - dns_addrs: impl Iterator, + dns_addrs: Vec, ) -> Result { let mut rc = ResolverConfig::new(); - for socket_addr in dns_addrs { + for socket_addr in dns_addrs.into_iter() { rc.add_name_server(NameServerConfig { socket_addr, protocol: Protocol::Udp, @@ -312,7 +252,7 @@ impl Resolver { pub fn new_from_ip(address: Ipv6Addr) -> Result { let subnet = Ipv6Subnet::::new(address); - Resolver::new(subnet) + Resolver::new(&subnet) } /// Looks up a single [`Ipv6Addr`] based on the SRV name. @@ -416,6 +356,41 @@ mod test { } } + // A test-only way to infer DNS addresses. + // + // Rather than inferring DNS server addresses from the rack subnet, + // they may be explicitly supplied. This results in easier-to-test code. 
+ #[derive(Default)] + struct LocalAddressGetter { + addrs: Vec<(SocketAddr, SocketAddr)>, + } + + impl LocalAddressGetter { + fn add_dns_server( + &mut self, + dns_address: SocketAddr, + server_address: SocketAddr, + ) { + self.addrs.push((dns_address, server_address)); + } + } + + impl DnsAddressLookup for LocalAddressGetter { + fn dropshot_server_addrs(&self) -> Vec { + self.addrs + .iter() + .map(|(_dns_address, dropshot_address)| *dropshot_address) + .collect() + } + + fn dns_server_addrs(&self) -> Vec { + self.addrs + .iter() + .map(|(dns_address, _dropshot_address)| *dns_address) + .collect() + } + } + // The resolver cannot look up IPs before records have been inserted. #[tokio::test] async fn lookup_nonexistent_record_fails() { @@ -428,7 +403,7 @@ mod test { dns_server.dropshot_server_address(), ); - let resolver = Resolver::new_for_test(&address_getter) + let resolver = Resolver::new(&address_getter) .expect("Error creating localhost resolver"); let err = resolver @@ -489,10 +464,9 @@ mod test { dns_server.dropshot_server_address(), ); - let resolver = Resolver::new_for_test(&address_getter) + let resolver = Resolver::new(&address_getter) .expect("Error creating localhost resolver"); - let updater = - Updater::new_for_test(&address_getter, logctx.log.clone()); + let updater = Updater::new(&address_getter, logctx.log.clone()); let record = TestServiceRecord::new( AAAA::Zone(Uuid::new_v4()), @@ -527,10 +501,9 @@ mod test { dns_server.dropshot_server_address(), ); - let resolver = Resolver::new_for_test(&address_getter) + let resolver = Resolver::new(&address_getter) .expect("Error creating localhost resolver"); - let updater = - Updater::new_for_test(&address_getter, logctx.log.clone()); + let updater = Updater::new(&address_getter, logctx.log.clone()); let cockroach_addrs = [ SocketAddrV6::new( @@ -652,10 +625,9 @@ mod test { dns_server.dropshot_server_address(), ); - let resolver = Resolver::new_for_test(&address_getter) + let resolver = Resolver::new(&address_getter) .expect("Error creating localhost resolver"); - let updater = - Updater::new_for_test(&address_getter, logctx.log.clone()); + let updater = Updater::new(&address_getter, logctx.log.clone()); // Insert a record, observe that it exists. 
let mut record = TestServiceRecord::new( diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index 7178ae7930b..94b2059d7ca 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -92,7 +92,7 @@ pub struct ServiceBalancer { impl ServiceBalancer { pub fn new(log: Logger, nexus: Arc) -> Self { let dns_updater = DnsUpdater::new( - nexus.az_subnet(), + &nexus.az_subnet(), log.new(o!("component" => "DNS Updater")), ); diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 31d0af0808a..c4de9e5d044 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -145,7 +145,7 @@ impl ServerContext { Ipv6Subnet::::new(config.deployment.subnet.net().ip()); info!(log, "Setting up resolver on subnet: {:?}", az_subnet); let resolver = - internal_dns_client::multiclient::Resolver::new(az_subnet) + internal_dns_client::multiclient::Resolver::new(&az_subnet) .map_err(|e| format!("Failed to create DNS resolver: {}", e))?; // Set up DB pool diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 42e821a379e..4b11beb8c16 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -313,9 +313,10 @@ impl ServiceInner { ) -> Result<(), SetupServiceError> { info!(self.log, "Handing off control to Nexus"); - let resolver = - internal_dns_client::multiclient::Resolver::new(config.az_subnet()) - .expect("Failed to create DNS resolver"); + let resolver = internal_dns_client::multiclient::Resolver::new( + &config.az_subnet(), + ) + .expect("Failed to create DNS resolver"); let ip = resolver .lookup_ip(SRV::Service(ServiceName::Nexus)) .await @@ -546,7 +547,7 @@ impl ServiceInner { .collect::>()?; let dns_servers = internal_dns_client::multiclient::Updater::new( - config.az_subnet(), + &config.az_subnet(), self.log.new(o!("client" => "DNS")), ); self.dns_servers From db2b545309ceb695f4d92460e10581bc041e8cc6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 15:13:41 -0400 Subject: [PATCH 36/88] no retry in client library --- internal-dns-client/src/multiclient.rs | 22 +++------------- sled-agent/src/rack_setup/service.rs | 35 +++++++++++++++++++++----- 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index c957f78a6ec..ca8387fca45 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -7,17 +7,14 @@ use futures::stream::{self, StreamExt, TryStreamExt}; use omicron_common::address::{ Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, DNS_SERVER_PORT, }; -use omicron_common::backoff::{ - internal_service_policy, retry_notify, BackoffError, -}; -use slog::{info, warn, Logger}; +use slog::{info, Logger}; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use trust_dns_resolver::config::{ NameServerConfig, Protocol, ResolverConfig, ResolverOpts, }; use trust_dns_resolver::TokioAsyncResolver; -type DnsError = crate::Error; +pub type DnsError = crate::Error; /// Describes how to find the DNS servers. 
/// @@ -152,20 +149,7 @@ impl Updater { }) .collect::>(), }); - - let set_record = || async { - self.dns_records_set(&records) - .await - .map_err(BackoffError::transient)?; - Ok::<(), BackoffError>(()) - }; - let log_failure = |error, _| { - warn!(self.log, "Failed to set DNS records"; "error" => ?error); - }; - - retry_notify(internal_service_policy(), set_record, log_failure) - .await?; - Ok(()) + self.dns_records_set(&records).await } /// Sets a records on all DNS servers. diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 4b11beb8c16..55c837ceffa 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -207,11 +207,21 @@ impl ServiceInner { .await?; } - // Initialize DNS records for these datasets. - self.dns_servers - .get() - .expect("DNS servers must be initialized first") - .insert_dns_records(datasets) + let records_put = || async { + self.dns_servers + .get() + .expect("DNS servers must be initialized first") + .insert_dns_records(datasets) + .await + .map_err(BackoffError::transient)?; + Ok::<(), BackoffError>( + (), + ) + }; + let log_failure = |error, _| { + warn!(self.log, "failed to set DNS records"; "error" => ?error); + }; + retry_notify(internal_service_policy(), records_put, log_failure) .await?; Ok(()) @@ -256,7 +266,20 @@ impl ServiceInner { // Insert DNS records, if the DNS servers have been initialized if let Some(dns_servers) = self.dns_servers.get() { - dns_servers.insert_dns_records(services).await?; + let records_put = || async { + dns_servers + .insert_dns_records(services) + .await + .map_err(BackoffError::transient)?; + Ok::<(), BackoffError>( + (), + ) + }; + let log_failure = |error, _| { + warn!(self.log, "failed to set DNS records"; "error" => ?error); + }; + retry_notify(internal_service_policy(), records_put, log_failure) + .await?; } Ok(()) From 027fb3b11bf772e992a6d24e15ea532e03ba3618 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 15:19:46 -0400 Subject: [PATCH 37/88] Fix internal-dns --- Cargo.lock | 10 ------- internal-dns/Cargo.toml | 1 - internal-dns/tests/basic_test.rs | 50 ++++++++++++++------------------ 3 files changed, 22 insertions(+), 39 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8a7b9f25cb0..126aae1dbc9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2315,7 +2315,6 @@ dependencies = [ "omicron-test-utils", "openapi-lint", "openapiv3", - "portpicker", "pretty-hex 0.3.0", "schemars", "serde", @@ -3882,15 +3881,6 @@ dependencies = [ "universal-hash", ] -[[package]] -name = "portpicker" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be97d76faf1bfab666e1375477b23fde79eccf0276e9b63b92a39d676a889ba9" -dependencies = [ - "rand 0.8.5", -] - [[package]] name = "postcard" version = "0.7.3" diff --git a/internal-dns/Cargo.toml b/internal-dns/Cargo.toml index 886fa72cc18..d49859f18c1 100644 --- a/internal-dns/Cargo.toml +++ b/internal-dns/Cargo.toml @@ -30,7 +30,6 @@ expectorate = "1.0.5" omicron-test-utils = { path = "../test-utils" } openapiv3 = "1.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } -portpicker = "0.1" serde_json = "1.0" subprocess = "0.2.9" trust-dns-resolver = "0.21" diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs index b20d4176d8a..af72ded52cb 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -2,7 +2,7 @@ // License, v. 2.0. 
If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::net::Ipv6Addr; use std::sync::Arc; use anyhow::{Context, Result}; @@ -298,7 +298,7 @@ async fn init_client_server( zone: String, ) -> Result { // initialize dns server config - let (tmp, config, dropshot_port, dns_port) = test_config()?; + let (tmp, config) = test_config()?; let log = config .log .to_logger("internal-dns") @@ -308,17 +308,21 @@ async fn init_client_server( let db = Arc::new(sled::open(&config.data.storage_path)?); db.clear()?; - let client = - Client::new(&format!("http://[::1]:{}", dropshot_port), log.clone()); + // launch a dns server + let dns_server = { + let db = db.clone(); + let log = log.clone(); + let dns_config = internal_dns::dns_server::Config { + bind_address: "[::1]:0".into(), + zone, + }; + + internal_dns::dns_server::run(log, db, dns_config).await? + }; let mut rc = ResolverConfig::new(); rc.add_name_server(NameServerConfig { - socket_addr: SocketAddr::V6(SocketAddrV6::new( - Ipv6Addr::LOCALHOST, - dns_port, - 0, - 0, - )), + socket_addr: dns_server.address, protocol: Protocol::Udp, tls_dns_name: None, trust_nx_responses: false, @@ -328,31 +332,21 @@ async fn init_client_server( let resolver = TokioAsyncResolver::tokio(rc, ResolverOpts::default()).unwrap(); - // launch a dns server - let dns_server = { - let db = db.clone(); - let log = log.clone(); - let dns_config = internal_dns::dns_server::Config { - bind_address: format!("[::1]:{}", dns_port), - zone, - }; - - internal_dns::dns_server::run(log, db, dns_config).await? - }; - // launch a dropshot server - let dropshot_server = internal_dns::start_server(config, log, db).await?; + let dropshot_server = + internal_dns::start_server(config, log.clone(), db).await?; // wait for server to start tokio::time::sleep(tokio::time::Duration::from_millis(250)).await; + let client = + Client::new(&format!("http://{}", dropshot_server.local_addr()), log); + Ok(TestContext { client, resolver, dns_server, dropshot_server, tmp }) } fn test_config( -) -> Result<(tempdir::TempDir, internal_dns::Config, u16, u16), anyhow::Error> { - let dropshot_port = portpicker::pick_unused_port().expect("pick port"); - let dns_port = portpicker::pick_unused_port().expect("pick port"); +) -> Result<(tempdir::TempDir, internal_dns::Config), anyhow::Error> { let tmp_dir = tempdir::TempDir::new("internal-dns-test")?; let mut storage_path = tmp_dir.path().to_path_buf(); storage_path.push("test"); @@ -363,7 +357,7 @@ fn test_config( level: dropshot::ConfigLoggingLevel::Info, }, dropshot: dropshot::ConfigDropshot { - bind_address: format!("[::1]:{}", dropshot_port).parse().unwrap(), + bind_address: format!("[::1]:0").parse().unwrap(), request_body_max_bytes: 1024, ..Default::default() }, @@ -373,5 +367,5 @@ fn test_config( }, }; - Ok((tmp_dir, config, dropshot_port, dns_port)) + Ok((tmp_dir, config)) } From ff2d7b91b45522a56fa906eb34161fd98858ffc8 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 15:22:29 -0400 Subject: [PATCH 38/88] [internal-dns] Avoid 'picking ports' --- Cargo.lock | 10 ----- internal-dns/Cargo.toml | 1 - internal-dns/src/bin/dns-server.rs | 12 +++--- internal-dns/src/dns_server.rs | 46 +++++++++++++++------- internal-dns/tests/basic_test.rs | 61 ++++++++++++++---------------- 5 files changed, 66 insertions(+), 64 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8d997dd9762..14488da8837 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ 
-2315,7 +2315,6 @@ dependencies = [ "omicron-test-utils", "openapi-lint", "openapiv3", - "portpicker", "pretty-hex 0.3.0", "schemars", "serde", @@ -3867,15 +3866,6 @@ dependencies = [ "universal-hash", ] -[[package]] -name = "portpicker" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be97d76faf1bfab666e1375477b23fde79eccf0276e9b63b92a39d676a889ba9" -dependencies = [ - "rand 0.8.5", -] - [[package]] name = "postcard" version = "0.7.3" diff --git a/internal-dns/Cargo.toml b/internal-dns/Cargo.toml index 886fa72cc18..d49859f18c1 100644 --- a/internal-dns/Cargo.toml +++ b/internal-dns/Cargo.toml @@ -30,7 +30,6 @@ expectorate = "1.0.5" omicron-test-utils = { path = "../test-utils" } openapiv3 = "1.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } -portpicker = "0.1" serde_json = "1.0" subprocess = "0.2.9" trust-dns-resolver = "0.21" diff --git a/internal-dns/src/bin/dns-server.rs b/internal-dns/src/bin/dns-server.rs index 96e9da6feca..12eafcc3599 100644 --- a/internal-dns/src/bin/dns-server.rs +++ b/internal-dns/src/bin/dns-server.rs @@ -54,20 +54,18 @@ async fn main() -> Result<(), anyhow::Error> { let db = Arc::new(sled::open(&config.data.storage_path)?); - { + let _dns_server = { let db = db.clone(); let log = log.clone(); let dns_config = internal_dns::dns_server::Config { bind_address: dns_address.to_string(), zone: zone.to_string(), }; - tokio::spawn(async move { - internal_dns::dns_server::run(log, db, dns_config).await - }); - } + internal_dns::dns_server::run(log, db, dns_config).await? + }; - let server = internal_dns::start_server(config, log, db).await?; - server + let dropshot_server = internal_dns::start_server(config, log, db).await?; + dropshot_server .await .map_err(|error_message| anyhow!("server exiting: {}", error_message)) } diff --git a/internal-dns/src/dns_server.rs b/internal-dns/src/dns_server.rs index bffda7cc73f..ccebda582f7 100644 --- a/internal-dns/src/dns_server.rs +++ b/internal-dns/src/dns_server.rs @@ -34,23 +34,43 @@ pub struct Config { pub zone: String, } -pub async fn run(log: Logger, db: Arc, config: Config) -> Result<()> { +pub struct Server { + pub address: SocketAddr, + pub handle: tokio::task::JoinHandle>, +} + +impl Server { + pub fn close(self) { + self.handle.abort() + } +} + +pub async fn run( + log: Logger, + db: Arc, + config: Config, +) -> Result { let socket = Arc::new(UdpSocket::bind(config.bind_address).await?); + let address = socket.local_addr()?; - loop { - let mut buf = vec![0u8; 16384]; - let (n, src) = socket.recv_from(&mut buf).await?; - buf.resize(n, 0); + let handle = tokio::task::spawn(async move { + loop { + let mut buf = vec![0u8; 16384]; + let (n, src) = socket.recv_from(&mut buf).await?; + buf.resize(n, 0); - let socket = socket.clone(); - let log = log.clone(); - let db = db.clone(); - let zone = config.zone.clone(); + let socket = socket.clone(); + let log = log.clone(); + let db = db.clone(); + let zone = config.zone.clone(); - tokio::spawn(async move { - handle_req(log, db, socket, src, buf, zone).await - }); - } + tokio::spawn(async move { + handle_req(log, db, socket, src, buf, zone).await + }); + } + }); + + Ok(Server { address, handle }) } async fn respond_nxdomain( diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs index 29d358970c7..af72ded52cb 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -2,7 +2,7 @@ // License, v. 2.0. 
If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::net::Ipv6Addr; use std::sync::Arc; use anyhow::{Context, Result}; @@ -280,13 +280,16 @@ pub async fn servfail() -> Result<(), anyhow::Error> { struct TestContext { client: Client, resolver: TokioAsyncResolver, - server: dropshot::HttpServer>, + dns_server: internal_dns::dns_server::Server, + dropshot_server: + dropshot::HttpServer>, tmp: tempdir::TempDir, } impl TestContext { async fn cleanup(self) { - self.server.close().await.expect("Failed to clean up server"); + self.dns_server.close(); + self.dropshot_server.close().await.expect("Failed to clean up server"); self.tmp.close().expect("Failed to clean up tmp directory"); } } @@ -295,7 +298,7 @@ async fn init_client_server( zone: String, ) -> Result { // initialize dns server config - let (tmp, config, dropshot_port, dns_port) = test_config()?; + let (tmp, config) = test_config()?; let log = config .log .to_logger("internal-dns") @@ -305,17 +308,21 @@ async fn init_client_server( let db = Arc::new(sled::open(&config.data.storage_path)?); db.clear()?; - let client = - Client::new(&format!("http://[::1]:{}", dropshot_port), log.clone()); + // launch a dns server + let dns_server = { + let db = db.clone(); + let log = log.clone(); + let dns_config = internal_dns::dns_server::Config { + bind_address: "[::1]:0".into(), + zone, + }; + + internal_dns::dns_server::run(log, db, dns_config).await? + }; let mut rc = ResolverConfig::new(); rc.add_name_server(NameServerConfig { - socket_addr: SocketAddr::V6(SocketAddrV6::new( - Ipv6Addr::LOCALHOST, - dns_port, - 0, - 0, - )), + socket_addr: dns_server.address, protocol: Protocol::Udp, tls_dns_name: None, trust_nx_responses: false, @@ -325,33 +332,21 @@ async fn init_client_server( let resolver = TokioAsyncResolver::tokio(rc, ResolverOpts::default()).unwrap(); - // launch a dns server - { - let db = db.clone(); - let log = log.clone(); - let dns_config = internal_dns::dns_server::Config { - bind_address: format!("[::1]:{}", dns_port), - zone, - }; - - tokio::spawn(async move { - internal_dns::dns_server::run(log, db, dns_config).await - }); - } - // launch a dropshot server - let server = internal_dns::start_server(config, log, db).await?; + let dropshot_server = + internal_dns::start_server(config, log.clone(), db).await?; // wait for server to start tokio::time::sleep(tokio::time::Duration::from_millis(250)).await; - Ok(TestContext { client, resolver, server, tmp }) + let client = + Client::new(&format!("http://{}", dropshot_server.local_addr()), log); + + Ok(TestContext { client, resolver, dns_server, dropshot_server, tmp }) } fn test_config( -) -> Result<(tempdir::TempDir, internal_dns::Config, u16, u16), anyhow::Error> { - let dropshot_port = portpicker::pick_unused_port().expect("pick port"); - let dns_port = portpicker::pick_unused_port().expect("pick port"); +) -> Result<(tempdir::TempDir, internal_dns::Config), anyhow::Error> { let tmp_dir = tempdir::TempDir::new("internal-dns-test")?; let mut storage_path = tmp_dir.path().to_path_buf(); storage_path.push("test"); @@ -362,7 +357,7 @@ fn test_config( level: dropshot::ConfigLoggingLevel::Info, }, dropshot: dropshot::ConfigDropshot { - bind_address: format!("[::1]:{}", dropshot_port).parse().unwrap(), + bind_address: format!("[::1]:0").parse().unwrap(), request_body_max_bytes: 1024, ..Default::default() }, @@ -372,5 +367,5 @@ fn test_config( }, }; - Ok((tmp_dir, config, 
dropshot_port, dns_port)) + Ok((tmp_dir, config)) } From 2a035a5ab9fc56068266d88d9ed2cc8edae1c63f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 15:29:28 -0400 Subject: [PATCH 39/88] Changes from rss-handoff --- Cargo.lock | 7 + internal-dns-client/Cargo.toml | 9 + internal-dns-client/src/multiclient.rs | 612 ++++++++++++++++++++++--- internal-dns-client/src/names.rs | 103 ++++- 4 files changed, 670 insertions(+), 61 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3956c2c3a4b..fe1cc390b81 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2338,13 +2338,20 @@ dependencies = [ name = "internal-dns-client" version = "0.1.0" dependencies = [ + "dropshot", "futures", + "internal-dns", "omicron-common 0.1.0", + "omicron-test-utils", "progenitor", "reqwest", "serde", "serde_json", + "sled", "slog", + "tempfile", + "thiserror", + "tokio", "trust-dns-proto", "trust-dns-resolver", "uuid", diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml index 3303ddfc44c..4872699610a 100644 --- a/internal-dns-client/Cargo.toml +++ b/internal-dns-client/Cargo.toml @@ -12,6 +12,15 @@ reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } serde = { version = "1.0", features = [ "derive" ] } serde_json = "1.0" slog = { version = "2.5.0", features = [ "max_level_trace", "release_max_level_debug" ] } +thiserror = "1.0" trust-dns-proto = "0.21" trust-dns-resolver = "0.21" uuid = { version = "1.1.0", features = [ "v4", "serde" ] } + +[dev-dependencies] +dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } +internal-dns = { path = "../internal-dns" } +omicron-test-utils = { path = "../test-utils" } +sled = "0.34" +tempfile = "3.3" +tokio = { version = "1.18", features = [ "full" ] } diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 24c8817c274..ca8387fca45 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -7,49 +7,122 @@ use futures::stream::{self, StreamExt, TryStreamExt}; use omicron_common::address::{ Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, DNS_SERVER_PORT, }; -use omicron_common::backoff::{ - internal_service_policy, retry_notify, BackoffError, -}; -use slog::{info, warn, Logger}; -use std::net::{SocketAddr, SocketAddrV6}; +use slog::{info, Logger}; +use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use trust_dns_resolver::config::{ NameServerConfig, Protocol, ResolverConfig, ResolverOpts, }; use trust_dns_resolver::TokioAsyncResolver; -type DnsError = crate::Error; +pub type DnsError = crate::Error; + +/// Describes how to find the DNS servers. +/// +/// In production code, this is nearly always [`Ipv6Subnet`], +/// but it allows a point of dependency-injection for tests to supply their +/// own address lookups. 
+pub trait DnsAddressLookup { + fn dropshot_server_addrs(&self) -> Vec; + + fn dns_server_addrs(&self) -> Vec; +} + +fn subnet_to_ips( + subnet: Ipv6Subnet, +) -> impl Iterator { + ReservedRackSubnet::new(subnet) + .get_dns_subnets() + .into_iter() + .map(|dns_subnet| IpAddr::V6(dns_subnet.dns_address().ip())) +} + +impl DnsAddressLookup for Ipv6Subnet { + fn dropshot_server_addrs(&self) -> Vec { + subnet_to_ips(*self) + .map(|address| SocketAddr::new(address, DNS_SERVER_PORT)) + .collect() + } + + fn dns_server_addrs(&self) -> Vec { + subnet_to_ips(*self) + .map(|address| SocketAddr::new(address, DNS_PORT)) + .collect() + } +} + +/// Describes a service which may be inserted into DNS records. +pub trait Service { + fn aaaa(&self) -> crate::names::AAAA; + fn srv(&self) -> crate::names::SRV; + fn address(&self) -> SocketAddrV6; +} /// A connection used to update multiple DNS servers. pub struct Updater { + log: Logger, clients: Vec, } impl Updater { - /// Creates a new "Updater", capable of communicating with all - /// DNS servers within the AZ. - pub fn new(subnet: Ipv6Subnet, log: Logger) -> Self { - let clients = ReservedRackSubnet::new(subnet) - .get_dns_subnets() + pub fn new(address_getter: &impl DnsAddressLookup, log: Logger) -> Self { + let addrs = address_getter.dropshot_server_addrs(); + Self::new_from_addrs(addrs, log) + } + + fn new_from_addrs(addrs: Vec, log: Logger) -> Self { + let clients = addrs .into_iter() - .map(|dns_subnet| { - let addr = dns_subnet.dns_address().ip(); + .map(|addr| { info!(log, "Adding DNS server: {}", addr); - crate::Client::new( - &format!("http://[{}]:{}", addr, DNS_SERVER_PORT), - log.clone(), - ) + crate::Client::new(&format!("http://{}", addr), log.clone()) }) .collect::>(); - Self { clients } + Self { log, clients } } - /// Utility function to insert: - /// - A set of uniquely-named AAAA records, each corresponding to an address - /// - An SRV record, pointing to each of the AAAA records. + /// Inserts all service records into the DNS server. + /// + /// This method is most efficient when records are sorted by SRV key. pub async fn insert_dns_records( &self, - log: &Logger, + records: &Vec, + ) -> Result<(), DnsError> { + let mut records = records.iter().peekable(); + + while let Some(record) = records.next() { + let srv = record.srv(); + info!(self.log, "Inserting DNS record: {:?}", srv); + + match &srv { + &crate::names::SRV::Service(_) => { + let mut aaaa = vec![(record.aaaa(), record.address())]; + while let Some(record) = records.peek() { + if record.srv() == srv { + let record = records.next().unwrap(); + aaaa.push((record.aaaa(), record.address())); + } else { + break; + } + } + + self.insert_dns_records_internal(aaaa, srv).await?; + } + &crate::names::SRV::Backend(_, _) => { + let aaaa = vec![(record.aaaa(), record.address())]; + self.insert_dns_records_internal(aaaa, record.srv()) + .await?; + } + }; + } + Ok(()) + } + + // Utility function to insert: + // - A set of uniquely-named AAAA records, each corresponding to an address + // - An SRV record, pointing to each of the AAAA records. 
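    // As a concrete illustration (the UUIDs and addresses here are made up;
    // the name formats match the ones exercised by the tests in names.rs),
    // two interchangeable CockroachDB zones end up published roughly as:
    //
    //   <zone-uuid-1>.host.control-plane.oxide.internal   AAAA  <addr-1>
    //   <zone-uuid-2>.host.control-plane.oxide.internal   AAAA  <addr-2>
    //   _cockroach._tcp.control-plane.oxide.internal      SRV   -> both AAAA names above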
+ async fn insert_dns_records_internal( + &self, aaaa: Vec<(crate::names::AAAA, SocketAddrV6)>, srv_key: crate::names::SRV, ) -> Result<(), DnsError> { @@ -76,20 +149,7 @@ impl Updater { }) .collect::>(), }); - - let set_record = || async { - self.dns_records_set(&records) - .await - .map_err(BackoffError::transient)?; - Ok::<(), BackoffError>(()) - }; - let log_failure = |error, _| { - warn!(log, "Failed to set DNS records"; "error" => ?error); - }; - - retry_notify(internal_service_policy(), set_record, log_failure) - .await?; - Ok(()) + self.dns_records_set(&records).await } /// Sets a records on all DNS servers. @@ -129,27 +189,463 @@ impl Updater { } } -/// Creates a resolver using all internal DNS name servers. -pub fn create_resolver( - subnet: Ipv6Subnet, -) -> Result { - let mut rc = ResolverConfig::new(); - let dns_ips = ReservedRackSubnet::new(subnet) - .get_dns_subnets() - .into_iter() - .map(|subnet| subnet.dns_address().ip()) - .collect::>(); - - for dns_ip in dns_ips { - rc.add_name_server(NameServerConfig { - socket_addr: SocketAddr::V6(SocketAddrV6::new( - dns_ip, DNS_PORT, 0, 0, - )), - protocol: Protocol::Udp, - tls_dns_name: None, - trust_nx_responses: false, - bind_addr: None, - }); +#[derive(Debug, Clone, thiserror::Error)] +pub enum ResolveError { + #[error(transparent)] + Resolve(#[from] trust_dns_resolver::error::ResolveError), + + #[error("Record not found for SRV key: {0}")] + NotFound(crate::names::SRV), +} + +/// A wrapper around a DNS resolver, providing a way to conveniently +/// look up IP addresses of services based on their SRV keys. +pub struct Resolver { + inner: Box, +} + +impl Resolver { + pub fn new( + address_getter: &impl DnsAddressLookup, + ) -> Result { + let dns_addrs = address_getter.dns_server_addrs(); + Self::new_from_addrs(dns_addrs) + } + + fn new_from_addrs( + dns_addrs: Vec, + ) -> Result { + let mut rc = ResolverConfig::new(); + for socket_addr in dns_addrs.into_iter() { + rc.add_name_server(NameServerConfig { + socket_addr, + protocol: Protocol::Udp, + tls_dns_name: None, + trust_nx_responses: false, + bind_addr: None, + }); + } + let inner = + Box::new(TokioAsyncResolver::tokio(rc, ResolverOpts::default())?); + + Ok(Self { inner }) + } + + /// Convenience wrapper for [`Resolver::new`] which determines the subnet + /// based on a provided IP address. + pub fn new_from_ip(address: Ipv6Addr) -> Result { + let subnet = Ipv6Subnet::::new(address); + + Resolver::new(&subnet) + } + + /// Looks up a single [`Ipv6Addr`] based on the SRV name. + /// Returns an error if the record does not exist. + // TODO: There are lots of ways this API can expand: Caching, + // actually respecting TTL, looking up ports, etc. + // + // For now, however, it serves as a very simple "get everyone using DNS" + // API that can be improved upon later. 
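    // A minimal usage sketch (assumes a `resolver` built via `Resolver::new`
    // and records already published by an `Updater`; error handling elided):
    //
    //   let addr = resolver
    //       .lookup_ipv6(crate::names::SRV::Service(
    //           crate::names::ServiceName::Cockroach,
    //       ))
    //       .await?;
    //   // `addr` is the Ipv6Addr of one arbitrary CockroachDB backend.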
+ pub async fn lookup_ipv6( + &self, + srv: crate::names::SRV, + ) -> Result { + let response = self.inner.ipv6_lookup(&srv.to_string()).await?; + let address = response + .iter() + .next() + .ok_or_else(|| ResolveError::NotFound(srv))?; + Ok(*address) + } + + pub async fn lookup_ip( + &self, + srv: crate::names::SRV, + ) -> Result { + let response = self.inner.lookup_ip(&srv.to_string()).await?; + let address = response + .iter() + .next() + .ok_or_else(|| ResolveError::NotFound(srv))?; + Ok(address) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::names::{BackendName, ServiceName, AAAA, SRV}; + use omicron_test_utils::dev::test_setup_log; + use std::str::FromStr; + use std::sync::Arc; + use tempfile::TempDir; + use uuid::Uuid; + + struct DnsServer { + _storage: TempDir, + dns_server: internal_dns::dns_server::Server, + dropshot_server: + dropshot::HttpServer>, + } + + impl DnsServer { + async fn create(log: &Logger) -> Self { + let storage = + TempDir::new().expect("Failed to create temporary directory"); + + let db = Arc::new(sled::open(&storage.path()).unwrap()); + + let dns_server = { + let db = db.clone(); + let log = log.clone(); + let dns_config = internal_dns::dns_server::Config { + bind_address: "[::1]:0".to_string(), + zone: crate::names::DNS_ZONE.into(), + }; + + internal_dns::dns_server::run(log, db, dns_config) + .await + .unwrap() + }; + + let config = internal_dns::Config { + log: dropshot::ConfigLogging::StderrTerminal { + level: dropshot::ConfigLoggingLevel::Info, + }, + dropshot: dropshot::ConfigDropshot { + bind_address: "[::1]:0".parse().unwrap(), + request_body_max_bytes: 1024, + ..Default::default() + }, + data: internal_dns::dns_data::Config { + nmax_messages: 16, + storage_path: storage.path().to_string_lossy().into(), + }, + }; + + let dropshot_server = + internal_dns::start_server(config, log.clone(), db) + .await + .unwrap(); + + Self { _storage: storage, dns_server, dropshot_server } + } + + fn dns_server_address(&self) -> SocketAddr { + self.dns_server.address + } + + fn dropshot_server_address(&self) -> SocketAddr { + self.dropshot_server.local_addr() + } + } + + // A test-only way to infer DNS addresses. + // + // Rather than inferring DNS server addresses from the rack subnet, + // they may be explicitly supplied. This results in easier-to-test code. + #[derive(Default)] + struct LocalAddressGetter { + addrs: Vec<(SocketAddr, SocketAddr)>, + } + + impl LocalAddressGetter { + fn add_dns_server( + &mut self, + dns_address: SocketAddr, + server_address: SocketAddr, + ) { + self.addrs.push((dns_address, server_address)); + } + } + + impl DnsAddressLookup for LocalAddressGetter { + fn dropshot_server_addrs(&self) -> Vec { + self.addrs + .iter() + .map(|(_dns_address, dropshot_address)| *dropshot_address) + .collect() + } + + fn dns_server_addrs(&self) -> Vec { + self.addrs + .iter() + .map(|(dns_address, _dropshot_address)| *dns_address) + .collect() + } + } + + // The resolver cannot look up IPs before records have been inserted. 
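    // A sketch of how the tests below wire this up (`dns_addr` and
    // `dropshot_addr` are placeholders for the ephemeral addresses handed
    // back by `DnsServer::create`):
    //
    //   let mut addrs = LocalAddressGetter::default();
    //   addrs.add_dns_server(dns_addr, dropshot_addr);
    //   let resolver = Resolver::new(&addrs).unwrap();
    //   let updater = Updater::new(&addrs, log.clone());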
+ #[tokio::test] + async fn lookup_nonexistent_record_fails() { + let logctx = test_setup_log("lookup_nonexistent_record_fails"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new(&address_getter) + .expect("Error creating localhost resolver"); + + let err = resolver + .lookup_ip(SRV::Service(ServiceName::Cockroach)) + .await + .expect_err("Looking up non-existent service should fail"); + + let dns_error = match err { + ResolveError::Resolve(err) => err, + _ => panic!("Unexpected error: {err}"), + }; + assert!( + matches!( + dns_error.kind(), + trust_dns_resolver::error::ResolveErrorKind::NoRecordsFound { .. }, + ), + "Saw error: {dns_error}", + ); + logctx.cleanup_successful(); + } + + #[derive(Clone)] + struct TestServiceRecord { + aaaa: AAAA, + srv: SRV, + addr: SocketAddrV6, + } + + impl TestServiceRecord { + fn new(aaaa: AAAA, srv: SRV, addr: SocketAddrV6) -> Self { + Self { aaaa, srv, addr } + } + } + + impl Service for TestServiceRecord { + fn aaaa(&self) -> AAAA { + self.aaaa.clone() + } + + fn srv(&self) -> SRV { + self.srv.clone() + } + + fn address(&self) -> SocketAddrV6 { + self.addr + } + } + + // Insert and retreive a single DNS record. + #[tokio::test] + async fn insert_and_lookup_one_record() { + let logctx = test_setup_log("insert_and_lookup_one_record"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new(&address_getter) + .expect("Error creating localhost resolver"); + let updater = Updater::new(&address_getter, logctx.log.clone()); + + let record = TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 12345, + 0, + 0, + ), + ); + updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, record.addr.ip()); + + logctx.cleanup_successful(); + } + + // Insert multiple DNS records of different types. 
+ #[tokio::test] + async fn insert_and_lookup_multiple_records() { + let logctx = test_setup_log("insert_and_lookup_multiple_records"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new(&address_getter) + .expect("Error creating localhost resolver"); + let updater = Updater::new(&address_getter, logctx.log.clone()); + + let cockroach_addrs = [ + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 1111, + 0, + 0, + ), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::02").unwrap(), + 2222, + 0, + 0, + ), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::03").unwrap(), + 3333, + 0, + 0, + ), + ]; + let clickhouse_addr = SocketAddrV6::new( + Ipv6Addr::from_str("fe::01").unwrap(), + 4444, + 0, + 0, + ); + let crucible_addr = SocketAddrV6::new( + Ipv6Addr::from_str("fd::02").unwrap(), + 5555, + 0, + 0, + ); + + let records = vec![ + // Three Cockroach services + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + cockroach_addrs[0], + ), + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + cockroach_addrs[1], + ), + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + cockroach_addrs[2], + ), + // One Clickhouse service + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Clickhouse), + clickhouse_addr, + ), + // One Backend service + TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Backend(BackendName::Crucible, Uuid::new_v4()), + crucible_addr, + ), + ]; + updater.insert_dns_records(&records).await.unwrap(); + + // Look up Cockroach + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert!(cockroach_addrs.iter().any(|addr| addr.ip() == &ip)); + + // Look up Clickhouse + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Clickhouse)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, clickhouse_addr.ip()); + + // Look up Backend Service + let ip = resolver + .lookup_ipv6(records[4].srv.clone()) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, crucible_addr.ip()); + + // If we remove the AAAA records for two of the CRDB services, + // only one will remain. 
+ updater + .dns_records_delete(&vec![DnsRecordKey { + name: records[0].aaaa.to_string(), + }]) + .await + .expect("Should have been able to delete record"); + updater + .dns_records_delete(&vec![DnsRecordKey { + name: records[1].aaaa.to_string(), + }]) + .await + .expect("Should have been able to delete record"); + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, cockroach_addrs[2].ip()); + + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn update_record() { + let logctx = test_setup_log("update_record"); + let dns_server = DnsServer::create(&logctx.log).await; + + let mut address_getter = LocalAddressGetter::default(); + address_getter.add_dns_server( + dns_server.dns_server_address(), + dns_server.dropshot_server_address(), + ); + + let resolver = Resolver::new(&address_getter) + .expect("Error creating localhost resolver"); + let updater = Updater::new(&address_getter, logctx.log.clone()); + + // Insert a record, observe that it exists. + let mut record = TestServiceRecord::new( + AAAA::Zone(Uuid::new_v4()), + SRV::Service(ServiceName::Cockroach), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 12345, + 0, + 0, + ), + ); + updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, record.addr.ip()); + + // If we insert the same record with a new address, it should be + // updated. + record.addr = SocketAddrV6::new( + Ipv6Addr::from_str("ee::02").unwrap(), + 54321, + 0, + 0, + ); + updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + let ip = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(&ip, record.addr.ip()); + + logctx.cleanup_successful(); } - TokioAsyncResolver::tokio(rc, ResolverOpts::default()) } diff --git a/internal-dns-client/src/names.rs b/internal-dns-client/src/names.rs index 6384ec9e503..dbcc0d9f01c 100644 --- a/internal-dns-client/src/names.rs +++ b/internal-dns-client/src/names.rs @@ -2,22 +2,63 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +//! Naming scheme for Internal DNS names (RFD 248). + use std::fmt; use uuid::Uuid; -const DNS_ZONE: &str = "control-plane.oxide.internal"; +pub(crate) const DNS_ZONE: &str = "control-plane.oxide.internal"; + +/// Names for services where backends are interchangeable. +#[derive(Clone, Debug, PartialEq, PartialOrd)] +pub enum ServiceName { + Clickhouse, + Cockroach, + InternalDNS, + Nexus, + Oximeter, +} +impl fmt::Display for ServiceName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + ServiceName::Clickhouse => write!(f, "clickhouse"), + ServiceName::Cockroach => write!(f, "cockroach"), + ServiceName::InternalDNS => write!(f, "internalDNS"), + ServiceName::Nexus => write!(f, "nexus"), + ServiceName::Oximeter => write!(f, "oximeter"), + } + } +} + +/// Names for services where backends are not interchangeable. 
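/// An individual backend is addressed by its own SRV name; for example
/// (nil UUID shown purely for illustration):
/// `_crucible._tcp.00000000-0000-0000-0000-000000000000.control-plane.oxide.internal`.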
+#[derive(Clone, Debug, PartialEq, PartialOrd)] +pub enum BackendName { + Crucible, + SledAgent, +} + +impl fmt::Display for BackendName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + BackendName::Crucible => write!(f, "crucible"), + BackendName::SledAgent => write!(f, "sledagent"), + } + } +} + +#[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum SRV { /// A service identified and accessed by name, such as "nexus", "CRDB", etc. /// /// This is used in cases where services are interchangeable. - Service(String), + Service(ServiceName), /// A service identified by name and a unique identifier. /// /// This is used in cases where services are not interchangeable, such as /// for the Sled agent. - Backend(String, Uuid), + Backend(BackendName, Uuid), } impl fmt::Display for SRV { @@ -33,6 +74,7 @@ impl fmt::Display for SRV { } } +#[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum AAAA { /// Identifies an AAAA record for a sled. Sled(Uuid), @@ -53,3 +95,58 @@ impl fmt::Display for AAAA { } } } + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn display_srv_service() { + assert_eq!( + SRV::Service(ServiceName::Clickhouse).to_string(), + "_clickhouse._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::Cockroach).to_string(), + "_cockroach._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::InternalDNS).to_string(), + "_internalDNS._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::Nexus).to_string(), + "_nexus._tcp.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Service(ServiceName::Oximeter).to_string(), + "_oximeter._tcp.control-plane.oxide.internal", + ); + } + + #[test] + fn display_srv_backend() { + let uuid = Uuid::nil(); + assert_eq!( + SRV::Backend(BackendName::Crucible, uuid).to_string(), + "_crucible._tcp.00000000-0000-0000-0000-000000000000.control-plane.oxide.internal", + ); + assert_eq!( + SRV::Backend(BackendName::SledAgent, uuid).to_string(), + "_sledagent._tcp.00000000-0000-0000-0000-000000000000.control-plane.oxide.internal", + ); + } + + #[test] + fn display_aaaa() { + let uuid = Uuid::nil(); + assert_eq!( + AAAA::Sled(uuid).to_string(), + "00000000-0000-0000-0000-000000000000.sled.control-plane.oxide.internal", + ); + assert_eq!( + AAAA::Zone(uuid).to_string(), + "00000000-0000-0000-0000-000000000000.host.control-plane.oxide.internal", + ); + } +} From c3a49bb82b6de4a5e138186d83c862655d730eb3 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 15:49:19 -0400 Subject: [PATCH 40/88] [nexus] Add a new user for background tasks --- nexus/src/app/mod.rs | 10 ++++++++++ nexus/src/authn/mod.rs | 11 +++++++++++ nexus/src/db/datastore.rs | 1 + nexus/src/db/fixed_data/role_assignment.rs | 7 +++++++ nexus/src/db/fixed_data/user_builtin.rs | 11 +++++++++++ nexus/tests/integration_tests/users_builtin.rs | 3 +++ 6 files changed, 43 insertions(+) diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 1cb1f6b6ff7..06f7264a124 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -222,6 +222,16 @@ impl Nexus { &self.opctx_external_authn } + /// Returns an [`OpContext`] used for background tasks. 
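    /// A rough usage sketch: a background task grabs a context up front,
    /// `let opctx = nexus.opctx_for_background();`, and the returned context
    /// authenticates as the "background-work" built-in user defined in
    /// user_builtin.rs.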
+ pub fn opctx_for_background(&self) -> OpContext { + OpContext::for_background( + self.log.new(o!("component" => "BackgroundWork")), + Arc::clone(&self.authz), + authn::Context::internal_db_background(), + Arc::clone(&self.db_datastore), + ) + } + /// Used as the body of a "stub" endpoint -- one that's currently /// unimplemented but that we eventually intend to implement /// diff --git a/nexus/src/authn/mod.rs b/nexus/src/authn/mod.rs index 59e5bc7a889..c9399bdb131 100644 --- a/nexus/src/authn/mod.rs +++ b/nexus/src/authn/mod.rs @@ -30,6 +30,7 @@ pub mod silos; pub use crate::db::fixed_data::silo_user::USER_TEST_PRIVILEGED; pub use crate::db::fixed_data::silo_user::USER_TEST_UNPRIVILEGED; +pub use crate::db::fixed_data::user_builtin::USER_BACKGROUND_WORK; pub use crate::db::fixed_data::user_builtin::USER_DB_INIT; pub use crate::db::fixed_data::user_builtin::USER_EXTERNAL_AUTHN; pub use crate::db::fixed_data::user_builtin::USER_INTERNAL_API; @@ -170,6 +171,11 @@ impl Context { Context::context_for_builtin_user(USER_DB_INIT.id) } + /// Returns an authenticated context for Nexus-driven db work. + pub fn internal_db_background() -> Context { + Context::context_for_builtin_user(USER_BACKGROUND_WORK.id) + } + fn context_for_builtin_user(user_builtin_id: Uuid) -> Context { Context { kind: Kind::Authenticated(Details { @@ -213,6 +219,7 @@ impl Context { #[cfg(test)] mod test { use super::Context; + use super::USER_BACKGROUND_WORK; use super::USER_DB_INIT; use super::USER_INTERNAL_API; use super::USER_INTERNAL_READ; @@ -251,6 +258,10 @@ mod test { let actor = authn.actor().unwrap(); assert_eq!(actor.actor_id(), USER_DB_INIT.id); + let authn = Context::internal_db_background(); + let actor = authn.actor().unwrap(); + assert_eq!(actor.actor_id(), USER_BACKGROUND_WORK.id); + let authn = Context::internal_saga_recovery(); let actor = authn.actor().unwrap(); assert_eq!(actor.actor_id(), USER_SAGA_RECOVERY.id); diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 6c28185ce7d..6814b6276ac 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -2990,6 +2990,7 @@ impl DataStore { let builtin_users = [ // Note: "db_init" is also a builtin user, but that one by necessity // is created with the database. + &*authn::USER_BACKGROUND_WORK, &*authn::USER_INTERNAL_API, &*authn::USER_INTERNAL_READ, &*authn::USER_EXTERNAL_AUTHN, diff --git a/nexus/src/db/fixed_data/role_assignment.rs b/nexus/src/db/fixed_data/role_assignment.rs index 94caf552a13..540b57abe50 100644 --- a/nexus/src/db/fixed_data/role_assignment.rs +++ b/nexus/src/db/fixed_data/role_assignment.rs @@ -24,6 +24,13 @@ lazy_static! { *FLEET_ID, role_builtin::FLEET_ADMIN.role_name, ), + RoleAssignment::new( + IdentityType::UserBuiltin, + user_builtin::USER_BACKGROUND_WORK.id, + role_builtin::FLEET_ADMIN.resource_type, + *FLEET_ID, + role_builtin::FLEET_ADMIN.role_name, + ), // The "internal-read" user gets the "viewer" role on the sole // Fleet. This will grant them the ability to read various control diff --git a/nexus/src/db/fixed_data/user_builtin.rs b/nexus/src/db/fixed_data/user_builtin.rs index 1e9dee1b7bf..238a8f5405a 100644 --- a/nexus/src/db/fixed_data/user_builtin.rs +++ b/nexus/src/db/fixed_data/user_builtin.rs @@ -39,6 +39,15 @@ lazy_static! { "used for seeding initial database data", ); + /// Internal user for performing operations driven by Nexus, rather + /// than any API request. 
+ pub static ref USER_BACKGROUND_WORK: UserBuiltinConfig = + UserBuiltinConfig::new_static( + "001de000-05e4-4000-8000-00000000bac3", + "background-work", + "used for Nexus-driven database operations", + ); + /// Internal user used by Nexus when handling internal API requests pub static ref USER_INTERNAL_API: UserBuiltinConfig = UserBuiltinConfig::new_static( @@ -77,6 +86,7 @@ lazy_static! { #[cfg(test)] mod test { use super::super::assert_valid_uuid; + use super::USER_BACKGROUND_WORK; use super::USER_DB_INIT; use super::USER_EXTERNAL_AUTHN; use super::USER_INTERNAL_API; @@ -85,6 +95,7 @@ mod test { #[test] fn test_builtin_user_ids_are_valid() { + assert_valid_uuid(&USER_BACKGROUND_WORK.id); assert_valid_uuid(&USER_DB_INIT.id); assert_valid_uuid(&USER_INTERNAL_API.id); assert_valid_uuid(&USER_EXTERNAL_AUTHN.id); diff --git a/nexus/tests/integration_tests/users_builtin.rs b/nexus/tests/integration_tests/users_builtin.rs index b06741a3067..0df3fbaf04b 100644 --- a/nexus/tests/integration_tests/users_builtin.rs +++ b/nexus/tests/integration_tests/users_builtin.rs @@ -27,6 +27,9 @@ async fn test_users_builtin(cptestctx: &ControlPlaneTestContext) { let u = users.remove(&authn::USER_DB_INIT.name.to_string()).unwrap(); assert_eq!(u.identity.id, authn::USER_DB_INIT.id); + let u = + users.remove(&authn::USER_BACKGROUND_WORK.name.to_string()).unwrap(); + assert_eq!(u.identity.id, authn::USER_BACKGROUND_WORK.id); let u = users.remove(&authn::USER_INTERNAL_API.name.to_string()).unwrap(); assert_eq!(u.identity.id, authn::USER_INTERNAL_API.id); let u = users.remove(&authn::USER_INTERNAL_READ.name.to_string()).unwrap(); From e33fb4bef1a537820d2e1dec4aa092c325c34916 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 15:59:25 -0400 Subject: [PATCH 41/88] fix typos, warnings --- common/src/nexus_config.rs | 2 +- nexus/src/app/background/services.rs | 2 +- nexus/src/db/datastore.rs | 2 -- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index d0e87d70ebb..a18454e02d0 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -102,7 +102,7 @@ pub enum Database { pub struct DeploymentConfig { /// Uuid of the Nexus instance pub id: Uuid, - /// Uuid of the Rack where Nexus is executing + /// Uuid of the Rack where Nexus is executing. pub rack_id: Uuid, /// Dropshot configuration for external API server pub dropshot_external: ConfigDropshot, diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index 94b2059d7ca..4a97d89c407 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -33,7 +33,7 @@ use std::sync::Arc; #[derive(Debug)] enum ServiceRedundancy { // This service must exist on at least this many sleds - // within the racki. + // within the rack. 
PerRack(u32), // This service must exist on at least this many sleds diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index bf1ac6a79d3..12435e28d3e 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -4615,7 +4615,6 @@ mod test { ); let rack_id = Uuid::new_v4(); let sled_id = Uuid::new_v4(); - let rack_id = Uuid::new_v4(); let sled = Sled::new(sled_id, bogus_addr.clone(), rack_id); datastore.sled_upsert(sled).await.unwrap(); sled_id @@ -4969,7 +4968,6 @@ mod test { OpContext::for_tests(logctx.log.new(o!()), datastore.clone()); let rack_id = Uuid::new_v4(); - let rack_id = Uuid::new_v4(); let addr1 = "[fd00:1de::1]:12345".parse().unwrap(); let sled1_id = "0de4b299-e0b4-46f0-d528-85de81a7095f".parse().unwrap(); let sled1 = db::model::Sled::new(sled1_id, addr1, rack_id); From 5440cbf36979efb7ba28ac43083757ffc096cab2 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 16:38:24 -0400 Subject: [PATCH 42/88] [rss] Set DNS records during RSS initialization --- Cargo.lock | 1 + common/src/address.rs | 2 + sled-agent/Cargo.toml | 1 + sled-agent/src/params.rs | 60 +++++++++++++++++++++++----- sled-agent/src/rack_setup/service.rs | 33 ++++++++++++++- 5 files changed, 86 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fe1cc390b81..3bf3d89f88f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3155,6 +3155,7 @@ dependencies = [ "expectorate", "futures", "http", + "internal-dns-client", "ipnetwork", "libc", "macaddr", diff --git a/common/src/address.rs b/common/src/address.rs index 226dc9ea655..3dee3848b9e 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -34,6 +34,8 @@ pub const SLED_AGENT_PORT: u16 = 12345; /// The port propolis-server listens on inside the propolis zone. pub const PROPOLIS_PORT: u16 = 12400; +pub const OXIMETER_PORT: u16 = 12223; + // Anycast is a mechanism in which a single IP address is shared by multiple // devices, and the destination is located based on routing distance. // diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index dc254c75e15..3041a99ae06 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -17,6 +17,7 @@ clap = { version = "3.2", features = ["derive"] } crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "8314eeddd228ec0d76cefa40c4a41d3e2611ac18" } dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } futures = "0.3.21" +internal-dns-client = { path = "../internal-dns-client" } ipnetwork = "0.18" libc = "0.2.126" macaddr = { version = "1.0.1", features = [ "serde_std" ] } diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index e3370a30200..dbb0eac57a1 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use internal_dns_client::names::{BackendName, ServiceName, AAAA, SRV}; +use omicron_common::address::OXIMETER_PORT; use omicron_common::api::external; use omicron_common::api::internal::nexus::{ DiskRuntimeState, InstanceRuntimeState, @@ -226,7 +228,7 @@ impl std::fmt::Display for DatasetKind { use DatasetKind::*; let s = match self { Crucible => "crucible", - CockroachDb { .. } => "cockroach", + CockroachDb { .. 
} => "cockroachdb", Clickhouse => "clickhouse", }; write!(f, "{}", s) @@ -247,14 +249,28 @@ pub struct DatasetEnsureBody { pub dataset_kind: DatasetKind, // The address on which the zone will listen for requests. pub address: SocketAddrV6, - // NOTE: We could insert a UUID here, if we want that to be set by the - // caller explicitly? Currently, the lack of a UUID implies that - // "at most one dataset type" exists within a zpool. - // - // It's unclear if this is actually necessary - making this change - // would also require the RSS to query existing datasets before - // requesting new ones (after all, we generally wouldn't want to - // create two CRDB datasets with different UUIDs on the same zpool). +} + +impl internal_dns_client::multiclient::Service for DatasetEnsureBody { + fn aaaa(&self) -> AAAA { + AAAA::Zone(self.id) + } + + fn srv(&self) -> SRV { + match self.dataset_kind { + DatasetKind::Crucible => { + SRV::Backend(BackendName::Crucible, self.id) + } + DatasetKind::Clickhouse => SRV::Service(ServiceName::Clickhouse), + DatasetKind::CockroachDb { .. } => { + SRV::Service(ServiceName::Cockroach) + } + } + } + + fn address(&self) -> SocketAddrV6 { + self.address + } } impl From for sled_agent_client::types::DatasetEnsureBody { @@ -326,6 +342,32 @@ pub struct ServiceRequest { pub service_type: ServiceType, } +impl internal_dns_client::multiclient::Service for ServiceRequest { + fn aaaa(&self) -> AAAA { + AAAA::Zone(self.id) + } + + fn srv(&self) -> SRV { + match self.service_type { + ServiceType::InternalDns { .. } => { + SRV::Service(ServiceName::InternalDNS) + } + ServiceType::Nexus { .. } => SRV::Service(ServiceName::Nexus), + ServiceType::Oximeter => SRV::Service(ServiceName::Oximeter), + } + } + + fn address(&self) -> SocketAddrV6 { + match self.service_type { + ServiceType::InternalDns { server_address, .. } => server_address, + ServiceType::Nexus { internal_address, .. } => internal_address, + ServiceType::Oximeter => { + SocketAddrV6::new(self.addresses[0], OXIMETER_PORT, 0, 0) + } + } + } +} + impl From for sled_agent_client::types::ServiceRequest { fn from(s: ServiceRequest) -> Self { Self { diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index c48a20cc4bc..a4f7032b385 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -13,6 +13,7 @@ use crate::bootstrap::{ trust_quorum::{RackSecret, ShareDistribution}, }; use crate::params::{ServiceRequest, ServiceType}; +use internal_dns_client::multiclient::{DnsError, Updater as DnsUpdater}; use omicron_common::address::{ get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, }; @@ -26,7 +27,7 @@ use std::collections::{HashMap, HashSet}; use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::path::PathBuf; use thiserror::Error; -use tokio::sync::Mutex; +use tokio::sync::{Mutex, OnceCell}; use uuid::Uuid; /// Describes errors which may occur while operating the setup service. @@ -56,6 +57,9 @@ pub enum SetupServiceError { #[error("Failed to split rack secret: {0:?}")] SplitRackSecret(vsss_rs::Error), + + #[error("Failed to access DNS servers: {0}")] + Dns(#[from] DnsError), } // The workload / information allocated to a single sled. 
@@ -150,11 +154,16 @@ enum PeerExpectation { struct ServiceInner { log: Logger, peer_monitor: Mutex, + dns_servers: OnceCell, } impl ServiceInner { fn new(log: Logger, peer_monitor: PeerMonitorObserver) -> Self { - ServiceInner { log, peer_monitor: Mutex::new(peer_monitor) } + ServiceInner { + log, + peer_monitor: Mutex::new(peer_monitor), + dns_servers: OnceCell::new(), + } } async fn initialize_datasets( @@ -574,6 +583,15 @@ impl ServiceInner { .into_iter() .collect::>()?; + let dns_servers = DnsUpdater::new( + &config.az_subnet(), + self.log.new(o!("client" => "DNS")), + ); + self.dns_servers + .set(dns_servers) + .map_err(|_| ()) + .expect("DNS servers should only be set once"); + // Issue the dataset initialization requests to all sleds. futures::future::join_all(plan.iter().map( |(_, allocation)| async move { @@ -585,6 +603,12 @@ impl ServiceInner { &allocation.services_request.datasets, ) .await?; + + self.dns_servers + .get() + .expect("DNS servers must be initialized first") + .insert_dns_records(&allocation.services_request.datasets) + .await?; Ok(()) }, )) @@ -614,6 +638,11 @@ impl ServiceInner { .collect::>(); self.initialize_services(sled_address, &all_services).await?; + self.dns_servers + .get() + .expect("DNS servers must be initialized first") + .insert_dns_records(&all_services) + .await?; Ok(()) }, )) From 3e1495fd1a62e4be814f8df866e0a0e41815eca5 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 17:01:11 -0400 Subject: [PATCH 43/88] [oximeter] Rely on dynamically set arguments within Oximeter --- Cargo.lock | 1 + common/src/address.rs | 4 +- nexus/test-utils/src/lib.rs | 15 ++-- oximeter/collector/Cargo.toml | 1 + oximeter/collector/config.toml | 7 -- oximeter/collector/src/bin/oximeter.rs | 54 +++++++----- oximeter/collector/src/lib.rs | 86 ++++++++++++++----- .../tests/output/cmd-oximeter-noargs-stderr | 14 ++- oximeter/collector/tests/test_commands.rs | 2 +- sled-agent/src/services.rs | 61 ++++++++++--- smf/oximeter/config.toml | 8 -- smf/oximeter/manifest.xml | 7 +- 12 files changed, 174 insertions(+), 86 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3bf3d89f88f..876ef959b8e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3445,6 +3445,7 @@ dependencies = [ "clap 3.2.5", "dropshot", "expectorate", + "internal-dns-client", "nexus-client 0.1.0", "omicron-common 0.1.0", "omicron-test-utils", diff --git a/common/src/address.rs b/common/src/address.rs index 3dee3848b9e..708fbff12bd 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -33,9 +33,11 @@ pub const SLED_AGENT_PORT: u16 = 12345; /// The port propolis-server listens on inside the propolis zone. pub const PROPOLIS_PORT: u16 = 12400; - +pub const CLICKHOUSE_PORT: u16 = 8123; pub const OXIMETER_PORT: u16 = 12223; +pub const NEXUS_INTERNAL_PORT: u16 = 12221; + // Anycast is a mechanism in which a single IP address is shared by multiple // devices, and the destination is located based on routing distance. 
// diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index ed056f48d8d..0df25b48fcb 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -197,21 +197,20 @@ pub async fn start_oximeter( id: Uuid, ) -> Result { let db = oximeter_collector::DbConfig { - address: SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db_port), + address: Some(SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db_port)), batch_size: 10, batch_interval: 1, }; let config = oximeter_collector::Config { - id, - nexus_address, + nexus_address: Some(nexus_address), db, - dropshot: ConfigDropshot { - bind_address: SocketAddr::new(Ipv6Addr::LOCALHOST.into(), 0), - ..Default::default() - }, log: ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Error }, }; - Oximeter::new(&config).await.map_err(|e| e.to_string()) + let args = oximeter_collector::OximeterArguments { + id, + address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), + }; + Oximeter::new(&config, &args).await.map_err(|e| e.to_string()) } #[derive(Debug, Clone, oximeter::Target)] diff --git a/oximeter/collector/Cargo.toml b/oximeter/collector/Cargo.toml index 7e36050d9af..10fe6058c0a 100644 --- a/oximeter/collector/Cargo.toml +++ b/oximeter/collector/Cargo.toml @@ -8,6 +8,7 @@ license = "MPL-2.0" [dependencies] clap = { version = "3.2", features = ["derive"] } dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } +internal-dns-client = { path = "../../internal-dns-client" } nexus-client = { path = "../../nexus-client" } omicron-common = { path = "../../common" } oximeter = { path = "../oximeter" } diff --git a/oximeter/collector/config.toml b/oximeter/collector/config.toml index 6b03a3974d2..0e8557a71bf 100644 --- a/oximeter/collector/config.toml +++ b/oximeter/collector/config.toml @@ -1,16 +1,9 @@ # Example configuration file for running an oximeter collector server -id = "1da65e5b-210c-4859-a7d7-200c1e659972" -nexus_address = "127.0.0.1:12221" - [db] -address = "[::1]:8123" batch_size = 1000 batch_interval = 5 # In seconds [log] level = "debug" mode = "stderr-terminal" - -[dropshot] -bind_address = "[::1]:12223" diff --git a/oximeter/collector/src/bin/oximeter.rs b/oximeter/collector/src/bin/oximeter.rs index 19f9b5b3da0..bf54cf33fa0 100644 --- a/oximeter/collector/src/bin/oximeter.rs +++ b/oximeter/collector/src/bin/oximeter.rs @@ -8,8 +8,10 @@ use clap::Parser; use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; -use oximeter_collector::{oximeter_api, Config, Oximeter}; +use oximeter_collector::{oximeter_api, Config, Oximeter, OximeterArguments}; +use std::net::SocketAddrV6; use std::path::PathBuf; +use uuid::Uuid; pub fn run_openapi() -> Result<(), String> { oximeter_api() @@ -24,18 +26,22 @@ pub fn run_openapi() -> Result<(), String> { /// Run an oximeter metric collection server in the Oxide Control Plane. 
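// A hedged usage sketch for the subcommand interface that follows (flag names
// come from the clap derive below; the UUID, address, and path are
// placeholders):
//
//     oximeter openapi
//     oximeter run --id <uuid> --address [::1]:12223 config.toml
//
// The collector's id and listen address now arrive on the command line (set by
// the sled agent via SMF properties) rather than from the TOML config file.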
#[derive(Parser)] #[clap(name = "oximeter", about = "See README.adoc for more information")] -struct Args { - #[clap( - short = 'O', - long = "openapi", - help = "Print the external OpenAPI Spec document and exit", - action - )] - openapi: bool, - - /// Path to TOML file with configuration for the server - #[clap(name = "CONFIG_FILE", action)] - config_file: PathBuf, +enum Args { + /// Print the external OpenAPI Spec document and exit + Openapi, + + /// Start an Oximeter server + Run { + /// Path to TOML file with configuration for the server + #[clap(name = "CONFIG_FILE", action)] + config_file: PathBuf, + + #[clap(short, long, action)] + id: Uuid, + + #[clap(short, long, action)] + address: SocketAddrV6, + }, } #[tokio::main] @@ -47,15 +53,17 @@ async fn main() { async fn do_run() -> Result<(), CmdError> { let args = Args::parse(); - let config = Config::from_file(args.config_file).unwrap(); - if args.openapi { - run_openapi().map_err(CmdError::Failure) - } else { - Oximeter::new(&config) - .await - .unwrap() - .serve_forever() - .await - .map_err(|e| CmdError::Failure(e.to_string())) + match args { + Args::Openapi => run_openapi().map_err(CmdError::Failure), + Args::Run { config_file, id, address } => { + let config = Config::from_file(config_file).unwrap(); + let args = OximeterArguments { id, address }; + Oximeter::new(&config, &args) + .await + .unwrap() + .serve_forever() + .await + .map_err(|e| CmdError::Failure(e.to_string())) + } } } diff --git a/oximeter/collector/src/lib.rs b/oximeter/collector/src/lib.rs index 4e2f6ca4fda..64a2af4c96a 100644 --- a/oximeter/collector/src/lib.rs +++ b/oximeter/collector/src/lib.rs @@ -11,6 +11,11 @@ use dropshot::{ HttpResponseUpdatedNoContent, HttpServer, HttpServerStarter, RequestContext, TypedBody, }; +use internal_dns_client::{ + multiclient::{ResolveError, Resolver}, + names::{ServiceName, SRV}, +}; +use omicron_common::address::{CLICKHOUSE_PORT, NEXUS_INTERNAL_PORT}; use omicron_common::api::internal::nexus::ProducerEndpoint; use omicron_common::backoff; use oximeter::types::{ProducerResults, ProducerResultsItem}; @@ -18,7 +23,7 @@ use oximeter_db::{Client, DbWrite}; use serde::{Deserialize, Serialize}; use slog::{debug, error, info, o, trace, warn, Drain, Logger}; use std::collections::{btree_map::Entry, BTreeMap}; -use std::net::SocketAddr; +use std::net::{SocketAddr, SocketAddrV6}; use std::path::Path; use std::sync::Arc; use std::time::Duration; @@ -37,6 +42,9 @@ pub enum Error { #[error(transparent)] Database(#[from] oximeter_db::Error), + + #[error(transparent)] + ResolveError(#[from] ResolveError), } // Messages for controlling a collection task @@ -231,8 +239,11 @@ async fn results_sink( /// Configuration for interacting with the metric database. #[derive(Debug, Clone, Copy, Deserialize, Serialize)] pub struct DbConfig { - /// Address of the ClickHouse server - pub address: SocketAddr, + /// Optional address of the ClickHouse server. + /// + /// If "None", will be inferred from DNS. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub address: Option, /// Batch size of samples at which to insert pub batch_size: usize, @@ -259,6 +270,7 @@ impl OximeterAgent { pub async fn with_id( id: Uuid, db_config: DbConfig, + resolver: &Resolver, log: &Logger, ) -> Result { let (result_sender, result_receiver) = mpsc::channel(8); @@ -267,7 +279,17 @@ impl OximeterAgent { // Construct the ClickHouse client first, propagate an error if we can't reach the // database. 
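        // When the config omits the ClickHouse address (the new default for
        // the SMF-managed zone), the code below resolves the Clickhouse SRV
        // record in internal DNS and pairs it with the well-known
        // CLICKHOUSE_PORT instead.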
- let client = Client::new(db_config.address, &log); + let db_address = if let Some(address) = db_config.address { + address + } else { + SocketAddr::new( + resolver + .lookup_ip(SRV::Service(ServiceName::Clickhouse)) + .await?, + CLICKHOUSE_PORT, + ) + }; + let client = Client::new(db_address, &log); client.init_db().await?; // Spawn the task for aggregating and inserting all metrics @@ -334,18 +356,15 @@ impl OximeterAgent { /// Configuration used to initialize an oximeter server #[derive(Clone, Debug, Deserialize, Serialize)] pub struct Config { - /// An unique ID for this oximeter server - pub id: Uuid, - /// The address used to connect to Nexus. - pub nexus_address: SocketAddr, + /// + /// If "None", will be inferred from DNS. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub nexus_address: Option, /// Configuration for working with ClickHouse pub db: DbConfig, - /// The internal Dropshot HTTP server configuration - pub dropshot: ConfigDropshot, - /// Logging configuration pub log: ConfigLogging, } @@ -360,6 +379,11 @@ impl Config { } } +pub struct OximeterArguments { + pub id: Uuid, + pub address: SocketAddrV6, +} + /// A server used to collect metrics from components in the control plane. pub struct Oximeter { _agent: Arc, @@ -371,7 +395,10 @@ impl Oximeter { /// /// This starts an HTTP server used to communicate with other agents in Omicron, especially /// Nexus. It also registers itself as a new `oximeter` instance with Nexus. - pub async fn new(config: &Config) -> Result { + pub async fn new( + config: &Config, + args: &OximeterArguments, + ) -> Result { let (drain, registration) = slog_dtrace::with_drain( config .log @@ -388,10 +415,13 @@ impl Oximeter { } info!(log, "starting oximeter server"); + let resolver = Resolver::new_from_ip(*args.address.ip())?; + let make_agent = || async { debug!(log, "creating ClickHouse client"); Ok(Arc::new( - OximeterAgent::with_id(config.id, config.db, &log).await?, + OximeterAgent::with_id(args.id, config.db, &resolver, &log) + .await?, )) }; let log_client_failure = |error, delay| { @@ -411,7 +441,10 @@ impl Oximeter { let dropshot_log = log.new(o!("component" => "dropshot")); let server = HttpServerStarter::new( - &config.dropshot, + &ConfigDropshot { + bind_address: SocketAddr::V6(args.address), + ..Default::default() + }, oximeter_api(), Arc::clone(&agent), &dropshot_log, @@ -423,20 +456,33 @@ impl Oximeter { let client = reqwest::Client::new(); let notify_nexus = || async { debug!(log, "contacting nexus"); - client - .post(format!( - "http://{}/metrics/collectors", - config.nexus_address + let nexus_address = if let Some(address) = config.nexus_address { + address + } else { + SocketAddr::V6(SocketAddrV6::new( + resolver + .lookup_ipv6(SRV::Service(ServiceName::Nexus)) + .await + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?, + NEXUS_INTERNAL_PORT, + 0, + 0, )) + }; + + client + .post(format!("http://{}/metrics/collectors", nexus_address,)) .json(&nexus_client::types::OximeterInfo { address: server.local_addr().to_string(), collector_id: agent.id, }) .send() .await - .map_err(backoff::BackoffError::transient)? + .map_err(|e| backoff::BackoffError::transient(e.to_string()))? 
.error_for_status() - .map_err(backoff::BackoffError::transient) + .map_err(|e| backoff::BackoffError::transient(e.to_string())) }; let log_notification_failure = |error, delay| { warn!( diff --git a/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr b/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr index 1398febf119..dfb062bca75 100644 --- a/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr +++ b/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr @@ -1,7 +1,13 @@ -error: The following required arguments were not provided: - +oximeter +See README.adoc for more information USAGE: - oximeter [OPTIONS] + oximeter -For more information try --help +OPTIONS: + -h, --help Print help information + +SUBCOMMANDS: + help Print this message or the help of the given subcommand(s) + openapi Print the external OpenAPI Spec document and exit + run Start an Oximeter server diff --git a/oximeter/collector/tests/test_commands.rs b/oximeter/collector/tests/test_commands.rs index 7b910a5be4a..d3d66be0580 100644 --- a/oximeter/collector/tests/test_commands.rs +++ b/oximeter/collector/tests/test_commands.rs @@ -50,7 +50,7 @@ fn test_oximeter_openapi() { // But we do know where it is at compile time, so we load it then. let config = include_str!("../../collector/config.toml"); let config_path = write_config(config); - let exec = Exec::cmd(path_to_oximeter()).arg(&config_path).arg("--openapi"); + let exec = Exec::cmd(path_to_oximeter()).arg("openapi"); let (exit_status, stdout_text, stderr_text) = run_command(exec); fs::remove_file(&config_path).expect("failed to remove temporary file"); assert_exit_code(exit_status, EXIT_SUCCESS, &stderr_text); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index dde2ef47937..ea989c3eab0 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -12,17 +12,15 @@ use crate::illumos::zone::AddressRequest; use crate::params::{ServiceEnsureBody, ServiceRequest, ServiceType}; use crate::zone::Zones; use dropshot::ConfigDropshot; -use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; +use omicron_common::address::{Ipv6Subnet, OXIMETER_PORT, RACK_PREFIX}; use omicron_common::nexus_config::{ self, DeploymentConfig as NexusDeploymentConfig, }; -use omicron_common::postgres_config::PostgresConfigWithUrl; use slog::Logger; use std::collections::HashSet; use std::iter::FromIterator; use std::net::{IpAddr, Ipv6Addr, SocketAddr}; use std::path::{Path, PathBuf}; -use std::str::FromStr; use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; use uuid::Uuid; @@ -204,11 +202,11 @@ impl ServiceManager { existing_zones: &mut Vec, services: &Vec, ) -> Result<(), Error> { - info!(self.log, "Ensuring services are initialized: {:?}", services); // TODO(https://github.com/oxidecomputer/omicron/issues/726): // As long as we ensure the requests don't overlap, we could // parallelize this request. for service in services { + info!(self.log, "Ensuring service is initialized: {:?}", service); // Before we bother allocating anything for this request, check if // this service has already been created. let expected_zone_name = @@ -334,12 +332,7 @@ impl ServiceManager { subnet: Ipv6Subnet::::new( self.underlay_address, ), - // TODO: Switch to inferring this URL by DNS. 
- database: nexus_config::Database::FromUrl { - url: PostgresConfigWithUrl::from_str( - "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" - ).unwrap() - } + database: nexus_config::Database::FromDns, }; // Copy the partial config file to the expected location. @@ -434,8 +427,50 @@ impl ServiceManager { ServiceType::Oximeter => { info!(self.log, "Setting up oximeter service"); - // TODO: Implement with dynamic parameters, when address is - // dynamically assigned. + let address = service.addresses[0]; + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!("config/id={}", service.id), + ]) + .map_err(|err| Error::ZoneCommand { + intent: "set server ID".to_string(), + err, + })?; + + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!( + "config/address=[{}]:{}", + address, OXIMETER_PORT, + ), + ]) + .map_err(|err| Error::ZoneCommand { + intent: "set server address".to_string(), + err, + })?; + + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &default_smf_name, + "refresh", + ]) + .map_err(|err| Error::ZoneCommand { + intent: format!( + "Refresh SMF manifest {}", + default_smf_name + ), + err, + })?; } } @@ -494,7 +529,7 @@ impl ServiceManager { // that removal implicitly. warn!( self.log, - "Cannot request services on this sled, differing configurations: {:?}", + "Cannot request services on this sled, differing configurations: {:#?}", known_set.symmetric_difference(&requested_set) ); return Err(Error::ServicesAlreadyConfigured); diff --git a/smf/oximeter/config.toml b/smf/oximeter/config.toml index 4a0095fdd00..ca14fe6ec8b 100644 --- a/smf/oximeter/config.toml +++ b/smf/oximeter/config.toml @@ -1,11 +1,6 @@ # Example configuration file for running an oximeter collector server -id = "1da65e5b-210c-4859-a7d7-200c1e659972" -# Internal address of nexus -nexus_address = "[fd00:1122:3344:0101::3]:12221" - [db] -address = "[fd00:1122:3344:0101::5]:8123" batch_size = 1000 batch_interval = 5 # In seconds @@ -14,6 +9,3 @@ level = "debug" mode = "file" path = "/dev/stdout" if_exists = "append" - -[dropshot] -bind_address = "[fd00:1122:3344:0101::4]:12223" diff --git a/smf/oximeter/manifest.xml b/smf/oximeter/manifest.xml index 47e3cb254f1..d16efd90d99 100644 --- a/smf/oximeter/manifest.xml +++ b/smf/oximeter/manifest.xml @@ -18,10 +18,15 @@ + + + + + From 5d330bc2af4beaf01d44202bc3b1b9be6040af50 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 17:05:04 -0400 Subject: [PATCH 44/88] fix import --- nexus/test-utils/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 0df25b48fcb..1f9967da95b 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -18,7 +18,7 @@ use oximeter_collector::Oximeter; use oximeter_producer::Server as ProducerServer; use slog::o; use slog::Logger; -use std::net::{IpAddr, Ipv6Addr, SocketAddr}; +use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use std::path::Path; use std::time::Duration; use uuid::Uuid; From 130ffa056220d02e97c9fec8844ec158864ff4fa Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 17:13:32 -0400 Subject: [PATCH 45/88] okay not THAT dynamic just yet --- sled-agent/src/services.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index ea989c3eab0..be11bfb2a6d 100644 --- 
a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -16,11 +16,13 @@ use omicron_common::address::{Ipv6Subnet, OXIMETER_PORT, RACK_PREFIX}; use omicron_common::nexus_config::{ self, DeploymentConfig as NexusDeploymentConfig, }; +use omicron_common::postgres_config::PostgresConfigWithUrl; use slog::Logger; use std::collections::HashSet; use std::iter::FromIterator; use std::net::{IpAddr, Ipv6Addr, SocketAddr}; use std::path::{Path, PathBuf}; +use std::str::FromStr; use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; use uuid::Uuid; @@ -332,7 +334,12 @@ impl ServiceManager { subnet: Ipv6Subnet::::new( self.underlay_address, ), - database: nexus_config::Database::FromDns, + // TODO: Switch to inferring this URL by DNS. + database: nexus_config::Database::FromUrl { + url: PostgresConfigWithUrl::from_str( + "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" + ).unwrap(), + } }; // Copy the partial config file to the expected location. From 7ceb8fafe99003090054dc61244e5ef6ddc7decc Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 20:57:34 -0400 Subject: [PATCH 46/88] [nexus] Populate rack during initialization --- nexus/src/app/mod.rs | 16 ++--- nexus/src/app/rack.rs | 56 +++++++---------- nexus/src/app/update.rs | 25 +++++--- nexus/src/db/datastore.rs | 14 +++++ nexus/src/external_api/http_entrypoints.rs | 10 +-- nexus/src/populate.rs | 71 ++++++++++++++++++---- 6 files changed, 127 insertions(+), 65 deletions(-) diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 06f7264a124..76da20d6d19 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -10,6 +10,7 @@ use crate::config; use crate::context::OpContext; use crate::db; use crate::populate::populate_start; +use crate::populate::PopulateArgs; use crate::populate::PopulateStatus; use crate::saga_interface::SagaContext; use anyhow::anyhow; @@ -54,15 +55,12 @@ pub struct Nexus { /// uuid for this nexus instance. 
id: Uuid, - /// uuid for this rack (TODO should also be in persistent storage) + /// uuid for this rack rack_id: Uuid, /// general server log log: Logger, - /// cached rack identity metadata - api_rack_identity: db::model::RackIdentity, - /// persistent storage for resources in the control plane db_datastore: Arc, @@ -139,14 +137,18 @@ impl Nexus { authn::Context::internal_db_init(), Arc::clone(&db_datastore), ); - let populate_status = - populate_start(populate_ctx, Arc::clone(&db_datastore)); + + let populate_args = PopulateArgs::new(rack_id); + let populate_status = populate_start( + populate_ctx, + Arc::clone(&db_datastore), + populate_args, + ); let nexus = Nexus { id: config.deployment.id, rack_id, log: log.new(o!()), - api_rack_identity: db::model::RackIdentity::new(rack_id), db_datastore: Arc::clone(&db_datastore), authz: Arc::clone(&authz), sec_client: Arc::clone(&sec_client), diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index a9a10a616aa..dcc7ce92dbc 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -7,40 +7,21 @@ use crate::authz; use crate::context::OpContext; use crate::db; +use crate::db::lookup::LookupPath; use crate::internal_api::params::ServicePutRequest; -use futures::future::ready; -use futures::StreamExt; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; -use omicron_common::api::external::ListResult; +use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; -use omicron_common::api::external::LookupType; -use omicron_common::api::external::ResourceType; use uuid::Uuid; impl super::Nexus { - pub(crate) fn as_rack(&self) -> db::model::Rack { - db::model::Rack { - identity: self.api_rack_identity.clone(), - initialized: true, - tuf_base_url: None, - } - } - pub async fn racks_list( &self, opctx: &OpContext, pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResult { - opctx.authorize(authz::Action::Read, &authz::FLEET).await?; - - if let Some(marker) = pagparams.marker { - if *marker >= self.rack_id { - return Ok(futures::stream::empty().boxed()); - } - } - - Ok(futures::stream::once(ready(Ok(self.as_rack()))).boxed()) + ) -> ListResultVec { + self.db_datastore.rack_list(&opctx, pagparams).await } pub async fn rack_lookup( @@ -48,18 +29,25 @@ impl super::Nexus { opctx: &OpContext, rack_id: &Uuid, ) -> LookupResult { - let authz_rack = authz::Rack::new( - authz::FLEET, - *rack_id, - LookupType::ById(*rack_id), - ); - opctx.authorize(authz::Action::Read, &authz_rack).await?; + let (.., db_rack) = LookupPath::new(opctx, &self.db_datastore) + .rack_id(*rack_id) + .fetch() + .await?; + Ok(db_rack) + } - if *rack_id == self.rack_id { - Ok(self.as_rack()) - } else { - Err(Error::not_found_by_id(ResourceType::Rack, rack_id)) - } + /// Ensures that a rack exists in the DB. + /// + /// If the rack already exists, this function is a no-op. + pub async fn rack_insert( + &self, + opctx: &OpContext, + rack_id: Uuid, + ) -> Result<(), Error> { + self.datastore() + .rack_insert(opctx, &db::model::Rack::new(rack_id)) + .await?; + Ok(()) } /// Marks the rack as initialized with a set of services. 
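// Context for the handlers above: the rack is now a row in the database rather
// than the in-memory `as_rack()` value, so listing and lookup go through the
// datastore (`rack_list`, `LookupPath::rack_id`), and `rack_insert` is an
// idempotent upsert that is safe to call repeatedly with the same UUID.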
diff --git a/nexus/src/app/update.rs b/nexus/src/app/update.rs index 0d6721ec439..2d87a44a84f 100644 --- a/nexus/src/app/update.rs +++ b/nexus/src/app/update.rs @@ -24,11 +24,15 @@ use tokio::io::AsyncWriteExt; static BASE_ARTIFACT_DIR: &str = "/var/tmp/oxide_artifacts"; impl super::Nexus { - fn tuf_base_url(&self) -> Option { - self.updates_config.as_ref().map(|c| { - let rack = self.as_rack(); + async fn tuf_base_url( + &self, + opctx: &OpContext, + ) -> Result, Error> { + let rack = self.rack_lookup(opctx, &self.rack_id).await?; + + Ok(self.updates_config.as_ref().map(|c| { rack.tuf_base_url.unwrap_or_else(|| c.default_base_url.clone()) - }) + })) } pub async fn updates_refresh_metadata( @@ -42,10 +46,11 @@ impl super::Nexus { message: "updates system not configured".into(), } })?; - let base_url = - self.tuf_base_url().ok_or_else(|| Error::InvalidRequest { + let base_url = self.tuf_base_url(opctx).await?.ok_or_else(|| { + Error::InvalidRequest { message: "updates system not configured".into(), - })?; + } + })?; let trusted_root = tokio::fs::read(&updates_config.trusted_root) .await .map_err(|e| Error::InternalError { @@ -129,8 +134,10 @@ impl super::Nexus { artifact: UpdateArtifact, ) -> Result, Error> { let mut base_url = - self.tuf_base_url().ok_or_else(|| Error::InvalidRequest { - message: "updates system not configured".into(), + self.tuf_base_url(opctx).await?.ok_or_else(|| { + Error::InvalidRequest { + message: "updates system not configured".into(), + } })?; if !base_url.ends_with('/') { base_url.push('/'); diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 6814b6276ac..b1c984f2218 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -147,6 +147,20 @@ impl DataStore { Ok(self.pool.pool()) } + pub async fn rack_list( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::rack::dsl; + paginated(dsl::rack, dsl::id, pagparams) + .select(Rack::as_select()) + .load_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + /// Stores a new rack in the database. /// /// This function is a no-op if the rack already exists. diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index b47e6ecf12c..cab200e376f 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -2940,11 +2940,13 @@ async fn hardware_racks_get( let query = query_params.into_inner(); let handler = async { let opctx = OpContext::for_external_api(&rqctx).await?; - let rack_stream = nexus + let racks = nexus .racks_list(&opctx, &data_page_params_for(&rqctx, &query)?) - .await?; - let view_list = to_list::(rack_stream).await; - Ok(HttpResponseOk(ScanById::results_page(&query, view_list)?)) + .await? + .into_iter() + .map(|r| r.into()) + .collect(); + Ok(HttpResponseOk(ScanById::results_page(&query, racks)?)) }; apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } diff --git a/nexus/src/populate.rs b/nexus/src/populate.rs index 9f6bcdcad20..85223aef2b1 100644 --- a/nexus/src/populate.rs +++ b/nexus/src/populate.rs @@ -43,13 +43,14 @@ //! each populator behaves as expected in the above ways. 
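// Note on the changes below: populators now receive a `PopulateArgs` (today
// just the rack UUID) alongside the datastore, which is what lets the new
// `PopulateRack` populator upsert the rack row at startup with the same retry
// and idempotency behavior the other populators already follow.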
use crate::context::OpContext; -use crate::db::DataStore; +use crate::db::{self, DataStore}; use futures::future::BoxFuture; use futures::FutureExt; use lazy_static::lazy_static; use omicron_common::api::external::Error; use omicron_common::backoff; use std::sync::Arc; +use uuid::Uuid; #[derive(Clone, Debug)] pub enum PopulateStatus { @@ -58,14 +59,26 @@ pub enum PopulateStatus { Failed(String), } +/// Auxiliary data necessary to populate the database. +pub struct PopulateArgs { + rack_id: Uuid, +} + +impl PopulateArgs { + pub fn new(rack_id: Uuid) -> Self { + Self { rack_id } + } +} + pub fn populate_start( opctx: OpContext, datastore: Arc, + args: PopulateArgs, ) -> tokio::sync::watch::Receiver { let (tx, rx) = tokio::sync::watch::channel(PopulateStatus::NotDone); tokio::spawn(async move { - let result = populate(&opctx, &datastore).await; + let result = populate(&opctx, &datastore, &args).await; if let Err(error) = tx.send(match result { Ok(()) => PopulateStatus::Done, Err(message) => PopulateStatus::Failed(message), @@ -80,17 +93,19 @@ pub fn populate_start( async fn populate( opctx: &OpContext, datastore: &DataStore, + args: &PopulateArgs, ) -> Result<(), String> { for p in *ALL_POPULATORS { let db_result = backoff::retry_notify( backoff::internal_service_policy(), || async { - p.populate(opctx, datastore).await.map_err(|error| match &error - { - Error::ServiceUnavailable { .. } => { - backoff::BackoffError::transient(error) + p.populate(opctx, datastore, args).await.map_err(|error| { + match &error { + Error::ServiceUnavailable { .. } => { + backoff::BackoffError::transient(error) + } + _ => backoff::BackoffError::Permanent(error), } - _ => backoff::BackoffError::Permanent(error), }) }, |error, delay| { @@ -130,6 +145,7 @@ trait Populator: std::fmt::Debug + Send + Sync { &self, opctx: &'a OpContext, datastore: &'a DataStore, + args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b; @@ -143,6 +159,7 @@ impl Populator for PopulateBuiltinUsers { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -159,6 +176,7 @@ impl Populator for PopulateBuiltinRoles { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -175,6 +193,7 @@ impl Populator for PopulateBuiltinRoleAssignments { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -192,6 +211,7 @@ impl Populator for PopulateBuiltinSilos { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -214,6 +234,7 @@ impl Populator for PopulateSiloUsers { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -230,6 +251,7 @@ impl Populator for PopulateSiloUserRoleAssignments { &self, opctx: &'a OpContext, datastore: &'a DataStore, + _args: &'a PopulateArgs, ) -> BoxFuture<'b, Result<(), Error>> where 'a: 'b, @@ -241,19 +263,43 @@ impl Populator for PopulateSiloUserRoleAssignments { } } +#[derive(Debug)] +struct PopulateRack; +impl Populator for PopulateRack { + fn populate<'a, 'b>( + &self, + opctx: &'a OpContext, + datastore: &'a DataStore, + args: &'a PopulateArgs, + ) -> BoxFuture<'b, Result<(), Error>> + where + 'a: 'b, + { + async { + datastore + .rack_insert(opctx, &db::model::Rack::new(args.rack_id)) + .await?; + 
Ok(()) + } + .boxed() + } +} + lazy_static! { - static ref ALL_POPULATORS: [&'static dyn Populator; 6] = [ + static ref ALL_POPULATORS: [&'static dyn Populator; 7] = [ &PopulateBuiltinUsers, &PopulateBuiltinRoles, &PopulateBuiltinRoleAssignments, &PopulateBuiltinSilos, &PopulateSiloUsers, &PopulateSiloUserRoleAssignments, + &PopulateRack, ]; } #[cfg(test)] mod test { + use super::PopulateArgs; use super::Populator; use super::ALL_POPULATORS; use crate::authn; @@ -265,6 +311,7 @@ mod test { use omicron_common::api::external::Error; use omicron_test_utils::dev; use std::sync::Arc; + use uuid::Uuid; #[tokio::test] async fn test_populators() { @@ -287,16 +334,18 @@ mod test { ); let log = &logctx.log; + let args = PopulateArgs::new(Uuid::new_v4()); + // Running each populator once under normal conditions should work. info!(&log, "populator {:?}, run 1", p); - p.populate(&opctx, &datastore) + p.populate(&opctx, &datastore, &args) .await .with_context(|| format!("populator {:?} (try 1)", p)) .unwrap(); // It should also work fine to run it again. info!(&log, "populator {:?}, run 2 (idempotency check)", p); - p.populate(&opctx, &datastore) + p.populate(&opctx, &datastore, &args) .await .with_context(|| { format!( @@ -331,7 +380,7 @@ mod test { ); info!(&log, "populator {:?}, with database offline", p); - match p.populate(&opctx, &datastore).await { + match p.populate(&opctx, &datastore, &args).await { Err(Error::ServiceUnavailable { .. }) => (), Ok(_) => panic!( "populator {:?}: unexpectedly succeeded with no database", From 9fc49949d8506fed511a994edbd7604ef79fd1af Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 21:08:11 -0400 Subject: [PATCH 47/88] Cleanup imports --- sled-agent/src/rack_setup/service.rs | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 55c837ceffa..397e191cda7 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -15,6 +15,9 @@ use crate::rack_setup::plan::service::{ use crate::rack_setup::plan::sled::{ Plan as SledPlan, PlanError as SledPlanError, }; +use internal_dns_client::multiclient::{ + DnsError, Resolver as DnsResolver, Updater as DnsUpdater, +}; use internal_dns_client::names::{ServiceName, SRV}; use nexus_client::{ types as NexusTypes, Client as NexusClient, Error as NexusError, @@ -72,9 +75,8 @@ pub enum SetupServiceError { #[error("Failed to construct an HTTP client: {0}")] HttpClient(reqwest::Error), - // XXX CLEAN UP - #[error(transparent)] - Dns(#[from] internal_dns_client::Error), + #[error("Failed to access DNS server: {0}")] + Dns(#[from] DnsError), } /// The interface to the Rack Setup Service. 
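// Context for the struct below: the DNS updater lives in a tokio `OnceCell` so
// it is built exactly once (from the AZ subnet) and then reused by both the
// dataset- and service-initialization paths, each of which calls
// `insert_dns_records(..)` after the matching sled requests complete.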
@@ -157,7 +159,7 @@ enum PeerExpectation { struct ServiceInner { log: Logger, peer_monitor: Mutex, - dns_servers: OnceCell, + dns_servers: OnceCell, } impl ServiceInner { @@ -214,9 +216,7 @@ impl ServiceInner { .insert_dns_records(datasets) .await .map_err(BackoffError::transient)?; - Ok::<(), BackoffError>( - (), - ) + Ok::<(), BackoffError>(()) }; let log_failure = |error, _| { warn!(self.log, "failed to set DNS records"; "error" => ?error); @@ -271,9 +271,7 @@ impl ServiceInner { .insert_dns_records(services) .await .map_err(BackoffError::transient)?; - Ok::<(), BackoffError>( - (), - ) + Ok::<(), BackoffError>(()) }; let log_failure = |error, _| { warn!(self.log, "failed to set DNS records"; "error" => ?error); @@ -336,10 +334,8 @@ impl ServiceInner { ) -> Result<(), SetupServiceError> { info!(self.log, "Handing off control to Nexus"); - let resolver = internal_dns_client::multiclient::Resolver::new( - &config.az_subnet(), - ) - .expect("Failed to create DNS resolver"); + let resolver = DnsResolver::new(&config.az_subnet()) + .expect("Failed to create DNS resolver"); let ip = resolver .lookup_ip(SRV::Service(ServiceName::Nexus)) .await @@ -569,7 +565,7 @@ impl ServiceInner { .into_iter() .collect::>()?; - let dns_servers = internal_dns_client::multiclient::Updater::new( + let dns_servers = DnsUpdater::new( &config.az_subnet(), self.log.new(o!("client" => "DNS")), ); From 11ebb7bf85630b4441c444cf36888f3dd7ebe065 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 21:36:27 -0400 Subject: [PATCH 48/88] [nexus] Add tests for rack endpoints --- nexus/src/app/mod.rs | 4 +++ nexus/tests/integration_tests/mod.rs | 1 + nexus/tests/integration_tests/rack.rs | 41 +++++++++++++++++++++++++++ 3 files changed, 46 insertions(+) create mode 100644 nexus/tests/integration_tests/rack.rs diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 682bb406c77..a4be4beda52 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -229,6 +229,10 @@ impl Nexus { &self.tunables } + pub fn rack_id(&self) -> Uuid { + self.rack_id + } + pub async fn wait_for_populate(&self) -> Result<(), anyhow::Error> { let mut my_rx = self.populate_status.clone(); loop { diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index de5de9679bd..6c3e52bd785 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -14,6 +14,7 @@ mod instances; mod organizations; mod oximeter; mod projects; +mod rack; mod role_assignments; mod roles_builtin; mod router_routes; diff --git a/nexus/tests/integration_tests/rack.rs b/nexus/tests/integration_tests/rack.rs new file mode 100644 index 00000000000..dfcbde9740f --- /dev/null +++ b/nexus/tests/integration_tests/rack.rs @@ -0,0 +1,41 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use nexus_test_utils::http_testing::AuthnMode; +use nexus_test_utils::http_testing::NexusRequest; +use nexus_test_utils::ControlPlaneTestContext; +use nexus_test_utils_macros::nexus_test; +use omicron_nexus::external_api::views::Rack; + +#[nexus_test] +async fn test_list_own_rack(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + let racks_url = "/hardware/racks"; + let racks: Vec = + NexusRequest::iter_collection_authn(client, racks_url, "", None) + .await + .expect("failed to list racks") + .all_items; + + assert_eq!(1, racks.len()); + assert_eq!(cptestctx.server.apictx.nexus.rack_id(), racks[0].identity.id); +} + +#[nexus_test] +async fn test_get_own_rack(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + let expected_id = cptestctx.server.apictx.nexus.rack_id(); + let rack_url = format!("/hardware/racks/{}", expected_id); + let rack = NexusRequest::object_get(client, &rack_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to get rack") + .parsed_body::() + .unwrap(); + + assert_eq!(expected_id, rack.identity.id); +} From 5188880e32c7186a51ac7d69d0ba6d87af59ae70 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 21:36:27 -0400 Subject: [PATCH 49/88] [nexus] Add tests for rack endpoints --- nexus/src/app/mod.rs | 4 +++ nexus/tests/integration_tests/mod.rs | 1 + nexus/tests/integration_tests/rack.rs | 41 +++++++++++++++++++++++++++ 3 files changed, 46 insertions(+) create mode 100644 nexus/tests/integration_tests/rack.rs diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 76da20d6d19..e8afa7f3528 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -202,6 +202,10 @@ impl Nexus { &self.tunables } + pub fn rack_id(&self) -> Uuid { + self.rack_id + } + pub async fn wait_for_populate(&self) -> Result<(), anyhow::Error> { let mut my_rx = self.populate_status.clone(); loop { diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index de5de9679bd..6c3e52bd785 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -14,6 +14,7 @@ mod instances; mod organizations; mod oximeter; mod projects; +mod rack; mod role_assignments; mod roles_builtin; mod router_routes; diff --git a/nexus/tests/integration_tests/rack.rs b/nexus/tests/integration_tests/rack.rs new file mode 100644 index 00000000000..dfcbde9740f --- /dev/null +++ b/nexus/tests/integration_tests/rack.rs @@ -0,0 +1,41 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use nexus_test_utils::http_testing::AuthnMode; +use nexus_test_utils::http_testing::NexusRequest; +use nexus_test_utils::ControlPlaneTestContext; +use nexus_test_utils_macros::nexus_test; +use omicron_nexus::external_api::views::Rack; + +#[nexus_test] +async fn test_list_own_rack(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + let racks_url = "/hardware/racks"; + let racks: Vec = + NexusRequest::iter_collection_authn(client, racks_url, "", None) + .await + .expect("failed to list racks") + .all_items; + + assert_eq!(1, racks.len()); + assert_eq!(cptestctx.server.apictx.nexus.rack_id(), racks[0].identity.id); +} + +#[nexus_test] +async fn test_get_own_rack(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + let expected_id = cptestctx.server.apictx.nexus.rack_id(); + let rack_url = format!("/hardware/racks/{}", expected_id); + let rack = NexusRequest::object_get(client, &rack_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to get rack") + .parsed_body::() + .unwrap(); + + assert_eq!(expected_id, rack.identity.id); +} From 7cbac9f07beb138fa6f8464aef2e0c761a3bc44a Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 20 Jun 2022 23:24:40 -0400 Subject: [PATCH 50/88] [nexus] Optionally resolve clickhouse, cockroach from DNS names --- Cargo.lock | 1 + common/src/address.rs | 1 + nexus/Cargo.toml | 1 + nexus/src/app/mod.rs | 16 +++++++++--- nexus/src/app/oximeter.rs | 50 +++++++++++++++++++++++++++++++++++++ nexus/src/config.rs | 11 ++++---- nexus/src/context.rs | 43 ++++++++++++++++++++++++------- nexus/src/lib.rs | 3 ++- nexus/test-utils/src/lib.rs | 8 +++++- 9 files changed, 115 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 876ef959b8e..126aae1dbc9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3056,6 +3056,7 @@ dependencies = [ "http", "httptest", "hyper", + "internal-dns-client", "ipnetwork", "lazy_static", "libc", diff --git a/common/src/address.rs b/common/src/address.rs index 708fbff12bd..b9558f78be6 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -33,6 +33,7 @@ pub const SLED_AGENT_PORT: u16 = 12345; /// The port propolis-server listens on inside the propolis zone. pub const PROPOLIS_PORT: u16 = 12400; +pub const COCKROACH_PORT: u16 = 32221; pub const CLICKHOUSE_PORT: u16 = 8123; pub const OXIMETER_PORT: u16 = 12223; diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index d0a16bbdef7..cf4e02e8201 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -26,6 +26,7 @@ hex = "0.4.3" http = "0.2.7" hyper = "0.14" db-macros = { path = "src/db/db-macros" } +internal-dns-client = { path = "../internal-dns-client" } ipnetwork = "0.18" lazy_static = "1.4.0" libc = "0.2.126" diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index e8afa7f3528..4f9fe29b006 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -4,6 +4,7 @@ //! Nexus, the service that operates much of the control plane in an Oxide fleet +use crate::app::oximeter::LazyTimeseriesClient; use crate::authn; use crate::authz; use crate::config; @@ -77,7 +78,7 @@ pub struct Nexus { populate_status: tokio::sync::watch::Receiver, /// Client to the timeseries database. - timeseries_client: oximeter_db::Client, + timeseries_client: LazyTimeseriesClient, /// Contents of the trusted root role for the TUF repository. 
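    // Note on `timeseries_client` above: `LazyTimeseriesClient` records how to
    // reach ClickHouse (a fixed address from config, or the Clickhouse SRV
    // record in internal DNS) but only constructs a client when `get()` is
    // called, since ClickHouse may not be up yet when Nexus starts.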
updates_config: Option, @@ -102,9 +103,10 @@ pub struct Nexus { impl Nexus { /// Create a new Nexus instance for the given rack id `rack_id` // TODO-polish revisit rack metadata - pub fn new_with_id( + pub async fn new_with_id( rack_id: Uuid, log: Logger, + resolver: internal_dns_client::multiclient::Resolver, pool: db::Pool, config: &config::Config, authz: Arc, @@ -124,8 +126,16 @@ impl Nexus { )), sec_store, )); + + // Connect to clickhouse - but do so lazily. + // Clickhouse may not be executing when Nexus starts. let timeseries_client = - oximeter_db::Client::new(config.pkg.timeseries_db.address, &log); + if let Some(address) = &config.pkg.timeseries_db.address { + // If an address was provided, use it instead of DNS. + LazyTimeseriesClient::new_from_address(log.clone(), *address) + } else { + LazyTimeseriesClient::new_from_dns(log.clone(), resolver) + }; // TODO-cleanup We may want a first-class subsystem for managing startup // background tasks. It could use a Future for each one, a status enum diff --git a/nexus/src/app/oximeter.rs b/nexus/src/app/oximeter.rs index e270868f90c..7f6fb9b6ffd 100644 --- a/nexus/src/app/oximeter.rs +++ b/nexus/src/app/oximeter.rs @@ -9,6 +9,11 @@ use crate::context::OpContext; use crate::db; use crate::db::identity::Asset; use crate::internal_api::params::OximeterInfo; +use internal_dns_client::{ + multiclient::{ResolveError, Resolver}, + names::{ServiceName, SRV}, +}; +use omicron_common::address::CLICKHOUSE_PORT; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; @@ -19,12 +24,54 @@ use oximeter_client::Client as OximeterClient; use oximeter_db::TimeseriesSchema; use oximeter_db::TimeseriesSchemaPaginationParams; use oximeter_producer::register; +use slog::Logger; use std::convert::TryInto; use std::net::SocketAddr; use std::num::NonZeroU32; use std::time::Duration; use uuid::Uuid; +/// A client which knows how to connect to Clickhouse, but does so +/// only when a request is actually made. +/// +/// This allows callers to set up the mechanism of connection (by address +/// or DNS) separately from actually making that connection. This +/// is particularly useful in situations where configurations are parsed +/// prior to Clickhouse existing. +pub struct LazyTimeseriesClient { + log: Logger, + source: ClientSource, +} + +enum ClientSource { + FromDns { resolver: Resolver }, + FromIp { address: SocketAddr }, +} + +impl LazyTimeseriesClient { + pub fn new_from_dns(log: Logger, resolver: Resolver) -> Self { + Self { log, source: ClientSource::FromDns { resolver } } + } + + pub fn new_from_address(log: Logger, address: SocketAddr) -> Self { + Self { log, source: ClientSource::FromIp { address } } + } + + pub async fn get(&self) -> Result { + let address = match &self.source { + ClientSource::FromIp { address } => *address, + ClientSource::FromDns { resolver } => SocketAddr::new( + resolver + .lookup_ip(SRV::Service(ServiceName::Clickhouse)) + .await?, + CLICKHOUSE_PORT, + ), + }; + + Ok(oximeter_db::Client::new(address, &self.log)) + } +} + impl super::Nexus { /// Insert a new record of an Oximeter collector server. pub async fn upsert_oximeter_collector( @@ -160,6 +207,9 @@ impl super::Nexus { ) -> Result, Error> { opctx.authorize(authz::Action::Read, &authz::FLEET).await?; self.timeseries_client + .get() + .await + .map_err(|e| Error::internal_error(&e.to_string()))? 
.timeseries_schema_list(&pag_params.page, limit) .await .map_err(|e| match e { diff --git a/nexus/src/config.rs b/nexus/src/config.rs index 98cbf0169cf..5ca452e7388 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -45,10 +45,11 @@ pub struct UpdatesConfig { pub default_base_url: String, } -/// Configuration for the timeseries database. -#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +/// Optional configuration for the timeseries database. +#[derive(Clone, Debug, Default, Deserialize, PartialEq, Serialize)] pub struct TimeseriesDbConfig { - pub address: SocketAddr, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub address: Option, } // A deserializable type that does no validation on the tunable parameters. @@ -132,7 +133,7 @@ pub struct PackageConfig { /// Authentication-related configuration pub authn: AuthnConfig, /// Timeseries database configuration. - // TODO: Should this be removed? Nexus needs to initialize it. + #[serde(default)] pub timeseries_db: TimeseriesDbConfig, /// Updates-related configuration. Updates APIs return 400 Bad Request when this is /// unconfigured. @@ -381,7 +382,7 @@ mod test { path: "/nonexistent/path".to_string() }, timeseries_db: TimeseriesDbConfig { - address: "[::1]:8123".parse().unwrap() + address: Some("[::1]:8123".parse().unwrap()) }, updates: Some(UpdatesConfig { trusted_root: PathBuf::from("/path/to/root.json"), diff --git a/nexus/src/context.rs b/nexus/src/context.rs index e940bef6d10..c4de9e5d044 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -18,8 +18,11 @@ use authn::external::session_cookie::HttpAuthnSessionCookie; use authn::external::spoof::HttpAuthnSpoof; use authn::external::HttpAuthnScheme; use chrono::{DateTime, Duration, Utc}; +use internal_dns_client::names::{ServiceName, SRV}; +use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, COCKROACH_PORT}; use omicron_common::api::external::Error; use omicron_common::nexus_config; +use omicron_common::postgres_config::PostgresConfigWithUrl; use oximeter::types::ProducerRegistry; use oximeter_instruments::http::{HttpService, LatencyTracker}; use slog::Logger; @@ -27,6 +30,7 @@ use std::collections::BTreeMap; use std::env; use std::fmt::Debug; use std::path::PathBuf; +use std::str::FromStr; use std::sync::Arc; use std::time::Instant; use std::time::SystemTime; @@ -68,7 +72,7 @@ pub struct ConsoleConfig { impl ServerContext { /// Create a new context with the given rack id and log. This creates the /// underlying nexus as well. - pub fn new( + pub async fn new( rack_id: Uuid, log: Logger, config: &config::Config, @@ -136,23 +140,44 @@ impl ServerContext { // like console index.html. 
leaving that out for now so we don't break // nexus in dev for everyone + // Set up DNS Client + let az_subnet = + Ipv6Subnet::::new(config.deployment.subnet.net().ip()); + info!(log, "Setting up resolver on subnet: {:?}", az_subnet); + let resolver = + internal_dns_client::multiclient::Resolver::new(&az_subnet) + .map_err(|e| format!("Failed to create DNS resolver: {}", e))?; + // Set up DB pool let url = match &config.deployment.database { nexus_config::Database::FromUrl { url } => url.clone(), nexus_config::Database::FromDns => { - todo!("Not yet implemented"); + info!(log, "Accessing DB url from DNS"); + let address = resolver + .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) + .await + .map_err(|e| format!("Failed to lookup IP: {}", e))?; + info!(log, "DB address: {}", address); + PostgresConfigWithUrl::from_str(&format!( + "postgresql://root@[{}]:{}/omicron?sslmode=disable", + address, COCKROACH_PORT + )) + .map_err(|e| format!("Cannot parse Postgres URL: {}", e))? } }; let pool = db::Pool::new(&db::Config { url }); + let nexus = Nexus::new_with_id( + rack_id, + log.new(o!("component" => "nexus")), + resolver, + pool, + config, + Arc::clone(&authz), + ) + .await; Ok(Arc::new(ServerContext { - nexus: Nexus::new_with_id( - rack_id, - log.new(o!("component" => "nexus")), - pool, - config, - Arc::clone(&authz), - ), + nexus, log, external_authn, internal_authn, diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index f0d5210930b..5ab34280c74 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -89,7 +89,8 @@ impl Server { let ctxlog = log.new(o!("component" => "ServerContext")); let apictx = - ServerContext::new(config.deployment.rack_id, ctxlog, &config)?; + ServerContext::new(config.deployment.rack_id, ctxlog, &config) + .await?; let http_server_starter_external = dropshot::HttpServerStarter::new( &config.deployment.dropshot_external, diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 1f9967da95b..f17d270296d 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -101,7 +101,13 @@ pub async fn test_setup_with_config( // Store actual address/port information for the databases after they start. 
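    // The ClickHouse address in `timeseries_db` is optional now (production
    // zones can discover it via DNS), so the test harness below pins the
    // database URL directly and patches the real ClickHouse port into the
    // config; the `expect` documents that test configs must supply an address.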
config.deployment.database = nexus_config::Database::FromUrl { url: database.pg_config().clone() }; - config.pkg.timeseries_db.address.set_port(clickhouse.port()); + config + .pkg + .timeseries_db + .address + .as_mut() + .expect("Tests expect to set a port of Clickhouse") + .set_port(clickhouse.port()); let server = omicron_nexus::Server::start(&config, &logctx.log).await.unwrap(); From 1822762765dfe4ca2292764c36f2c61f7ceb7d7e Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 21 Jun 2022 00:17:54 -0400 Subject: [PATCH 51/88] [nexus] Add tunable to disable background tasks --- nexus/src/config.rs | 28 +++++++++++++++++++++++++--- nexus/src/lib.rs | 7 ++++++- nexus/tests/config.test.toml | 2 ++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/nexus/src/config.rs b/nexus/src/config.rs index 5ca452e7388..e273cfbc1f7 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -56,18 +56,33 @@ pub struct TimeseriesDbConfig { #[derive(Clone, Debug, Deserialize, PartialEq)] struct UnvalidatedTunables { max_vpc_ipv4_subnet_prefix: u8, + enable_background_tasks: bool, +} + +fn deserialize_ipv4_subnet<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + let prefix = u8::deserialize(deserializer)?; + Tunables::validate_ipv4_prefix(prefix) + .map_err(|e| serde::de::Error::custom(e))?; + Ok(prefix) } /// Tunable configuration parameters, intended for use in test environments or /// other situations in which experimentation / tuning is valuable. #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] -#[serde(try_from = "UnvalidatedTunables")] pub struct Tunables { /// The maximum prefix size supported for VPC Subnet IPv4 subnetworks. /// /// Note that this is the maximum _prefix_ size, which sets the minimum size /// of the subnet. + #[serde(default, deserialize_with = "deserialize_ipv4_subnet")] pub max_vpc_ipv4_subnet_prefix: u8, + + /// Identifies whether or not background tasks will be enabled. + #[serde(default)] + pub enable_background_tasks: bool, } // Convert from the unvalidated tunables, verifying each parameter as needed. @@ -78,6 +93,7 @@ impl TryFrom for Tunables { Tunables::validate_ipv4_prefix(unvalidated.max_vpc_ipv4_subnet_prefix)?; Ok(Tunables { max_vpc_ipv4_subnet_prefix: unvalidated.max_vpc_ipv4_subnet_prefix, + enable_background_tasks: unvalidated.enable_background_tasks, }) } } @@ -119,7 +135,10 @@ pub const MAX_VPC_IPV4_SUBNET_PREFIX: u8 = 26; impl Default for Tunables { fn default() -> Self { - Tunables { max_vpc_ipv4_subnet_prefix: MAX_VPC_IPV4_SUBNET_PREFIX } + Tunables { + max_vpc_ipv4_subnet_prefix: MAX_VPC_IPV4_SUBNET_PREFIX, + enable_background_tasks: true, + } } } @@ -388,7 +407,10 @@ mod test { trusted_root: PathBuf::from("/path/to/root.json"), default_base_url: "http://example.invalid/".into(), }), - tunables: Tunables { max_vpc_ipv4_subnet_prefix: 27 }, + tunables: Tunables { + max_vpc_ipv4_subnet_prefix: 27, + enable_background_tasks: false, + }, }, } ); diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 1a461a61559..e11e7745b55 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -130,7 +130,12 @@ impl Server { // Wait until RSS handoff completes. let opctx = apictx.nexus.opctx_for_background(); apictx.nexus.await_rack_initialization(&opctx).await; - apictx.nexus.start_background_tasks().map_err(|e| e.to_string())?; + + // With the exception of integration tests environments, + // we expect background tasks to be enabled. 
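// A minimal illustrative sketch of the new knob, assuming it sits alongside
// nexus/src/config.rs so that `crate::config::Tunables` resolves; it mirrors
// the `[tunables]` table used by nexus/tests/config.test.toml below.
#[cfg(test)]
mod tunables_sketch {
    use crate::config::Tunables;

    #[test]
    fn background_tasks_can_be_disabled() {
        // Tests opt out of the background task runner; the prefix value
        // matches the one used by the integration-test config.
        let tunables: Tunables = toml::from_str(
            "max_vpc_ipv4_subnet_prefix = 29\n\
             enable_background_tasks = false",
        )
        .unwrap();
        assert!(!tunables.enable_background_tasks);
        assert_eq!(tunables.max_vpc_ipv4_subnet_prefix, 29);
    }
}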
+ if config.pkg.tunables.enable_background_tasks { + apictx.nexus.start_background_tasks().map_err(|e| e.to_string())?; + } let http_server_starter_external = dropshot::HttpServerStarter::new( &config.deployment.dropshot_external, diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index fdfeb5effb4..c451a341a5e 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -34,6 +34,8 @@ address = "[::1]:0" [tunables] # Allow small subnets, so we can test IP address exhaustion easily / quickly max_vpc_ipv4_subnet_prefix = 29 +# Disable background tests to help with test determinism +enable_background_tasks = false [deployment] # Identifier for this instance of Nexus. From 01c746533d0b5c2e9af844d8fd4af23998de9a82 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 21 Jun 2022 00:25:35 -0400 Subject: [PATCH 52/88] [nexus] Add service manager to Nexus - Adds multiple indices to the database for service querying - Creates a background "TaskRunner" which periodically executes services. - Within the "TaskRunner", creates a background task for managing services. - Adds a configuration option to disable the background task for tests. --- common/src/address.rs | 8 +- common/src/sql/dbinit.sql | 23 +- nexus/src/app/background/mod.rs | 40 + nexus/src/app/background/services.rs | 409 ++++++++ nexus/src/app/mod.rs | 28 +- nexus/src/app/rack.rs | 3 +- nexus/src/app/sled.rs | 4 +- nexus/src/config.rs | 38 +- nexus/src/db/datastore.rs | 1139 ++++++++++++++++++++- nexus/src/db/ipv6.rs | 12 +- nexus/src/db/model/dataset.rs | 37 +- nexus/src/db/model/dataset_kind.rs | 15 +- nexus/src/db/model/service.rs | 32 +- nexus/src/db/model/service_kind.rs | 4 +- nexus/src/db/model/sled.rs | 2 +- nexus/src/db/model/zpool.rs | 2 +- nexus/src/internal_api/params.rs | 2 +- nexus/tests/config.test.toml | 1 + nexus/tests/integration_tests/datasets.rs | 8 +- smf/nexus/config-partial.toml | 4 + 20 files changed, 1717 insertions(+), 94 deletions(-) create mode 100644 nexus/src/app/background/mod.rs create mode 100644 nexus/src/app/background/services.rs diff --git a/common/src/address.rs b/common/src/address.rs index b9558f78be6..32fef1c3994 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -20,12 +20,12 @@ pub const SLED_PREFIX: u8 = 64; /// The amount of redundancy for DNS servers. /// /// Must be less than MAX_DNS_REDUNDANCY. -pub const DNS_REDUNDANCY: usize = 1; +pub const DNS_REDUNDANCY: u32 = 1; /// The maximum amount of redundancy for DNS servers. /// /// This determines the number of addresses which are /// reserved for DNS servers. -pub const MAX_DNS_REDUNDANCY: usize = 5; +pub const MAX_DNS_REDUNDANCY: u32 = 5; pub const DNS_PORT: u16 = 53; pub const DNS_SERVER_PORT: u16 = 5353; @@ -34,9 +34,11 @@ pub const SLED_AGENT_PORT: u16 = 12345; /// The port propolis-server listens on inside the propolis zone. pub const PROPOLIS_PORT: u16 = 12400; pub const COCKROACH_PORT: u16 = 32221; +pub const CRUCIBLE_PORT: u16 = 32345; pub const CLICKHOUSE_PORT: u16 = 8123; pub const OXIMETER_PORT: u16 = 12223; +pub const NEXUS_EXTERNAL_PORT: u16 = 12220; pub const NEXUS_INTERNAL_PORT: u16 = 12221; // Anycast is a mechanism in which a single IP address is shared by multiple @@ -177,7 +179,7 @@ mod test { // Observe the first DNS subnet within this reserved rack subnet. 
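         // With DNS_REDUNDANCY = 1, a single DNS subnet is returned here;
         // MAX_DNS_REDUNDANCY (5) only bounds how many such subnets are
         // reserved within the rack subnet, not how many are actually used.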
let dns_subnets = rack_subnet.get_dns_subnets(); - assert_eq!(DNS_REDUNDANCY, dns_subnets.len()); + assert_eq!(DNS_REDUNDANCY, dns_subnets.len() as u32); // The DNS address and GZ address should be only differing by one. assert_eq!( diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index e358c9a227e..0ffa5450afc 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -86,10 +86,11 @@ CREATE TABLE omicron.public.sled ( last_used_address INET NOT NULL ); -/* Add an index which lets us look up sleds on a rack */ +/* Add an index which lets us look up the sleds on a rack */ CREATE INDEX ON omicron.public.sled ( rack_id -) WHERE time_deleted IS NULL; +) WHERE + time_deleted IS NULL; /* * Services @@ -117,7 +118,13 @@ CREATE TABLE omicron.public.service ( /* Add an index which lets us look up the services on a sled */ CREATE INDEX ON omicron.public.service ( - sled_id + sled_id, + kind +); + +/* Add an index which lets us look up services of a particular kind on a sled */ +CREATE INDEX ON omicron.public.service ( + kind ); /* @@ -140,6 +147,11 @@ CREATE TABLE omicron.public.Zpool ( total_size INT NOT NULL ); +/* Create an index which allows looking up all zpools on a sled */ +CREATE INDEX on omicron.public.Zpool ( + sled_id +) WHERE time_deleted IS NULL; + CREATE TYPE omicron.public.dataset_kind AS ENUM ( 'crucible', 'cockroach', @@ -170,6 +182,11 @@ CREATE TABLE omicron.public.Dataset ( size_used INT ); +/* Create an index which allows looking up all datasets in a pool */ +CREATE INDEX on omicron.public.Dataset ( + pool_id +) WHERE time_deleted IS NULL; + /* Create an index on the size usage for Crucible's allocation */ CREATE INDEX on omicron.public.Dataset ( size_used diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs new file mode 100644 index 00000000000..82e08c2b680 --- /dev/null +++ b/nexus/src/app/background/mod.rs @@ -0,0 +1,40 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background tasks managed by Nexus. + +mod services; + +use crate::app::Nexus; +use std::sync::Arc; +use tokio::task::{spawn, JoinHandle}; + +/// Management structure which encapsulates periodically-executing background +/// tasks. +pub struct TaskRunner { + _handle: JoinHandle<()>, +} + +impl TaskRunner { + pub fn new(nexus: Arc) -> Self { + let handle = spawn(async move { + let log = nexus.log.new(o!("component" => "BackgroundTaskRunner")); + let service_balancer = + services::ServiceBalancer::new(log.clone(), nexus.clone()); + + loop { + // TODO: We may want triggers to exist here, to invoke this task + // more frequently (e.g., on Sled failure). + let opctx = nexus.opctx_for_background(); + if let Err(e) = service_balancer.balance_services(&opctx).await + { + warn!(log, "Failed to balance services: {:?}", e); + } + + tokio::time::sleep(std::time::Duration::from_secs(30)).await; + } + }); + Self { _handle: handle } + } +} diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs new file mode 100644 index 00000000000..4a97d89c407 --- /dev/null +++ b/nexus/src/app/background/services.rs @@ -0,0 +1,409 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Task which ensures that expected Nexus services exist. + +use crate::context::OpContext; +use crate::db::datastore::DatasetRedundancy; +use crate::db::identity::Asset; +use crate::db::model::Dataset; +use crate::db::model::DatasetKind; +use crate::db::model::Service; +use crate::db::model::ServiceKind; +use crate::db::model::Sled; +use crate::db::model::Zpool; +use crate::Nexus; +use futures::stream::{self, StreamExt, TryStreamExt}; +use internal_dns_client::multiclient::{ + Service as DnsService, Updater as DnsUpdater, +}; +use omicron_common::address::{ + DNS_PORT, DNS_REDUNDANCY, DNS_SERVER_PORT, NEXUS_EXTERNAL_PORT, + NEXUS_INTERNAL_PORT, +}; +use omicron_common::api::external::Error; +use sled_agent_client::types as SledAgentTypes; +use slog::Logger; +use std::collections::{HashMap, HashSet}; +use std::net::{Ipv6Addr, SocketAddrV6}; +use std::sync::Arc; + +// Policy for the number of services to be provisioned. +#[derive(Debug)] +enum ServiceRedundancy { + // This service must exist on at least this many sleds + // within the rack. + PerRack(u32), + + // This service must exist on at least this many sleds + // within the availability zone. + DnsPerAz(u32), +} + +#[derive(Debug)] +struct ExpectedService { + kind: ServiceKind, + redundancy: ServiceRedundancy, +} + +const EXPECTED_SERVICES: [ExpectedService; 3] = [ + ExpectedService { + kind: ServiceKind::InternalDNS, + redundancy: ServiceRedundancy::DnsPerAz(DNS_REDUNDANCY), + }, + ExpectedService { + kind: ServiceKind::Nexus, + redundancy: ServiceRedundancy::PerRack(1), + }, + ExpectedService { + kind: ServiceKind::Oximeter, + redundancy: ServiceRedundancy::PerRack(1), + }, +]; + +#[derive(Debug)] +struct ExpectedDataset { + kind: DatasetKind, + redundancy: DatasetRedundancy, +} + +const EXPECTED_DATASETS: [ExpectedDataset; 3] = [ + ExpectedDataset { + kind: DatasetKind::Crucible, + redundancy: DatasetRedundancy::OnAll, + }, + ExpectedDataset { + kind: DatasetKind::Cockroach, + redundancy: DatasetRedundancy::PerRack(1), + }, + ExpectedDataset { + kind: DatasetKind::Clickhouse, + redundancy: DatasetRedundancy::PerRack(1), + }, +]; + +pub struct ServiceBalancer { + log: Logger, + nexus: Arc, + dns_updater: DnsUpdater, +} + +impl ServiceBalancer { + pub fn new(log: Logger, nexus: Arc) -> Self { + let dns_updater = DnsUpdater::new( + &nexus.az_subnet(), + log.new(o!("component" => "DNS Updater")), + ); + + Self { log, nexus, dns_updater } + } + + // Reaches out to all sled agents implied in "services", and + // requests that the desired services are executing. + async fn instantiate_services( + &self, + opctx: &OpContext, + mut services: Vec, + ) -> Result<(), Error> { + let mut sled_ids = HashSet::new(); + for svc in &services { + sled_ids.insert(svc.sled_id); + } + + // For all sleds requiring an update, request all services be + // instantiated. + stream::iter(&sled_ids) + .map(Ok::<_, Error>) + .try_for_each_concurrent(None, |sled_id| async { + // TODO: This interface kinda sucks; ideally we would + // only insert the *new* services. + // + // Inserting the old ones too is costing us an extra query. 
+ let services = self + .nexus + .datastore() + .service_list(opctx, *sled_id) + .await?; + let sled_client = self.nexus.sled_client(sled_id).await?; + + info!(self.log, "instantiate_services: {:?}", services); + + sled_client + .services_put(&SledAgentTypes::ServiceEnsureBody { + services: services + .iter() + .map(|s| { + let address = Ipv6Addr::from(s.ip); + let (name, service_type) = + Self::get_service_name_and_type( + address, s.kind, + ); + + // TODO: This is hacky, specifically to inject + // global zone addresses in the DNS service. + let gz_addresses = match &s.kind { + ServiceKind::InternalDNS => { + let mut octets = address.octets(); + octets[15] = octets[15] + 1; + vec![Ipv6Addr::from(octets)] + } + _ => vec![], + }; + + SledAgentTypes::ServiceRequest { + id: s.id(), + name, + addresses: vec![address], + gz_addresses, + service_type, + } + }) + .collect(), + }) + .await?; + Ok(()) + }) + .await?; + + // Putting records of the same SRV right next to each other isn't + // strictly necessary, but doing so makes the record insertion more + // efficient. + services.sort_by(|a, b| a.srv().partial_cmp(&b.srv()).unwrap()); + self.dns_updater + .insert_dns_records(&services) + .await + .map_err(|e| Error::internal_error(&e.to_string()))?; + + Ok(()) + } + + // Translates (address, db kind) to Sled Agent client types. + fn get_service_name_and_type( + address: Ipv6Addr, + kind: ServiceKind, + ) -> (String, SledAgentTypes::ServiceType) { + match kind { + ServiceKind::Nexus => ( + "nexus".to_string(), + SledAgentTypes::ServiceType::Nexus { + internal_address: SocketAddrV6::new( + address, + NEXUS_INTERNAL_PORT, + 0, + 0, + ) + .to_string(), + external_address: SocketAddrV6::new( + address, + NEXUS_EXTERNAL_PORT, + 0, + 0, + ) + .to_string(), + }, + ), + ServiceKind::InternalDNS => ( + "internal-dns".to_string(), + SledAgentTypes::ServiceType::InternalDns { + server_address: SocketAddrV6::new( + address, + DNS_SERVER_PORT, + 0, + 0, + ) + .to_string(), + dns_address: SocketAddrV6::new(address, DNS_PORT, 0, 0) + .to_string(), + }, + ), + ServiceKind::Oximeter => { + ("oximeter".to_string(), SledAgentTypes::ServiceType::Oximeter) + } + } + } + + // Provision the services within the database. + async fn provision_rack_service( + &self, + opctx: &OpContext, + kind: ServiceKind, + desired_count: u32, + ) -> Result, Error> { + self.nexus + .datastore() + .ensure_rack_service(opctx, self.nexus.rack_id, kind, desired_count) + .await + } + + // Provision the services within the database. + async fn provision_dns_service( + &self, + opctx: &OpContext, + desired_count: u32, + ) -> Result, Error> { + self.nexus + .datastore() + .ensure_dns_service(opctx, self.nexus.rack_subnet, desired_count) + .await + } + + // TODO: Consider using sagas to ensure the rollout of services. + // + // Not using sagas *happens* to be fine because these operations are + // re-tried periodically, but that's kind forcing a dependency on the + // caller. + async fn ensure_services_provisioned( + &self, + opctx: &OpContext, + ) -> Result<(), Error> { + // Provision services within the database. 
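+        // With the EXPECTED_SERVICES table above and the current constants
+        // (DNS_REDUNDANCY = 1), a single-rack deployment records at least one
+        // internal DNS, one Nexus, and one Oximeter service here before they
+        // are instantiated on sleds.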
+ let mut svcs = vec![]; + for expected_svc in &EXPECTED_SERVICES { + info!(self.log, "Ensuring service {:?} exists", expected_svc); + match expected_svc.redundancy { + ServiceRedundancy::PerRack(desired_count) => { + svcs.extend_from_slice( + &self + .provision_rack_service( + opctx, + expected_svc.kind, + desired_count, + ) + .await?, + ); + } + ServiceRedundancy::DnsPerAz(desired_count) => { + svcs.extend_from_slice( + &self + .provision_dns_service(opctx, desired_count) + .await?, + ); + } + } + } + + // Ensure services exist on the target sleds. + self.instantiate_services(opctx, svcs).await?; + Ok(()) + } + + async fn ensure_rack_dataset( + &self, + opctx: &OpContext, + kind: DatasetKind, + redundancy: DatasetRedundancy, + ) -> Result<(), Error> { + // Provision the datasets within the database. + let new_datasets = self + .nexus + .datastore() + .ensure_rack_dataset(opctx, self.nexus.rack_id, kind, redundancy) + .await?; + + // Actually instantiate those datasets. + self.instantiate_datasets(new_datasets, kind).await + } + + // Reaches out to all sled agents implied in "services", and + // requests that the desired services are executing. + async fn instantiate_datasets( + &self, + datasets: Vec<(Sled, Zpool, Dataset)>, + kind: DatasetKind, + ) -> Result<(), Error> { + if datasets.is_empty() { + return Ok(()); + } + + // Ensure that there is one connection per sled. + let mut sled_clients = HashMap::new(); + for (sled, _, _) in &datasets { + if sled_clients.get(&sled.id()).is_none() { + let sled_client = self.nexus.sled_client(&sled.id()).await?; + sled_clients.insert(sled.id(), sled_client); + } + } + + // Issue all dataset instantiation requests concurrently. + stream::iter(&datasets) + .map(Ok::<_, Error>) + .try_for_each_concurrent(None, |(sled, zpool, dataset)| async { + let sled_client = sled_clients.get(&sled.id()).unwrap(); + + let dataset_kind = match kind { + // TODO: This set of "all addresses" isn't right. + // TODO: ... should we even be using "all addresses" to contact CRDB? + // Can it just rely on DNS, somehow? + DatasetKind::Cockroach => { + SledAgentTypes::DatasetKind::CockroachDb(vec![]) + } + DatasetKind::Crucible => { + SledAgentTypes::DatasetKind::Crucible + } + DatasetKind::Clickhouse => { + SledAgentTypes::DatasetKind::Clickhouse + } + }; + + // Instantiate each dataset. + sled_client + .filesystem_put(&SledAgentTypes::DatasetEnsureBody { + id: dataset.id(), + zpool_id: zpool.id(), + dataset_kind, + address: dataset.address().to_string(), + }) + .await?; + Ok(()) + }) + .await?; + + // Ensure all DNS records are updated for the created datasets. + self.dns_updater + .insert_dns_records( + &datasets.into_iter().map(|(_, _, dataset)| dataset).collect(), + ) + .await + .map_err(|e| Error::internal_error(&e.to_string()))?; + + Ok(()) + } + + async fn ensure_datasets_provisioned( + &self, + opctx: &OpContext, + ) -> Result<(), Error> { + // Provision all dataset types concurrently. + stream::iter(&EXPECTED_DATASETS) + .map(Ok::<_, Error>) + .try_for_each_concurrent(None, |expected_dataset| async move { + info!( + self.log, + "Ensuring dataset {:?} exists", expected_dataset + ); + self.ensure_rack_dataset( + opctx, + expected_dataset.kind, + expected_dataset.redundancy, + ) + .await?; + Ok(()) + }) + .await + } + + // Provides a single point-in-time evaluation and adjustment of + // the services provisioned within the rack. + // + // May adjust the provisioned services to meet the redundancy of the + // rack, if necessary. 
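+    //
+    // Within each pass, datasets are ensured before services, so
+    // storage-backed components (e.g. CockroachDB, Clickhouse) are requested
+    // before any services that may depend on them.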
+ pub async fn balance_services( + &self, + opctx: &OpContext, + ) -> Result<(), Error> { + self.ensure_datasets_provisioned(opctx).await?; + self.ensure_services_provisioned(opctx).await?; + Ok(()) + } +} diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 4f9fe29b006..a4be4beda52 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -15,9 +15,11 @@ use crate::populate::PopulateArgs; use crate::populate::PopulateStatus; use crate::saga_interface::SagaContext; use anyhow::anyhow; +use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, RACK_PREFIX}; use omicron_common::api::external::Error; use slog::Logger; use std::sync::Arc; +use tokio::sync::OnceCell; use uuid::Uuid; // The implementation of Nexus is large, and split into a number of submodules @@ -40,6 +42,9 @@ mod vpc; mod vpc_router; mod vpc_subnet; +// Background tasks exist in the "background" module. +mod background; + // Sagas are not part of the "Nexus" implementation, but they are // application logic. mod sagas; @@ -59,6 +64,9 @@ pub struct Nexus { /// uuid for this rack rack_id: Uuid, + /// subnet of this rack + rack_subnet: Ipv6Subnet, + /// general server log log: Logger, @@ -77,6 +85,9 @@ pub struct Nexus { /// Status of background task to populate database populate_status: tokio::sync::watch::Receiver, + /// Background task for Nexus. + background_task_runner: OnceCell, + /// Client to the timeseries database. timeseries_client: LazyTimeseriesClient, @@ -127,7 +138,7 @@ impl Nexus { sec_store, )); - // Connect to clickhouse - but do so lazily. + // Connect to Clickhouse - but do so lazily. // Clickhouse may not be executing when Nexus starts. let timeseries_client = if let Some(address) = &config.pkg.timeseries_db.address { @@ -158,12 +169,14 @@ impl Nexus { let nexus = Nexus { id: config.deployment.id, rack_id, + rack_subnet: config.deployment.subnet, log: log.new(o!()), db_datastore: Arc::clone(&db_datastore), authz: Arc::clone(&authz), sec_client: Arc::clone(&sec_client), recovery_task: std::sync::Mutex::new(None), populate_status, + background_task_runner: OnceCell::new(), timeseries_client, updates_config: config.pkg.updates.clone(), tunables: config.pkg.tunables.clone(), @@ -207,6 +220,10 @@ impl Nexus { nexus } + pub fn az_subnet(&self) -> Ipv6Subnet { + Ipv6Subnet::::new(self.rack_subnet.net().ip()) + } + /// Return the tunable configuration parameters, e.g. for use in tests. pub fn tunables(&self) -> &config::Tunables { &self.tunables @@ -233,6 +250,15 @@ impl Nexus { } } + pub fn start_background_tasks( + self: &Arc, + ) -> Result<(), anyhow::Error> { + let nexus = self.clone(); + self.background_task_runner + .set(background::TaskRunner::new(nexus)) + .map_err(|error| anyhow!(error.to_string())) + } + /// Returns an [`OpContext`] used for authenticating external requests pub fn opctx_external_authn(&self) -> &OpContext { &self.opctx_external_authn diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index dcc7ce92dbc..dca24f078c6 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -74,8 +74,9 @@ impl super::Nexus { }) .collect(); + // TODO: Actually supply datasets provided from the sled agent. 
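+        // Until then, an empty dataset list is handed off here; the
+        // background ServiceBalancer subsequently ensures the expected
+        // datasets exist.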
self.db_datastore - .rack_set_initialized(opctx, rack_id, services) + .rack_set_initialized(opctx, rack_id, services, vec![]) .await?; Ok(()) diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index e4fc616f095..0e01112c532 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -16,7 +16,7 @@ use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use sled_agent_client::Client as SledAgentClient; -use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::net::{Ipv6Addr, SocketAddrV6}; use std::sync::Arc; use uuid::Uuid; @@ -135,7 +135,7 @@ impl super::Nexus { &self, id: Uuid, zpool_id: Uuid, - address: SocketAddr, + address: SocketAddrV6, kind: DatasetKind, ) -> Result<(), Error> { info!(self.log, "upserting dataset"; "zpool_id" => zpool_id.to_string(), "dataset_id" => id.to_string(), "address" => address.to_string()); diff --git a/nexus/src/config.rs b/nexus/src/config.rs index 5ca452e7388..e626f3c422c 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -52,34 +52,30 @@ pub struct TimeseriesDbConfig { pub address: Option, } -// A deserializable type that does no validation on the tunable parameters. -#[derive(Clone, Debug, Deserialize, PartialEq)] -struct UnvalidatedTunables { - max_vpc_ipv4_subnet_prefix: u8, +fn deserialize_ipv4_subnet<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + let prefix = u8::deserialize(deserializer)?; + Tunables::validate_ipv4_prefix(prefix) + .map_err(|e| serde::de::Error::custom(e))?; + Ok(prefix) } /// Tunable configuration parameters, intended for use in test environments or /// other situations in which experimentation / tuning is valuable. #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] -#[serde(try_from = "UnvalidatedTunables")] pub struct Tunables { /// The maximum prefix size supported for VPC Subnet IPv4 subnetworks. /// /// Note that this is the maximum _prefix_ size, which sets the minimum size /// of the subnet. + #[serde(default, deserialize_with = "deserialize_ipv4_subnet")] pub max_vpc_ipv4_subnet_prefix: u8, -} - -// Convert from the unvalidated tunables, verifying each parameter as needed. -impl TryFrom for Tunables { - type Error = InvalidTunable; - fn try_from(unvalidated: UnvalidatedTunables) -> Result { - Tunables::validate_ipv4_prefix(unvalidated.max_vpc_ipv4_subnet_prefix)?; - Ok(Tunables { - max_vpc_ipv4_subnet_prefix: unvalidated.max_vpc_ipv4_subnet_prefix, - }) - } + /// Identifies whether or not background tasks will be enabled. 
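+    /// When disabled (as in the integration-test configuration), Nexus never
+    /// starts its background `TaskRunner`.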
+ #[serde(default)] + pub enable_background_tasks: bool, } impl Tunables { @@ -119,7 +115,10 @@ pub const MAX_VPC_IPV4_SUBNET_PREFIX: u8 = 26; impl Default for Tunables { fn default() -> Self { - Tunables { max_vpc_ipv4_subnet_prefix: MAX_VPC_IPV4_SUBNET_PREFIX } + Tunables { + max_vpc_ipv4_subnet_prefix: MAX_VPC_IPV4_SUBNET_PREFIX, + enable_background_tasks: true, + } } } @@ -388,7 +387,10 @@ mod test { trusted_root: PathBuf::from("/path/to/root.json"), default_base_url: "http://example.invalid/".into(), }), - tunables: Tunables { max_vpc_ipv4_subnet_prefix: 27 }, + tunables: Tunables { + max_vpc_ipv4_subnet_prefix: 27, + enable_background_tasks: false, + }, }, } ); diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index b1c984f2218..12435e28d3e 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -57,7 +57,7 @@ use crate::db::{ Instance, InstanceRuntimeState, Name, NetworkInterface, Organization, OrganizationUpdate, OximeterInfo, ProducerEndpoint, Project, ProjectUpdate, Rack, Region, RoleAssignment, RoleBuiltin, RouterRoute, - RouterRouteUpdate, Service, Silo, SiloUser, Sled, SshKey, + RouterRouteUpdate, Service, ServiceKind, Silo, SiloUser, Sled, SshKey, UpdateAvailableArtifact, UserBuiltin, Volume, VpcFirewallRule, VpcRouter, VpcRouterUpdate, VpcSubnet, VpcSubnetUpdate, VpcUpdate, Zpool, @@ -76,6 +76,9 @@ use diesel::query_builder::{QueryFragment, QueryId}; use diesel::query_dsl::methods::LoadQuery; use diesel::upsert::excluded; use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; +use omicron_common::address::{ + Ipv6Subnet, ReservedRackSubnet, DNS_REDUNDANCY, RACK_PREFIX, +}; use omicron_common::api; use omicron_common::api::external; use omicron_common::api::external::DataPageParams; @@ -92,7 +95,7 @@ use omicron_common::api::external::{ use omicron_common::bail_unless; use sled_agent_client::types as sled_client_types; use std::convert::{TryFrom, TryInto}; -use std::net::Ipv6Addr; +use std::net::{Ipv6Addr, SocketAddrV6}; use std::sync::Arc; use uuid::Uuid; @@ -121,6 +124,15 @@ impl RunnableQuery for T where { } +// Redundancy for the number of datasets to be provisioned. +#[derive(Clone, Copy, Debug)] +pub enum DatasetRedundancy { + // The dataset should exist on all zpools. + OnAll, + // The dataset should exist on at least this many zpools. + PerRack(u32), +} + pub struct DataStore { pool: Arc, } @@ -147,20 +159,6 @@ impl DataStore { Ok(self.pool.pool()) } - pub async fn rack_list( - &self, - opctx: &OpContext, - pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResultVec { - opctx.authorize(authz::Action::Read, &authz::FLEET).await?; - use db::schema::rack::dsl; - paginated(dsl::rack, dsl::id, pagparams) - .select(Rack::as_select()) - .load_async(self.pool_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) - } - /// Stores a new rack in the database. /// /// This function is a no-op if the rack already exists. 
@@ -197,19 +195,29 @@ impl DataStore { opctx: &OpContext, rack_id: Uuid, services: Vec, + datasets: Vec, ) -> UpdateResult { use db::schema::rack::dsl as rack_dsl; - use db::schema::service::dsl as service_dsl; #[derive(Debug)] enum RackInitError { - ServiceInsert { err: SyncInsertError, sled_id: Uuid, svc_id: Uuid }, + ServiceInsert { + err: SyncInsertError, + sled_id: Uuid, + svc_id: Uuid, + }, + DatasetInsert { + err: SyncInsertError, + zpool_id: Uuid, + dataset_id: Uuid, + }, RackUpdate(diesel::result::Error), } type TxnError = TransactionError; // NOTE: This operation could likely be optimized with a CTE, but given // the low-frequency of calls, this optimization has been deferred. + let log = opctx.log.clone(); self.pool_authorized(opctx) .await? .transaction(move |conn| { @@ -222,25 +230,25 @@ impl DataStore { TxnError::CustomError(RackInitError::RackUpdate(e)) })?; if rack.initialized { + info!(log, "Early exit: Rack already initialized"); return Ok(rack); } - // Otherwise, insert services and set rack.initialized = true. + // Otherwise, insert services and datasets for svc in services { + use db::schema::service::dsl; let sled_id = svc.sled_id; >::insert_resource( sled_id, - diesel::insert_into(service_dsl::service) + diesel::insert_into(dsl::service) .values(svc.clone()) - .on_conflict(service_dsl::id) + .on_conflict(dsl::id) .do_update() .set(( - service_dsl::time_modified.eq(Utc::now()), - service_dsl::sled_id - .eq(excluded(service_dsl::sled_id)), - service_dsl::ip.eq(excluded(service_dsl::ip)), - service_dsl::kind - .eq(excluded(service_dsl::kind)), + dsl::time_modified.eq(Utc::now()), + dsl::sled_id.eq(excluded(dsl::sled_id)), + dsl::ip.eq(excluded(dsl::ip)), + dsl::kind.eq(excluded(dsl::kind)), )), ) .insert_and_get_result(conn) @@ -252,7 +260,37 @@ impl DataStore { }) })?; } - diesel::update(rack_dsl::rack) + info!(log, "Inserted services"); + for dataset in datasets { + use db::schema::dataset::dsl; + let zpool_id = dataset.pool_id; + >::insert_resource( + zpool_id, + diesel::insert_into(dsl::dataset) + .values(dataset.clone()) + .on_conflict(dsl::id) + .do_update() + .set(( + dsl::time_modified.eq(Utc::now()), + dsl::pool_id.eq(excluded(dsl::pool_id)), + dsl::ip.eq(excluded(dsl::ip)), + dsl::port.eq(excluded(dsl::port)), + dsl::kind.eq(excluded(dsl::kind)), + )), + ) + .insert_and_get_result(conn) + .map_err(|err| { + TxnError::CustomError(RackInitError::DatasetInsert { + err, + zpool_id, + dataset_id: dataset.id(), + }) + })?; + } + info!(log, "Inserted datasets"); + + // Set the rack to "initialized" once the handoff is complete + let rack = diesel::update(rack_dsl::rack) .filter(rack_dsl::id.eq(rack_id)) .set(( rack_dsl::initialized.eq(true), @@ -262,10 +300,31 @@ impl DataStore { .get_result::(conn) .map_err(|e| { TxnError::CustomError(RackInitError::RackUpdate(e)) - }) + })?; + info!(log, "Updated rack (set initialized to true)"); + Ok(rack) }) .await .map_err(|e| match e { + TxnError::CustomError(RackInitError::DatasetInsert { + err, + zpool_id, + dataset_id, + }) => match err { + SyncInsertError::CollectionNotFound => { + Error::ObjectNotFound { + type_name: ResourceType::Zpool, + lookup_type: LookupType::ById(zpool_id), + } + } + SyncInsertError::DatabaseError(e) => { + public_error_from_diesel_create( + e, + ResourceType::Dataset, + &dataset_id.to_string(), + ) + } + }, TxnError::CustomError(RackInitError::ServiceInsert { err, sled_id, @@ -298,6 +357,20 @@ impl DataStore { }) } + pub async fn rack_list( + &self, + opctx: &OpContext, + pagparams: 
&DataPageParams<'_, Uuid>, + ) -> ListResultVec { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::rack::dsl; + paginated(dsl::rack, dsl::id, pagparams) + .select(Rack::as_select()) + .load_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + /// Stores a new sled in the database. pub async fn sled_upsert(&self, sled: Sled) -> CreateResult { use db::schema::sled::dsl; @@ -338,6 +411,375 @@ impl DataStore { .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) } + fn sled_list_with_limit_sync( + conn: &mut DbConnection, + limit: u32, + ) -> Result, diesel::result::Error> { + use db::schema::sled::dsl; + dsl::sled + .filter(dsl::time_deleted.is_null()) + .limit(limit as i64) + .select(Sled::as_select()) + .load(conn) + } + + pub async fn service_list( + &self, + opctx: &OpContext, + sled_id: Uuid, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::service::dsl; + dsl::service + .filter(dsl::sled_id.eq(sled_id)) + .select(Service::as_select()) + .load_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + + fn sled_and_service_list_sync( + conn: &mut DbConnection, + rack_id: Uuid, + kind: ServiceKind, + ) -> Result)>, diesel::result::Error> { + use db::schema::service::dsl as svc_dsl; + use db::schema::sled::dsl as sled_dsl; + + db::schema::sled::table + .filter(sled_dsl::time_deleted.is_null()) + .filter(sled_dsl::rack_id.eq(rack_id)) + .left_outer_join(db::schema::service::table.on( + svc_dsl::sled_id.eq(sled_dsl::id).and(svc_dsl::kind.eq(kind)), + )) + .select(<(Sled, Option)>::as_select()) + .get_results(conn) + } + + pub async fn ensure_rack_service( + &self, + opctx: &OpContext, + rack_id: Uuid, + kind: ServiceKind, + redundancy: u32, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + + #[derive(Debug)] + enum ServiceError { + NotEnoughSleds, + Other(Error), + } + type TxnError = TransactionError; + + self.pool() + .transaction(move |conn| { + let sleds_and_maybe_svcs = + Self::sled_and_service_list_sync(conn, rack_id, kind)?; + + // Split the set of returned sleds into "those with" and "those + // without" the requested service. + let (sleds_with_svc, sleds_without_svc): (Vec<_>, Vec<_>) = + sleds_and_maybe_svcs + .into_iter() + .partition(|(_, maybe_svc)| maybe_svc.is_some()); + // Identify sleds without services (targets for future + // allocation). + let mut sleds_without_svc = + sleds_without_svc.into_iter().map(|(sled, _)| sled); + + // Identify sleds with services (part of output). + let mut svcs: Vec<_> = sleds_with_svc + .into_iter() + .map(|(_, maybe_svc)| { + maybe_svc.expect( + "Should have filtered by sleds with the service", + ) + }) + .collect(); + + // Add services to sleds, in-order, until we've met a + // number sufficient for our redundancy. + // + // The selection of "which sleds run this service" is completely + // arbitrary. 
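+                // For example, asking for Nexus with a redundancy of three on
+                // a rack where one sled already runs Nexus keeps that record
+                // and allocates two more, each on a sled currently without
+                // the service.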
+ while svcs.len() < (redundancy as usize) { + let sled = sleds_without_svc.next().ok_or_else(|| { + TxnError::CustomError(ServiceError::NotEnoughSleds) + })?; + let svc_id = Uuid::new_v4(); + let address = Self::next_ipv6_address_sync(conn, sled.id()) + .map_err(|e| { + TxnError::CustomError(ServiceError::Other(e)) + })?; + + let service = db::model::Service::new( + svc_id, + sled.id(), + address, + kind, + ); + + let svc = Self::service_upsert_sync(conn, service) + .map_err(|e| { + TxnError::CustomError(ServiceError::Other(e)) + })?; + svcs.push(svc); + } + + return Ok(svcs); + }) + .await + .map_err(|e| match e { + TxnError::CustomError(ServiceError::NotEnoughSleds) => { + Error::unavail("Not enough sleds for service allocation") + } + TxnError::CustomError(ServiceError::Other(e)) => e, + TxnError::Pool(e) => { + public_error_from_diesel_pool(e, ErrorHandler::Server) + } + }) + } + + pub async fn ensure_dns_service( + &self, + opctx: &OpContext, + rack_subnet: Ipv6Subnet, + redundancy: u32, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + + #[derive(Debug)] + enum ServiceError { + NotEnoughSleds, + NotEnoughIps, + Other(Error), + } + type TxnError = TransactionError; + + self.pool() + .transaction(move |conn| { + let mut svcs = Self::dns_service_list_sync(conn)?; + + // Get all subnets not allocated to existing services. + let mut usable_dns_subnets = ReservedRackSubnet(rack_subnet) + .get_dns_subnets() + .into_iter() + .filter(|subnet| { + // If any existing services are using this address, + // skip it. + !svcs.iter().any(|svc| { + Ipv6Addr::from(svc.ip) == subnet.dns_address().ip() + }) + }) + .collect::>() + .into_iter(); + + // Get all sleds which aren't already running DNS services. + let mut target_sleds = + Self::sled_list_with_limit_sync(conn, redundancy)? + .into_iter() + .filter(|sled| { + // The target sleds are only considered if they aren't already + // running a DNS service. + svcs.iter().all(|svc| svc.sled_id != sled.id()) + }) + .collect::>() + .into_iter(); + + while svcs.len() < (redundancy as usize) { + let sled = target_sleds.next().ok_or_else(|| { + TxnError::CustomError(ServiceError::NotEnoughSleds) + })?; + let svc_id = Uuid::new_v4(); + let dns_subnet = + usable_dns_subnets.next().ok_or_else(|| { + TxnError::CustomError(ServiceError::NotEnoughIps) + })?; + let address = dns_subnet.dns_address().ip(); + + // TODO: How are we tracking the GZ address that must be + // allocated? They're tracked by the "DnsSubnet" object + // in address.rs, but I don't think they're getting + // propagated out of here. 
+ let service = db::model::Service::new( + svc_id, + sled.id(), + address, + ServiceKind::InternalDNS, + ); + + let svc = Self::service_upsert_sync(conn, service) + .map_err(|e| { + TxnError::CustomError(ServiceError::Other(e)) + })?; + + svcs.push(svc); + } + return Ok(svcs); + }) + .await + .map_err(|e| match e { + TxnError::CustomError(ServiceError::NotEnoughSleds) => { + Error::unavail("Not enough sleds for service allocation") + } + TxnError::CustomError(ServiceError::NotEnoughIps) => { + Error::unavail( + "Not enough IP addresses for service allocation", + ) + } + TxnError::CustomError(ServiceError::Other(e)) => e, + TxnError::Pool(e) => { + public_error_from_diesel_pool(e, ErrorHandler::Server) + } + }) + } + + fn dns_service_list_sync( + conn: &mut DbConnection, + ) -> Result, diesel::result::Error> { + use db::schema::service::dsl as svc; + + svc::service + .filter(svc::kind.eq(ServiceKind::InternalDNS)) + .limit(DNS_REDUNDANCY.into()) + .select(Service::as_select()) + .get_results(conn) + } + + fn sled_zpool_and_dataset_list_sync( + conn: &mut DbConnection, + rack_id: Uuid, + kind: DatasetKind, + ) -> Result)>, diesel::result::Error> + { + use db::schema::dataset::dsl as dataset_dsl; + use db::schema::sled::dsl as sled_dsl; + use db::schema::zpool::dsl as zpool_dsl; + + db::schema::sled::table + .filter(sled_dsl::time_deleted.is_null()) + .filter(sled_dsl::rack_id.eq(rack_id)) + .inner_join( + db::schema::zpool::table.on(zpool_dsl::sled_id + .eq(sled_dsl::id) + .and(zpool_dsl::time_deleted.is_null())), + ) + .left_outer_join( + db::schema::dataset::table.on(dataset_dsl::pool_id + .eq(zpool_dsl::id) + .and(dataset_dsl::kind.eq(kind)) + .and(dataset_dsl::time_deleted.is_null())), + ) + .select(<(Sled, Zpool, Option)>::as_select()) + .get_results(conn) + } + + pub async fn ensure_rack_dataset( + &self, + opctx: &OpContext, + rack_id: Uuid, + kind: DatasetKind, + redundancy: DatasetRedundancy, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + + #[derive(Debug)] + enum DatasetError { + NotEnoughZpools, + Other(Error), + } + type TxnError = TransactionError; + + self.pool() + .transaction(move |conn| { + let sleds_zpools_and_maybe_datasets = + Self::sled_zpool_and_dataset_list_sync( + conn, rack_id, kind, + )?; + + // Split the set of returned zpools into "those with" and "those + // without" the requested dataset. + let (zpools_with_dataset, zpools_without_dataset): ( + Vec<_>, + Vec<_>, + ) = sleds_zpools_and_maybe_datasets + .into_iter() + .partition(|(_, _, maybe_dataset)| maybe_dataset.is_some()); + let mut zpools_without_dataset = zpools_without_dataset + .into_iter() + .map(|(sled, zpool, _)| (sled, zpool)) + .peekable(); + + let mut datasets: Vec<_> = zpools_with_dataset + .into_iter() + .map(|(sled, zpool, maybe_dataset)| { + ( + sled, + zpool, + maybe_dataset.expect("Dataset should exist"), + ) + }) + .collect(); + + // Add datasets to zpools, in-order, until we've met a + // number sufficient for our redundancy. + // + // The selection of "which zpools contain this dataset" is completely + // arbitrary. 
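+                // For example, DatasetRedundancy::OnAll (used for Crucible)
+                // keeps allocating until every zpool lacking the dataset has
+                // one, while PerRack(n) stops once at least `n` datasets
+                // exist.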
+ loop { + match redundancy { + DatasetRedundancy::OnAll => { + if zpools_without_dataset.peek().is_none() { + break; + } + } + DatasetRedundancy::PerRack(desired) => { + if datasets.len() >= (desired as usize) { + break; + } + } + }; + + let (sled, zpool) = + zpools_without_dataset.next().ok_or_else(|| { + TxnError::CustomError(DatasetError::NotEnoughZpools) + })?; + let dataset_id = Uuid::new_v4(); + let address = Self::next_ipv6_address_sync(conn, sled.id()) + .map_err(|e| { + TxnError::CustomError(DatasetError::Other(e)) + }) + .map(|ip| SocketAddrV6::new(ip, kind.port(), 0, 0))?; + + let dataset = db::model::Dataset::new( + dataset_id, + zpool.id(), + address, + kind, + ); + + let dataset = Self::dataset_upsert_sync(conn, dataset) + .map_err(|e| { + TxnError::CustomError(DatasetError::Other(e)) + })?; + datasets.push((sled, zpool, dataset)); + } + + return Ok(datasets); + }) + .await + .map_err(|e| match e { + TxnError::CustomError(DatasetError::NotEnoughZpools) => { + Error::unavail("Not enough zpools for dataset allocation") + } + TxnError::CustomError(DatasetError::Other(e)) => e, + TxnError::Pool(e) => { + public_error_from_diesel_pool(e, ErrorHandler::Server) + } + }) + } + /// Stores a new zpool in the database. pub async fn zpool_upsert(&self, zpool: Zpool) -> CreateResult { use db::schema::zpool::dsl; @@ -374,6 +816,25 @@ impl DataStore { }) } + // NOTE: This doesn't need to be test-only, it just happens to be test-only + // to avoid unused warnings. + #[cfg(test)] + async fn dataset_list( + &self, + opctx: &OpContext, + zpool_id: Uuid, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::dataset::dsl; + dsl::dataset + .filter(dsl::time_deleted.is_null()) + .filter(dsl::pool_id.eq(zpool_id)) + .select(Dataset::as_select()) + .load_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + /// Stores a new dataset in the database. pub async fn dataset_upsert( &self, @@ -415,6 +876,44 @@ impl DataStore { }) } + /// Stores a new dataset in the database. + fn dataset_upsert_sync( + conn: &mut DbConnection, + dataset: Dataset, + ) -> CreateResult { + use db::schema::dataset::dsl; + + let zpool_id = dataset.pool_id; + Zpool::insert_resource( + zpool_id, + diesel::insert_into(dsl::dataset) + .values(dataset.clone()) + .on_conflict(dsl::id) + .do_update() + .set(( + dsl::time_modified.eq(Utc::now()), + dsl::pool_id.eq(excluded(dsl::pool_id)), + dsl::ip.eq(excluded(dsl::ip)), + dsl::port.eq(excluded(dsl::port)), + dsl::kind.eq(excluded(dsl::kind)), + )), + ) + .insert_and_get_result(conn) + .map_err(|e| match e { + SyncInsertError::CollectionNotFound => Error::ObjectNotFound { + type_name: ResourceType::Zpool, + lookup_type: LookupType::ById(zpool_id), + }, + SyncInsertError::DatabaseError(e) => { + public_error_from_diesel_create( + e, + ResourceType::Dataset, + &dataset.id().to_string(), + ) + } + }) + } + /// Stores a new service in the database. 
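+    ///
+    /// If a service with the same ID already exists, its sled, IP, and kind
+    /// are updated in place instead of returning a conflict.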
pub async fn service_upsert( &self, @@ -456,6 +955,42 @@ impl DataStore { }) } + fn service_upsert_sync( + conn: &mut DbConnection, + service: Service, + ) -> CreateResult { + use db::schema::service::dsl; + + let sled_id = service.sled_id; + Sled::insert_resource( + sled_id, + diesel::insert_into(dsl::service) + .values(service.clone()) + .on_conflict(dsl::id) + .do_update() + .set(( + dsl::time_modified.eq(Utc::now()), + dsl::sled_id.eq(excluded(dsl::sled_id)), + dsl::ip.eq(excluded(dsl::ip)), + dsl::kind.eq(excluded(dsl::kind)), + )), + ) + .insert_and_get_result(conn) + .map_err(|e| match e { + SyncInsertError::CollectionNotFound => Error::ObjectNotFound { + type_name: ResourceType::Sled, + lookup_type: LookupType::ById(sled_id), + }, + SyncInsertError::DatabaseError(e) => { + public_error_from_diesel_create( + e, + ResourceType::Service, + &service.id().to_string(), + ) + } + }) + } + fn get_allocated_regions_query( volume_id: Uuid, ) -> impl RunnableQuery<(Dataset, Region)> { @@ -3584,6 +4119,36 @@ impl DataStore { } } + /// Return the next available IPv6 address for an Oxide service running on + /// the provided sled. + fn next_ipv6_address_sync( + conn: &mut DbConnection, + sled_id: Uuid, + ) -> Result { + use db::schema::sled::dsl; + let net = diesel::update( + dsl::sled.find(sled_id).filter(dsl::time_deleted.is_null()), + ) + .set(dsl::last_used_address.eq(dsl::last_used_address + 1)) + .returning(dsl::last_used_address) + .get_result(conn) + .map_err(|e| { + public_error_from_diesel_lookup( + e, + ResourceType::Sled, + &LookupType::ById(sled_id), + ) + })?; + + // TODO-correctness: We could ensure that this address is actually + // within the sled's underlay prefix, once that's included in the + // database record. + match net { + ipnetwork::IpNetwork::V6(net) => Ok(net.ip()), + _ => panic!("Sled IP must be IPv6"), + } + } + pub async fn global_image_list_images( &self, opctx: &OpContext, @@ -3879,6 +4444,7 @@ mod test { use crate::authz; use crate::db::explain::ExplainableAsync; use crate::db::fixed_data::silo::SILO_ID; + use crate::db::identity::Asset; use crate::db::identity::Resource; use crate::db::lookup::LookupPath; use crate::db::model::{ConsoleSession, DatasetKind, Project, ServiceKind}; @@ -3889,10 +4455,8 @@ mod test { ByteCount, Error, IdentityMetadataCreateParams, LookupType, Name, }; use omicron_test_utils::dev; - use std::collections::HashSet; - use std::net::Ipv6Addr; - use std::net::SocketAddrV6; - use std::net::{IpAddr, Ipv4Addr, SocketAddr}; + use std::collections::{HashMap, HashSet}; + use std::net::{Ipv6Addr, SocketAddrV6}; use std::sync::Arc; use uuid::Uuid; @@ -4108,8 +4672,7 @@ mod test { // ... and datasets within that zpool. let dataset_count = REGION_REDUNDANCY_THRESHOLD * 2; - let bogus_addr = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let dataset_ids: Vec = (0..dataset_count).map(|_| Uuid::new_v4()).collect(); for id in &dataset_ids { @@ -4190,8 +4753,7 @@ mod test { // ... and datasets within that zpool. let dataset_count = REGION_REDUNDANCY_THRESHOLD; - let bogus_addr = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let dataset_ids: Vec = (0..dataset_count).map(|_| Uuid::new_v4()).collect(); for id in &dataset_ids { @@ -4257,8 +4819,7 @@ mod test { // ... and datasets within that zpool. 
let dataset_count = REGION_REDUNDANCY_THRESHOLD - 1; - let bogus_addr = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let dataset_ids: Vec = (0..dataset_count).map(|_| Uuid::new_v4()).collect(); for id in &dataset_ids { @@ -4309,8 +4870,7 @@ mod test { // ... and datasets within that zpool. let dataset_count = REGION_REDUNDANCY_THRESHOLD; - let bogus_addr = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let dataset_ids: Vec = (0..dataset_count).map(|_| Uuid::new_v4()).collect(); for id in &dataset_ids { @@ -4406,8 +4966,8 @@ mod test { let datastore = Arc::new(DataStore::new(Arc::clone(&pool))); let opctx = OpContext::for_tests(logctx.log.new(o!()), datastore.clone()); - let rack_id = Uuid::new_v4(); + let addr1 = "[fd00:1de::1]:12345".parse().unwrap(); let sled1_id = "0de4b299-e0b4-46f0-d528-85de81a7095f".parse().unwrap(); let sled1 = db::model::Sled::new(sled1_id, addr1, rack_id); @@ -4555,6 +5115,497 @@ mod test { logctx.cleanup_successful(); } + #[tokio::test] + async fn test_ensure_rack_service() { + let logctx = dev::test_setup_log("test_ensure_rack_service"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the service should exist. + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + + // Ensure a service exists on the rack. + let services = datastore + .ensure_rack_service(&opctx, rack_id, ServiceKind::Nexus, 1) + .await + .expect("Should have allocated service"); + + // Only a single service was allocated, with the type / address we + // expect. + assert_eq!(1, services.len()); + assert_eq!(ServiceKind::Nexus, services[0].kind); + assert_eq!(sled_id, services[0].sled_id); + + // Listing services only shows this one. + let observed_services = datastore + .service_list(&opctx, sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(1, observed_services.len()); + assert_eq!(services[0].id(), observed_services[0].id()); + + // Test that ensuring services is idempotent. + let services_again = datastore + .ensure_rack_service(&opctx, rack_id, ServiceKind::Nexus, 1) + .await + .expect("Should have allocated service"); + assert_eq!(services_again, services); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_service_multi_sled() { + let logctx = dev::test_setup_log("test_ensure_rack_service_multi_sled"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let rack_id = Uuid::new_v4(); + + // Create sleds with distinct underlay subnets. 
+ const SLED_COUNT: usize = 3; + let mut sleds = HashMap::new(); + for i in 0..SLED_COUNT { + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, i.try_into().unwrap(), 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled.clone()) + .await + .expect("Should be able to upsert sled"); + sleds.insert(sled.id(), sled); + } + + // Ensure a service exists on the rack, with some redundancy. + const NEXUS_COUNT: u32 = 3; + let mut services = datastore + .ensure_rack_service( + &opctx, + rack_id, + ServiceKind::Nexus, + NEXUS_COUNT, + ) + .await + .expect("Should have allocated service"); + services.sort_by(|a, b| a.id().cmp(&b.id())); + + assert_eq!(NEXUS_COUNT, services.len() as u32); + for svc in &services { + assert_eq!(ServiceKind::Nexus, svc.kind); + + // Each service should have been provisioned to a distinct sled. + let observed_services = datastore + .service_list(&opctx, svc.sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(1, observed_services.len()); + assert_eq!(svc.id(), observed_services[0].id()); + } + + // Test that ensuring services is idempotent. + let mut services_again = datastore + .ensure_rack_service( + &opctx, + rack_id, + ServiceKind::Nexus, + NEXUS_COUNT, + ) + .await + .expect("Should have allocated service"); + services_again.sort_by(|a, b| a.id().cmp(&b.id())); + assert_eq!(services_again, services); + + // Ask for a different service type on the rack. + let oximeter_services = datastore + .ensure_rack_service(&opctx, rack_id, ServiceKind::Oximeter, 1) + .await + .expect("Should have allocated service"); + + // This should only return a single service + assert_eq!(1, oximeter_services.len()); + + // The target sled should contain both the nexus and oximeter services + let observed_services = datastore + .service_list(&opctx, oximeter_services[0].sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(2, observed_services.len()); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_service_not_enough_sleds() { + let logctx = + dev::test_setup_log("test_ensure_rack_service_not_enough_sleds"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the service should exist. + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + + // Try to request a redundancy which is larger than the number of sleds. + let err = datastore + .ensure_rack_service(&opctx, rack_id, ServiceKind::Nexus, 2) + .await + .expect_err("Should have failed to allocate service"); + + assert!( + matches!(err, Error::ServiceUnavailable { .. 
}), + "Error should have been ServiceUnavailable: {:?}", + err + ); + assert!( + err.to_string().contains("Not enough sleds"), + "Error should have identified 'Not enough sleds' as the cause: {:?}", err + ); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_dns_service() { + let logctx = dev::test_setup_log("test_ensure_dns_service"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the service should exist. + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + + let rack_subnet = Ipv6Subnet::::new(*sled_addr.ip()); + + // Ensure a service exists on the rack. + let services = datastore + .ensure_dns_service(&opctx, rack_subnet, 1) + .await + .expect("Should have allocated service"); + + // Only a single service was allocated, with the type / address we + // expect. + assert_eq!(1, services.len()); + assert_eq!(ServiceKind::InternalDNS, services[0].kind); + assert_eq!(sled_id, services[0].sled_id); + + // Listing services only shows this one. + let observed_services = datastore + .service_list(&opctx, sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(1, observed_services.len()); + assert_eq!(services[0].id(), observed_services[0].id()); + + // Test that ensuring services is idempotent. + let services_again = datastore + .ensure_dns_service(&opctx, rack_subnet, 1) + .await + .expect("Should have allocated service"); + assert_eq!(services_again, services); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_dns_service_multi_sled() { + let logctx = dev::test_setup_log("test_ensure_dns_service_multi_sled"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let rack_id = Uuid::new_v4(); + + // Create sleds with distinct underlay subnets. + const SLED_COUNT: u32 = DNS_REDUNDANCY; + let mut sleds = HashMap::new(); + for i in 0..SLED_COUNT { + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, i.try_into().unwrap(), 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled.clone()) + .await + .expect("Should be able to upsert sled"); + sleds.insert(sled.id(), sled); + } + let rack_subnet = Ipv6Subnet::::new(Ipv6Addr::from( + sleds.values().next().unwrap().ip, + )); + + for sled in sleds.values() { + assert_eq!( + rack_subnet, + Ipv6Subnet::::new(Ipv6Addr::from(sled.ip)), + "Test pre-condition violated: All sleds must belong to the same rack" + ); + } + + // Ensure a service exists on the rack. + const DNS_COUNT: u32 = DNS_REDUNDANCY; + let mut services = datastore + .ensure_dns_service(&opctx, rack_subnet, DNS_COUNT) + .await + .expect("Should have allocated service"); + services.sort_by(|a, b| a.id().cmp(&b.id())); + + assert_eq!(DNS_COUNT, services.len() as u32); + for svc in &services { + assert_eq!(ServiceKind::InternalDNS, svc.kind); + + // Each service should have been provisioned to a distinct sled. 
+ let observed_services = datastore + .service_list(&opctx, svc.sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(1, observed_services.len()); + assert_eq!(svc.id(), observed_services[0].id()); + } + + // Test for idempotency + let mut services_again = datastore + .ensure_dns_service(&opctx, rack_subnet, DNS_COUNT) + .await + .expect("Should have allocated service"); + services_again.sort_by(|a, b| a.id().cmp(&b.id())); + assert_eq!(services_again, services); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_dataset() { + let logctx = dev::test_setup_log("test_ensure_rack_dataset"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the dataset should exist. + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + let zpool_id = create_test_zpool(&datastore, sled_id).await; + + // Ensure a dataset exists on the rack. + let output = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::PerRack(1), + ) + .await + .expect("Should have allocated dataset"); + + // Observe that only a single dataset was allocated + assert_eq!(1, output.len()); + let (_, _, output_dataset) = &output[0]; + assert_eq!(DatasetKind::Crucible, output_dataset.kind); + assert_eq!(zpool_id, output_dataset.pool_id); + + // Listing datasets only shows this one. + let observed_datasets = datastore + .dataset_list(&opctx, zpool_id) + .await + .expect("Should be able to list datasets"); + assert_eq!(1, observed_datasets.len()); + assert_eq!(output_dataset.id(), observed_datasets[0].id()); + + // Test that ensuring datasets is idempotent. + let output_again = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::PerRack(1), + ) + .await + .expect("Should have allocated dataset"); + let (_, _, output_dataset_again) = &output_again[0]; + assert_eq!(output_dataset_again, output_dataset); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_dataset_not_enough_zpools() { + let logctx = + dev::test_setup_log("test_ensure_rack_dataset_not_enough_zpools"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the dataset should exist. + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + + // Attempt to allocate a dataset on a rack without zpools. + let err = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::PerRack(1), + ) + .await + .expect_err("Should not have allocated dataset"); + + assert!( + matches!(err, Error::ServiceUnavailable { .. 
}), + "Error should have been ServiceUnavailable: {:?}", + err + ); + assert!( + err.to_string().contains("Not enough zpools"), + "Error should have identified 'Not enough zpools' as the cause: {:?}", err + ); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_dataset_multi_sled() { + let logctx = dev::test_setup_log("test_ensure_rack_dataset_multi_sled"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let rack_id = Uuid::new_v4(); + + // Create sleds with distinct underlay subnets. + const SLED_COUNT: usize = 3; + let mut sleds = HashMap::new(); + for i in 0..SLED_COUNT { + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, i.try_into().unwrap(), 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, sled_addr.clone(), rack_id); + datastore + .sled_upsert(sled.clone()) + .await + .expect("Should be able to upsert sled"); + sleds.insert(sled.id(), sled); + create_test_zpool(&datastore, sled_id).await; + } + + // Ensure datasets exist on the rack. + let output = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::OnAll, + ) + .await + .expect("Should have allocated dataset"); + assert_eq!(SLED_COUNT, output.len()); + for (sled, zpool, dataset) in &output { + assert_eq!(DatasetKind::Crucible, dataset.kind); + assert_eq!(zpool.id(), dataset.pool_id); + assert_eq!(sled.id(), zpool.sled_id); + + let observed_datasets = datastore + .dataset_list(&opctx, zpool.id()) + .await + .expect("Should be able to list datasets"); + assert_eq!(1, observed_datasets.len()); + assert_eq!(dataset.id(), observed_datasets[0].id()) + } + + // Test that ensuring datasets is idempotent. + let output_again = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::OnAll, + ) + .await + .expect("Should have allocated dataset"); + + let mut output: Vec<_> = + output.into_iter().map(|(_, _, dataset)| dataset).collect(); + output.sort_by(|a, b| a.id().cmp(&b.id())); + let mut output_again: Vec<_> = + output_again.into_iter().map(|(_, _, dataset)| dataset).collect(); + output_again.sort_by(|a, b| a.id().cmp(&b.id())); + assert_eq!(output, output_again); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + #[tokio::test] async fn test_rack_initialize_is_idempotent() { let logctx = dev::test_setup_log("test_rack_initialize_is_idempotent"); @@ -4574,14 +5625,14 @@ mod test { // Initialize the Rack. 
let result = datastore - .rack_set_initialized(&opctx, rack.id(), vec![]) + .rack_set_initialized(&opctx, rack.id(), vec![], vec![]) .await .unwrap(); assert!(result.initialized); // Re-initialize the rack (check for idempotency) let result = datastore - .rack_set_initialized(&opctx, rack.id(), vec![]) + .rack_set_initialized(&opctx, rack.id(), vec![], vec![]) .await .unwrap(); assert!(result.initialized); diff --git a/nexus/src/db/ipv6.rs b/nexus/src/db/ipv6.rs index 2b494100825..60f7c0558c6 100644 --- a/nexus/src/db/ipv6.rs +++ b/nexus/src/db/ipv6.rs @@ -16,9 +16,19 @@ use diesel::sql_types::Inet; use ipnetwork::IpNetwork; use ipnetwork::Ipv6Network; use omicron_common::api::external::Error; +use serde::{Deserialize, Serialize}; #[derive( - Clone, Copy, AsExpression, FromSqlRow, PartialEq, Ord, PartialOrd, Eq, + Clone, + Copy, + AsExpression, + FromSqlRow, + PartialEq, + Ord, + PartialOrd, + Eq, + Deserialize, + Serialize, )] #[diesel(sql_type = Inet)] pub struct Ipv6Addr(std::net::Ipv6Addr); diff --git a/nexus/src/db/model/dataset.rs b/nexus/src/db/model/dataset.rs index fd4d24eee40..4b2b294542a 100644 --- a/nexus/src/db/model/dataset.rs +++ b/nexus/src/db/model/dataset.rs @@ -4,11 +4,14 @@ use super::{DatasetKind, Generation, Region, SqlU16}; use crate::db::collection_insert::DatastoreCollection; +use crate::db::identity::Asset; +use crate::db::ipv6; use crate::db::schema::{dataset, region}; use chrono::{DateTime, Utc}; use db_macros::Asset; +use internal_dns_client::names::{BackendName, ServiceName, AAAA, SRV}; use serde::{Deserialize, Serialize}; -use std::net::SocketAddr; +use std::net::{Ipv6Addr, SocketAddrV6}; use uuid::Uuid; /// Database representation of a Dataset. @@ -35,10 +38,10 @@ pub struct Dataset { pub pool_id: Uuid, - ip: ipnetwork::IpNetwork, + ip: ipv6::Ipv6Addr, port: SqlU16, - kind: DatasetKind, + pub kind: DatasetKind, pub size_used: Option, } @@ -46,7 +49,7 @@ impl Dataset { pub fn new( id: Uuid, pool_id: Uuid, - addr: SocketAddr, + addr: SocketAddrV6, kind: DatasetKind, ) -> Self { let size_used = match kind { @@ -65,12 +68,32 @@ impl Dataset { } } - pub fn address(&self) -> SocketAddr { + pub fn address(&self) -> SocketAddrV6 { self.address_with_port(self.port.into()) } - pub fn address_with_port(&self, port: u16) -> SocketAddr { - SocketAddr::new(self.ip.ip(), port) + pub fn address_with_port(&self, port: u16) -> SocketAddrV6 { + SocketAddrV6::new(Ipv6Addr::from(self.ip), port, 0, 0) + } +} + +impl internal_dns_client::multiclient::Service for Dataset { + fn aaaa(&self) -> AAAA { + AAAA::Zone(self.id()) + } + + fn srv(&self) -> SRV { + match self.kind { + DatasetKind::Crucible => { + SRV::Backend(BackendName::Crucible, self.id()) + } + DatasetKind::Clickhouse => SRV::Service(ServiceName::Clickhouse), + DatasetKind::Cockroach => SRV::Service(ServiceName::Cockroach), + } + } + + fn address(&self) -> SocketAddrV6 { + self.address() } } diff --git a/nexus/src/db/model/dataset_kind.rs b/nexus/src/db/model/dataset_kind.rs index c760a12f53c..ef004bef9bf 100644 --- a/nexus/src/db/model/dataset_kind.rs +++ b/nexus/src/db/model/dataset_kind.rs @@ -4,15 +4,16 @@ use super::impl_enum_type; use crate::internal_api; +use omicron_common::address::{CLICKHOUSE_PORT, COCKROACH_PORT, CRUCIBLE_PORT}; use serde::{Deserialize, Serialize}; use std::io::Write; impl_enum_type!( - #[derive(SqlType, Debug, QueryId)] + #[derive(Clone, SqlType, Debug, QueryId)] #[diesel(postgres_type(name = "dataset_kind"))] pub struct DatasetKindEnum; - #[derive(Clone, Debug, AsExpression, FromSqlRow, 
Serialize, Deserialize, PartialEq)] + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] #[diesel(sql_type = DatasetKindEnum)] pub enum DatasetKind; @@ -22,6 +23,16 @@ impl_enum_type!( Clickhouse => b"clickhouse" ); +impl DatasetKind { + pub fn port(&self) -> u16 { + match self { + DatasetKind::Crucible => CRUCIBLE_PORT, + DatasetKind::Cockroach => COCKROACH_PORT, + DatasetKind::Clickhouse => CLICKHOUSE_PORT, + } + } +} + impl From for DatasetKind { fn from(k: internal_api::params::DatasetKind) -> Self { match k { diff --git a/nexus/src/db/model/service.rs b/nexus/src/db/model/service.rs index 3501337e42b..6f05011d415 100644 --- a/nexus/src/db/model/service.rs +++ b/nexus/src/db/model/service.rs @@ -3,14 +3,19 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use super::ServiceKind; +use crate::db::identity::Asset; use crate::db::ipv6; use crate::db::schema::service; use db_macros::Asset; -use std::net::Ipv6Addr; +use internal_dns_client::names::{ServiceName, AAAA, SRV}; +use omicron_common::address::{ + DNS_SERVER_PORT, NEXUS_INTERNAL_PORT, OXIMETER_PORT, +}; +use std::net::{Ipv6Addr, SocketAddrV6}; use uuid::Uuid; /// Representation of services which may run on Sleds. -#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset)] +#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset, PartialEq)] #[diesel(table_name = service)] pub struct Service { #[diesel(embed)] @@ -36,3 +41,26 @@ impl Service { } } } + +impl internal_dns_client::multiclient::Service for Service { + fn aaaa(&self) -> AAAA { + AAAA::Zone(self.id()) + } + + fn srv(&self) -> SRV { + match self.kind { + ServiceKind::InternalDNS => SRV::Service(ServiceName::InternalDNS), + ServiceKind::Nexus => SRV::Service(ServiceName::Nexus), + ServiceKind::Oximeter => SRV::Service(ServiceName::Oximeter), + } + } + + fn address(&self) -> SocketAddrV6 { + let port = match self.kind { + ServiceKind::InternalDNS => DNS_SERVER_PORT, + ServiceKind::Nexus => NEXUS_INTERNAL_PORT, + ServiceKind::Oximeter => OXIMETER_PORT, + }; + SocketAddrV6::new(Ipv6Addr::from(self.ip), port, 0, 0) + } +} diff --git a/nexus/src/db/model/service_kind.rs b/nexus/src/db/model/service_kind.rs index f66532e64c0..0cbb0d0f658 100644 --- a/nexus/src/db/model/service_kind.rs +++ b/nexus/src/db/model/service_kind.rs @@ -8,11 +8,11 @@ use serde::{Deserialize, Serialize}; use std::io::Write; impl_enum_type!( - #[derive(SqlType, Debug, QueryId)] + #[derive(Clone, SqlType, Debug, QueryId)] #[diesel(postgres_type(name = "service_kind"))] pub struct ServiceKindEnum; - #[derive(Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] #[diesel(sql_type = ServiceKindEnum)] pub enum ServiceKind; diff --git a/nexus/src/db/model/sled.rs b/nexus/src/db/model/sled.rs index ebe492c7459..84882679087 100644 --- a/nexus/src/db/model/sled.rs +++ b/nexus/src/db/model/sled.rs @@ -13,7 +13,7 @@ use std::net::SocketAddrV6; use uuid::Uuid; /// Database representation of a Sled. 
-#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset)] +#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset, PartialEq)] #[diesel(table_name = sled)] pub struct Sled { #[diesel(embed)] diff --git a/nexus/src/db/model/zpool.rs b/nexus/src/db/model/zpool.rs index 511312a3382..475fc7bf0ee 100644 --- a/nexus/src/db/model/zpool.rs +++ b/nexus/src/db/model/zpool.rs @@ -14,7 +14,7 @@ use uuid::Uuid; /// /// A zpool represents a ZFS storage pool, allocated on a single /// physical sled. -#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset)] +#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset, PartialEq)] #[diesel(table_name = zpool)] pub struct Zpool { #[diesel(embed)] diff --git a/nexus/src/internal_api/params.rs b/nexus/src/internal_api/params.rs index 8b83138c2b5..0840feac5ce 100644 --- a/nexus/src/internal_api/params.rs +++ b/nexus/src/internal_api/params.rs @@ -74,7 +74,7 @@ impl FromStr for DatasetKind { pub struct DatasetPutRequest { /// Address on which a service is responding to requests for the /// dataset. - pub address: SocketAddr, + pub address: SocketAddrV6, /// Type of dataset being inserted. pub kind: DatasetKind, diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index fdfeb5effb4..1462ae11814 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -34,6 +34,7 @@ address = "[::1]:0" [tunables] # Allow small subnets, so we can test IP address exhaustion easily / quickly max_vpc_ipv4_subnet_prefix = 29 +enable_background_tasks = false [deployment] # Identifier for this instance of Nexus. diff --git a/nexus/tests/integration_tests/datasets.rs b/nexus/tests/integration_tests/datasets.rs index ebc89f71378..d65a7fa1f81 100644 --- a/nexus/tests/integration_tests/datasets.rs +++ b/nexus/tests/integration_tests/datasets.rs @@ -8,7 +8,7 @@ use omicron_common::api::external::ByteCount; use omicron_nexus::internal_api::params::{ DatasetKind, DatasetPutRequest, ZpoolPutRequest, }; -use std::net::{IpAddr, Ipv4Addr, SocketAddr}; +use std::net::{Ipv6Addr, SocketAddrV6}; use uuid::Uuid; use nexus_test_utils::{ControlPlaneTestContext, SLED_AGENT_UUID}; @@ -36,8 +36,7 @@ async fn test_dataset_put_success(cptestctx: &ControlPlaneTestContext) { .await .unwrap(); - let address = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let address = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let kind = DatasetKind::Crucible; let request = DatasetPutRequest { address, kind }; let dataset_id = Uuid::new_v4(); @@ -69,8 +68,7 @@ async fn test_dataset_put_bad_zpool_returns_not_found( let dataset_put_url = format!("/zpools/{}/dataset/{}", zpool_id, dataset_id); - let address = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let address = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let kind = DatasetKind::Crucible; let request = DatasetPutRequest { address, kind }; diff --git a/smf/nexus/config-partial.toml b/smf/nexus/config-partial.toml index b77ffc3137f..71f5b841a0a 100644 --- a/smf/nexus/config-partial.toml +++ b/smf/nexus/config-partial.toml @@ -23,3 +23,7 @@ if_exists = "append" # Configuration for interacting with the timeseries database [timeseries_db] address = "[fd00:1122:3344:0101::5]:8123" + +[tunables] +# TODO: Remove when RSS transfer to Nexus is fully fleshed out +enable_background_tasks = false From d2536d7d45f386a0e6516c90683de8f524cdc7a6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 21 Jun 2022 10:02:12 -0400 Subject: [PATCH 53/88] Delete 
out-dated docs --- docs/how-to-run.adoc | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index 61d70d17b1a..8cef8af83c4 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -109,28 +109,9 @@ When we deploy, we're effectively creating a number of different zones for all the components that make up Omicron (Nexus, Clickhouse, Crucible, etc). Since all these services run in different zones they cannot communicate with each other (and Sled Agent in the global zone) via `localhost`. In practice, -we'll assign addresses as per RFD 63 as well as incorporating DNS based +we assign addresses as per RFD 63 as well as incorporating DNS based service discovery. -For the purposes of local development today, we specify some hardcoded IPv6 -unique local addresses in the subnet of the first Sled Agent: `fd00:1122:3344:1::/64`: - -[options="header"] -|=================================================================================================== -| Service | Endpoint -| Sled Agent: Bootstrap | Derived from MAC address of physical data link. -| Sled Agent: Dropshot API | `[fd00:1122:3344:0101::1]:12345` -| Cockroach DB | `[fd00:1122:3344:0101::2]:32221` -| Nexus: External API | `[fd00:1122:3344:0101::3]:12220` -| Nexus: Internal API | `[fd00:1122:3344:0101::3]:12221` -| Oximeter | `[fd00:1122:3344:0101::4]:12223` -| Clickhouse | `[fd00:1122:3344:0101::5]:8123` -| Crucible Downstairs 1 | `[fd00:1122:3344:0101::6]:32345` -| Crucible Downstairs 2 | `[fd00:1122:3344:0101::7]:32345` -| Crucible Downstairs 3 | `[fd00:1122:3344:0101::8]:32345` -| Internal DNS Service | `[fd00:1122:3344:0001::1]:5353` -|=================================================================================================== - Note that Sled Agent runs in the global zone and is the one responsible for bringing up all the other other services and allocating them with vNICs and IPv6 addresses. From 4df23c2031efc4cdf09d9739f823203bafa15117 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 21 Jun 2022 10:08:50 -0400 Subject: [PATCH 54/88] jgallagher feedback --- internal-dns/src/bin/dns-server.rs | 3 ++- internal-dns/src/dns_server.rs | 4 ++-- internal-dns/src/lib.rs | 2 +- internal-dns/tests/basic_test.rs | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/internal-dns/src/bin/dns-server.rs b/internal-dns/src/bin/dns-server.rs index 12eafcc3599..b8520efdb26 100644 --- a/internal-dns/src/bin/dns-server.rs +++ b/internal-dns/src/bin/dns-server.rs @@ -64,7 +64,8 @@ async fn main() -> Result<(), anyhow::Error> { internal_dns::dns_server::run(log, db, dns_config).await? 
}; - let dropshot_server = internal_dns::start_server(config, log, db).await?; + let dropshot_server = + internal_dns::start_dropshot_server(config, log, db).await?; dropshot_server .await .map_err(|error_message| anyhow!("server exiting: {}", error_message)) diff --git a/internal-dns/src/dns_server.rs b/internal-dns/src/dns_server.rs index ccebda582f7..51a84899812 100644 --- a/internal-dns/src/dns_server.rs +++ b/internal-dns/src/dns_server.rs @@ -39,8 +39,8 @@ pub struct Server { pub handle: tokio::task::JoinHandle>, } -impl Server { - pub fn close(self) { +impl Drop for Server { + fn drop(&mut self) { self.handle.abort() } } diff --git a/internal-dns/src/lib.rs b/internal-dns/src/lib.rs index 786750c1a8f..7fee156787e 100644 --- a/internal-dns/src/lib.rs +++ b/internal-dns/src/lib.rs @@ -20,7 +20,7 @@ pub struct Config { pub data: dns_data::Config, } -pub async fn start_server( +pub async fn start_dropshot_server( config: Config, log: slog::Logger, db: Arc, diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs index af72ded52cb..d6784bddae0 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -334,7 +334,7 @@ async fn init_client_server( // launch a dropshot server let dropshot_server = - internal_dns::start_server(config, log.clone(), db).await?; + internal_dns::start_dropshot_server(config, log.clone(), db).await?; // wait for server to start tokio::time::sleep(tokio::time::Duration::from_millis(250)).await; From 5556d5f5e5b3df2ae764015eea31935287877694 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 21 Jun 2022 10:12:08 -0400 Subject: [PATCH 55/88] Patch tests --- internal-dns/tests/basic_test.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs index d6784bddae0..d09e27f18c6 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -288,7 +288,7 @@ struct TestContext { impl TestContext { async fn cleanup(self) { - self.dns_server.close(); + drop(self.dns_server); self.dropshot_server.close().await.expect("Failed to clean up server"); self.tmp.close().expect("Failed to clean up tmp directory"); } From 6126e41f6b87ffa39c206ad81f91ee7620e78fc6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 21 Jun 2022 10:12:39 -0400 Subject: [PATCH 56/88] merge --- internal-dns-client/src/multiclient.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index ca8387fca45..58b2cdea012 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -324,7 +324,7 @@ mod test { }; let dropshot_server = - internal_dns::start_server(config, log.clone(), db) + internal_dns::start_dropshot_server(config, log.clone(), db) .await .unwrap(); From d6e3c9de0d23cfe10e59bee4c23e20ee0ae847be Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 22 Jun 2022 12:12:38 -0400 Subject: [PATCH 57/88] background-work -> service-balancer --- nexus/src/app/mod.rs | 8 ++++---- nexus/src/authn/mod.rs | 14 +++++++------- nexus/src/db/datastore.rs | 2 +- nexus/src/db/fixed_data/role_assignment.rs | 2 +- nexus/src/db/fixed_data/user_builtin.rs | 14 +++++++------- nexus/tests/integration_tests/users_builtin.rs | 4 ++-- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 06f7264a124..0f8a1333773 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ 
-222,12 +222,12 @@ impl Nexus { &self.opctx_external_authn } - /// Returns an [`OpContext`] used for background tasks. - pub fn opctx_for_background(&self) -> OpContext { + /// Returns an [`OpContext`] used for balancing services. + pub fn opctx_for_service_balancer(&self) -> OpContext { OpContext::for_background( - self.log.new(o!("component" => "BackgroundWork")), + self.log.new(o!("component" => "ServiceBalancer")), Arc::clone(&self.authz), - authn::Context::internal_db_background(), + authn::Context::internal_service_balancer(), Arc::clone(&self.db_datastore), ) } diff --git a/nexus/src/authn/mod.rs b/nexus/src/authn/mod.rs index c9399bdb131..f939743e769 100644 --- a/nexus/src/authn/mod.rs +++ b/nexus/src/authn/mod.rs @@ -30,12 +30,12 @@ pub mod silos; pub use crate::db::fixed_data::silo_user::USER_TEST_PRIVILEGED; pub use crate::db::fixed_data::silo_user::USER_TEST_UNPRIVILEGED; -pub use crate::db::fixed_data::user_builtin::USER_BACKGROUND_WORK; pub use crate::db::fixed_data::user_builtin::USER_DB_INIT; pub use crate::db::fixed_data::user_builtin::USER_EXTERNAL_AUTHN; pub use crate::db::fixed_data::user_builtin::USER_INTERNAL_API; pub use crate::db::fixed_data::user_builtin::USER_INTERNAL_READ; pub use crate::db::fixed_data::user_builtin::USER_SAGA_RECOVERY; +pub use crate::db::fixed_data::user_builtin::USER_SERVICE_BALANCER; use crate::db::model::ConsoleSession; use crate::authz; @@ -171,9 +171,9 @@ impl Context { Context::context_for_builtin_user(USER_DB_INIT.id) } - /// Returns an authenticated context for Nexus-driven db work. - pub fn internal_db_background() -> Context { - Context::context_for_builtin_user(USER_BACKGROUND_WORK.id) + /// Returns an authenticated context for Nexus-driven service balancing. + pub fn internal_service_balancer() -> Context { + Context::context_for_builtin_user(USER_SERVICE_BALANCER.id) } fn context_for_builtin_user(user_builtin_id: Uuid) -> Context { @@ -219,11 +219,11 @@ impl Context { #[cfg(test)] mod test { use super::Context; - use super::USER_BACKGROUND_WORK; use super::USER_DB_INIT; use super::USER_INTERNAL_API; use super::USER_INTERNAL_READ; use super::USER_SAGA_RECOVERY; + use super::USER_SERVICE_BALANCER; use super::USER_TEST_PRIVILEGED; use super::USER_TEST_UNPRIVILEGED; use crate::db::fixed_data::user_builtin::USER_EXTERNAL_AUTHN; @@ -258,9 +258,9 @@ mod test { let actor = authn.actor().unwrap(); assert_eq!(actor.actor_id(), USER_DB_INIT.id); - let authn = Context::internal_db_background(); + let authn = Context::internal_service_balancer(); let actor = authn.actor().unwrap(); - assert_eq!(actor.actor_id(), USER_BACKGROUND_WORK.id); + assert_eq!(actor.actor_id(), USER_SERVICE_BALANCER.id); let authn = Context::internal_saga_recovery(); let actor = authn.actor().unwrap(); diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 6814b6276ac..c705c10d24f 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -2990,7 +2990,7 @@ impl DataStore { let builtin_users = [ // Note: "db_init" is also a builtin user, but that one by necessity // is created with the database. - &*authn::USER_BACKGROUND_WORK, + &*authn::USER_SERVICE_BALANCER, &*authn::USER_INTERNAL_API, &*authn::USER_INTERNAL_READ, &*authn::USER_EXTERNAL_AUTHN, diff --git a/nexus/src/db/fixed_data/role_assignment.rs b/nexus/src/db/fixed_data/role_assignment.rs index 540b57abe50..f6bbb951b6d 100644 --- a/nexus/src/db/fixed_data/role_assignment.rs +++ b/nexus/src/db/fixed_data/role_assignment.rs @@ -26,7 +26,7 @@ lazy_static! 
{ ), RoleAssignment::new( IdentityType::UserBuiltin, - user_builtin::USER_BACKGROUND_WORK.id, + user_builtin::USER_SERVICE_BALANCER.id, role_builtin::FLEET_ADMIN.resource_type, *FLEET_ID, role_builtin::FLEET_ADMIN.role_name, diff --git a/nexus/src/db/fixed_data/user_builtin.rs b/nexus/src/db/fixed_data/user_builtin.rs index 238a8f5405a..87f33fa3558 100644 --- a/nexus/src/db/fixed_data/user_builtin.rs +++ b/nexus/src/db/fixed_data/user_builtin.rs @@ -39,13 +39,13 @@ lazy_static! { "used for seeding initial database data", ); - /// Internal user for performing operations driven by Nexus, rather - /// than any API request. - pub static ref USER_BACKGROUND_WORK: UserBuiltinConfig = + /// Internal user for performing operations to manage the + /// provisioning of services across the fleet. + pub static ref USER_SERVICE_BALANCER: UserBuiltinConfig = UserBuiltinConfig::new_static( "001de000-05e4-4000-8000-00000000bac3", - "background-work", - "used for Nexus-driven database operations", + "service-balancer", + "used for Nexus-driven service balancing", ); /// Internal user used by Nexus when handling internal API requests @@ -86,16 +86,16 @@ lazy_static! { #[cfg(test)] mod test { use super::super::assert_valid_uuid; - use super::USER_BACKGROUND_WORK; use super::USER_DB_INIT; use super::USER_EXTERNAL_AUTHN; use super::USER_INTERNAL_API; use super::USER_INTERNAL_READ; use super::USER_SAGA_RECOVERY; + use super::USER_SERVICE_BALANCER; #[test] fn test_builtin_user_ids_are_valid() { - assert_valid_uuid(&USER_BACKGROUND_WORK.id); + assert_valid_uuid(&USER_SERVICE_BALANCER.id); assert_valid_uuid(&USER_DB_INIT.id); assert_valid_uuid(&USER_INTERNAL_API.id); assert_valid_uuid(&USER_EXTERNAL_AUTHN.id); diff --git a/nexus/tests/integration_tests/users_builtin.rs b/nexus/tests/integration_tests/users_builtin.rs index 0df3fbaf04b..ee4da338fcc 100644 --- a/nexus/tests/integration_tests/users_builtin.rs +++ b/nexus/tests/integration_tests/users_builtin.rs @@ -28,8 +28,8 @@ async fn test_users_builtin(cptestctx: &ControlPlaneTestContext) { let u = users.remove(&authn::USER_DB_INIT.name.to_string()).unwrap(); assert_eq!(u.identity.id, authn::USER_DB_INIT.id); let u = - users.remove(&authn::USER_BACKGROUND_WORK.name.to_string()).unwrap(); - assert_eq!(u.identity.id, authn::USER_BACKGROUND_WORK.id); + users.remove(&authn::USER_SERVICE_BALANCER.name.to_string()).unwrap(); + assert_eq!(u.identity.id, authn::USER_SERVICE_BALANCER.id); let u = users.remove(&authn::USER_INTERNAL_API.name.to_string()).unwrap(); assert_eq!(u.identity.id, authn::USER_INTERNAL_API.id); let u = users.remove(&authn::USER_INTERNAL_READ.name.to_string()).unwrap(); From 8150d607a6943ba0af79935d702193c543df69d9 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 22 Jun 2022 15:29:43 -0400 Subject: [PATCH 58/88] background opctx -> service balancer opctx --- nexus/src/app/background/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index 82e08c2b680..86e9f27300c 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -26,7 +26,7 @@ impl TaskRunner { loop { // TODO: We may want triggers to exist here, to invoke this task // more frequently (e.g., on Sled failure). 
- let opctx = nexus.opctx_for_background(); + let opctx = nexus.opctx_for_service_balancer(); if let Err(e) = service_balancer.balance_services(&opctx).await { warn!(log, "Failed to balance services: {:?}", e); From 0ff033a7978ae223f316bb7425bae42807904f9d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 22 Jun 2022 15:32:02 -0400 Subject: [PATCH 59/88] renamed opctx --- nexus/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index e11e7745b55..61186c144ac 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -128,7 +128,7 @@ impl Server { let config = internal.config; // Wait until RSS handoff completes. - let opctx = apictx.nexus.opctx_for_background(); + let opctx = apictx.nexus.opctx_for_service_balancer(); apictx.nexus.await_rack_initialization(&opctx).await; // With the exception of integration tests environments, From 4d7a46cf481782db886e78bca990bfc4d2e32fd4 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 22 Jun 2022 15:46:52 -0400 Subject: [PATCH 60/88] in tests too --- nexus/test-utils/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 3fcfcba3bdd..63779b05e09 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -124,7 +124,7 @@ pub async fn test_setup_with_config( // Perform the "handoff from RSS". // // However, RSS isn't running, so we'll do the handoff ourselves. - let opctx = internal_server.apictx.nexus.opctx_for_background(); + let opctx = internal_server.apictx.nexus.opctx_for_service_balancer(); internal_server .apictx .nexus From 470da8b19fcbc6abcf4b58b43fc8a60b12b2211c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 23 Jun 2022 21:43:06 -0400 Subject: [PATCH 61/88] review feedback --- internal-dns-client/src/multiclient.rs | 189 +++++++++---------------- internal-dns-client/src/names.rs | 6 +- 2 files changed, 70 insertions(+), 125 deletions(-) diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 58b2cdea012..2fc9089e334 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -8,6 +8,7 @@ use omicron_common::address::{ Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, DNS_SERVER_PORT, }; use slog::{info, Logger}; +use std::collections::HashMap; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use trust_dns_resolver::config::{ NameServerConfig, Protocol, ResolverConfig, ResolverOpts, @@ -16,6 +17,8 @@ use trust_dns_resolver::TokioAsyncResolver; pub type DnsError = crate::Error; +pub type AAAARecord = (crate::names::AAAA, SocketAddrV6); + /// Describes how to find the DNS servers. /// /// In production code, this is nearly always [`Ipv6Subnet`], @@ -50,13 +53,6 @@ impl DnsAddressLookup for Ipv6Subnet { } } -/// Describes a service which may be inserted into DNS records. -pub trait Service { - fn aaaa(&self) -> crate::names::AAAA; - fn srv(&self) -> crate::names::SRV; - fn address(&self) -> SocketAddrV6; -} - /// A connection used to update multiple DNS servers. pub struct Updater { log: Logger, @@ -83,37 +79,15 @@ impl Updater { /// Inserts all service records into the DNS server. /// - /// This method is most efficient when records are sorted by SRV key. + /// Each SRV record should have one or more AAAA records. 
pub async fn insert_dns_records( &self, - records: &Vec, + records: &HashMap>, ) -> Result<(), DnsError> { - let mut records = records.iter().peekable(); - - while let Some(record) = records.next() { - let srv = record.srv(); + for (srv, aaaa) in records.iter() { info!(self.log, "Inserting DNS record: {:?}", srv); - match &srv { - &crate::names::SRV::Service(_) => { - let mut aaaa = vec![(record.aaaa(), record.address())]; - while let Some(record) = records.peek() { - if record.srv() == srv { - let record = records.next().unwrap(); - aaaa.push((record.aaaa(), record.address())); - } else { - break; - } - } - - self.insert_dns_records_internal(aaaa, srv).await?; - } - &crate::names::SRV::Backend(_, _) => { - let aaaa = vec![(record.aaaa(), record.address())]; - self.insert_dns_records_internal(aaaa, record.srv()) - .await?; - } - }; + self.insert_dns_records_internal(aaaa, srv).await?; } Ok(()) } @@ -123,8 +97,8 @@ impl Updater { // - An SRV record, pointing to each of the AAAA records. async fn insert_dns_records_internal( &self, - aaaa: Vec<(crate::names::AAAA, SocketAddrV6)>, - srv_key: crate::names::SRV, + aaaa: &Vec, + srv_key: &crate::names::SRV, ) -> Result<(), DnsError> { let mut records = Vec::with_capacity(aaaa.len() + 1); @@ -409,33 +383,6 @@ mod test { logctx.cleanup_successful(); } - #[derive(Clone)] - struct TestServiceRecord { - aaaa: AAAA, - srv: SRV, - addr: SocketAddrV6, - } - - impl TestServiceRecord { - fn new(aaaa: AAAA, srv: SRV, addr: SocketAddrV6) -> Self { - Self { aaaa, srv, addr } - } - } - - impl Service for TestServiceRecord { - fn aaaa(&self) -> AAAA { - self.aaaa.clone() - } - - fn srv(&self) -> SRV { - self.srv.clone() - } - - fn address(&self) -> SocketAddrV6 { - self.addr - } - } - // Insert and retreive a single DNS record. 
#[tokio::test] async fn insert_and_lookup_one_record() { @@ -452,23 +399,28 @@ mod test { .expect("Error creating localhost resolver"); let updater = Updater::new(&address_getter, logctx.log.clone()); - let record = TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), + let records = HashMap::from([( SRV::Service(ServiceName::Cockroach), - SocketAddrV6::new( - Ipv6Addr::from_str("ff::01").unwrap(), - 12345, - 0, - 0, - ), - ); - updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + vec![( + AAAA::Zone(Uuid::new_v4()), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 12345, + 0, + 0, + ), + )], + )]); + updater.insert_dns_records(&records).await.unwrap(); let ip = resolver .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) .await .expect("Should have been able to look up IP address"); - assert_eq!(&ip, record.addr.ip()); + assert_eq!( + &ip, + records[&SRV::Service(ServiceName::Cockroach)][0].1.ip() + ); logctx.cleanup_successful(); } @@ -522,36 +474,31 @@ mod test { 0, ); - let records = vec![ + let srv_crdb = SRV::Service(ServiceName::Cockroach); + let srv_clickhouse = SRV::Service(ServiceName::Clickhouse); + let srv_backend = SRV::Backend(BackendName::Crucible, Uuid::new_v4()); + + let records = HashMap::from([ // Three Cockroach services - TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), - SRV::Service(ServiceName::Cockroach), - cockroach_addrs[0], - ), - TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), - SRV::Service(ServiceName::Cockroach), - cockroach_addrs[1], - ), - TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), - SRV::Service(ServiceName::Cockroach), - cockroach_addrs[2], + ( + srv_crdb.clone(), + vec![ + (AAAA::Zone(Uuid::new_v4()), cockroach_addrs[0]), + (AAAA::Zone(Uuid::new_v4()), cockroach_addrs[1]), + (AAAA::Zone(Uuid::new_v4()), cockroach_addrs[2]), + ], ), // One Clickhouse service - TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), - SRV::Service(ServiceName::Clickhouse), - clickhouse_addr, + ( + srv_clickhouse.clone(), + vec![(AAAA::Zone(Uuid::new_v4()), clickhouse_addr)], ), // One Backend service - TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), - SRV::Backend(BackendName::Crucible, Uuid::new_v4()), - crucible_addr, + ( + srv_backend.clone(), + vec![(AAAA::Zone(Uuid::new_v4()), crucible_addr)], ), - ]; + ]); updater.insert_dns_records(&records).await.unwrap(); // Look up Cockroach @@ -570,7 +517,7 @@ mod test { // Look up Backend Service let ip = resolver - .lookup_ipv6(records[4].srv.clone()) + .lookup_ipv6(srv_backend) .await .expect("Should have been able to look up IP address"); assert_eq!(&ip, crucible_addr.ip()); @@ -578,15 +525,10 @@ mod test { // If we remove the AAAA records for two of the CRDB services, // only one will remain. updater - .dns_records_delete(&vec![DnsRecordKey { - name: records[0].aaaa.to_string(), - }]) - .await - .expect("Should have been able to delete record"); - updater - .dns_records_delete(&vec![DnsRecordKey { - name: records[1].aaaa.to_string(), - }]) + .dns_records_delete(&vec![ + DnsRecordKey { name: records[&srv_crdb][0].0.to_string() }, + DnsRecordKey { name: records[&srv_crdb][1].0.to_string() }, + ]) .await .expect("Should have been able to delete record"); let ip = resolver @@ -614,37 +556,40 @@ mod test { let updater = Updater::new(&address_getter, logctx.log.clone()); // Insert a record, observe that it exists. 
- let mut record = TestServiceRecord::new( - AAAA::Zone(Uuid::new_v4()), - SRV::Service(ServiceName::Cockroach), - SocketAddrV6::new( - Ipv6Addr::from_str("ff::01").unwrap(), - 12345, - 0, - 0, - ), - ); - updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + let srv_crdb = SRV::Service(ServiceName::Cockroach); + let mut records = HashMap::from([( + srv_crdb.clone(), + vec![( + AAAA::Zone(Uuid::new_v4()), + SocketAddrV6::new( + Ipv6Addr::from_str("ff::01").unwrap(), + 12345, + 0, + 0, + ), + )], + )]); + updater.insert_dns_records(&records).await.unwrap(); let ip = resolver .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) .await .expect("Should have been able to look up IP address"); - assert_eq!(&ip, record.addr.ip()); + assert_eq!(&ip, records[&srv_crdb][0].1.ip()); // If we insert the same record with a new address, it should be // updated. - record.addr = SocketAddrV6::new( + records.get_mut(&srv_crdb).unwrap()[0].1 = SocketAddrV6::new( Ipv6Addr::from_str("ee::02").unwrap(), 54321, 0, 0, ); - updater.insert_dns_records(&vec![record.clone()]).await.unwrap(); + updater.insert_dns_records(&records).await.unwrap(); let ip = resolver .lookup_ipv6(SRV::Service(ServiceName::Cockroach)) .await .expect("Should have been able to look up IP address"); - assert_eq!(&ip, record.addr.ip()); + assert_eq!(&ip, records[&srv_crdb][0].1.ip()); logctx.cleanup_successful(); } diff --git a/internal-dns-client/src/names.rs b/internal-dns-client/src/names.rs index dbcc0d9f01c..1b633f915e1 100644 --- a/internal-dns-client/src/names.rs +++ b/internal-dns-client/src/names.rs @@ -10,7 +10,7 @@ use uuid::Uuid; pub(crate) const DNS_ZONE: &str = "control-plane.oxide.internal"; /// Names for services where backends are interchangeable. -#[derive(Clone, Debug, PartialEq, PartialOrd)] +#[derive(Clone, Debug, Hash, Eq, PartialEq, PartialOrd)] pub enum ServiceName { Clickhouse, Cockroach, @@ -32,7 +32,7 @@ impl fmt::Display for ServiceName { } /// Names for services where backends are not interchangeable. -#[derive(Clone, Debug, PartialEq, PartialOrd)] +#[derive(Clone, Debug, Hash, Eq, PartialEq, PartialOrd)] pub enum BackendName { Crucible, SledAgent, @@ -47,7 +47,7 @@ impl fmt::Display for BackendName { } } -#[derive(Clone, Debug, PartialEq, PartialOrd)] +#[derive(Clone, Debug, Hash, Eq, PartialEq, PartialOrd)] pub enum SRV { /// A service identified and accessed by name, such as "nexus", "CRDB", etc. 
/// From b5916e00d7834b792ce78452546a1899df8f1853 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 23 Jun 2022 22:16:57 -0400 Subject: [PATCH 62/88] merge --- sled-agent/src/params.rs | 16 ++++++++-------- sled-agent/src/rack_setup/service.rs | 19 +++++++++++++++++-- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index dbb0eac57a1..4752caad940 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -251,12 +251,12 @@ pub struct DatasetEnsureBody { pub address: SocketAddrV6, } -impl internal_dns_client::multiclient::Service for DatasetEnsureBody { - fn aaaa(&self) -> AAAA { +impl DatasetEnsureBody { + pub fn aaaa(&self) -> AAAA { AAAA::Zone(self.id) } - fn srv(&self) -> SRV { + pub fn srv(&self) -> SRV { match self.dataset_kind { DatasetKind::Crucible => { SRV::Backend(BackendName::Crucible, self.id) @@ -268,7 +268,7 @@ impl internal_dns_client::multiclient::Service for DatasetEnsureBody { } } - fn address(&self) -> SocketAddrV6 { + pub fn address(&self) -> SocketAddrV6 { self.address } } @@ -342,12 +342,12 @@ pub struct ServiceRequest { pub service_type: ServiceType, } -impl internal_dns_client::multiclient::Service for ServiceRequest { - fn aaaa(&self) -> AAAA { +impl ServiceRequest { + pub fn aaaa(&self) -> AAAA { AAAA::Zone(self.id) } - fn srv(&self) -> SRV { + pub fn srv(&self) -> SRV { match self.service_type { ServiceType::InternalDns { .. } => { SRV::Service(ServiceName::InternalDNS) @@ -357,7 +357,7 @@ impl internal_dns_client::multiclient::Service for ServiceRequest { } } - fn address(&self) -> SocketAddrV6 { + pub fn address(&self) -> SocketAddrV6 { match self.service_type { ServiceType::InternalDns { server_address, .. } => server_address, ServiceType::Nexus { internal_address, .. 
} => internal_address, diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index a4f7032b385..96aa12dfacf 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -604,10 +604,17 @@ impl ServiceInner { ) .await?; + let mut records = HashMap::new(); + for dataset in &allocation.services_request.datasets { + records + .entry(dataset.srv()) + .or_insert_with(Vec::new) + .push((dataset.aaaa(), dataset.address())); + } self.dns_servers .get() .expect("DNS servers must be initialized first") - .insert_dns_records(&allocation.services_request.datasets) + .insert_dns_records(&records) .await?; Ok(()) }, @@ -638,10 +645,18 @@ impl ServiceInner { .collect::>(); self.initialize_services(sled_address, &all_services).await?; + + let mut records = HashMap::new(); + for service in &all_services { + records + .entry(service.srv()) + .or_insert_with(Vec::new) + .push((service.aaaa(), service.address())); + } self.dns_servers .get() .expect("DNS servers must be initialized first") - .insert_dns_records(&all_services) + .insert_dns_records(&records) .await?; Ok(()) }, From b5f1e920d472c12cbb14285f84c19568422226bd Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 23 Jun 2022 22:46:26 -0400 Subject: [PATCH 63/88] merge --- nexus/src/app/background/services.rs | 30 +++++++++++++++++----------- nexus/src/db/model/dataset.rs | 10 ++-------- nexus/src/db/model/service.rs | 8 +++----- 3 files changed, 23 insertions(+), 25 deletions(-) diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index 4a97d89c407..f2815230362 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -15,9 +15,7 @@ use crate::db::model::Sled; use crate::db::model::Zpool; use crate::Nexus; use futures::stream::{self, StreamExt, TryStreamExt}; -use internal_dns_client::multiclient::{ - Service as DnsService, Updater as DnsUpdater, -}; +use internal_dns_client::multiclient::Updater as DnsUpdater; use omicron_common::address::{ DNS_PORT, DNS_REDUNDANCY, DNS_SERVER_PORT, NEXUS_EXTERNAL_PORT, NEXUS_INTERNAL_PORT, @@ -104,7 +102,7 @@ impl ServiceBalancer { async fn instantiate_services( &self, opctx: &OpContext, - mut services: Vec, + services: Vec, ) -> Result<(), Error> { let mut sled_ids = HashSet::new(); for svc in &services { @@ -166,12 +164,15 @@ impl ServiceBalancer { }) .await?; - // Putting records of the same SRV right next to each other isn't - // strictly necessary, but doing so makes the record insertion more - // efficient. - services.sort_by(|a, b| a.srv().partial_cmp(&b.srv()).unwrap()); + let mut records = HashMap::new(); + for service in &services { + records + .entry(service.srv()) + .or_insert_with(Vec::new) + .push((service.aaaa(), service.address())); + } self.dns_updater - .insert_dns_records(&services) + .insert_dns_records(&records) .await .map_err(|e| Error::internal_error(&e.to_string()))?; @@ -360,10 +361,15 @@ impl ServiceBalancer { .await?; // Ensure all DNS records are updated for the created datasets. 
+ let mut records = HashMap::new(); + for (_, _, dataset) in &datasets { + records + .entry(dataset.srv()) + .or_insert_with(Vec::new) + .push((dataset.aaaa(), dataset.address())); + } self.dns_updater - .insert_dns_records( - &datasets.into_iter().map(|(_, _, dataset)| dataset).collect(), - ) + .insert_dns_records(&records) .await .map_err(|e| Error::internal_error(&e.to_string()))?; diff --git a/nexus/src/db/model/dataset.rs b/nexus/src/db/model/dataset.rs index 4b2b294542a..b85e97b1dfe 100644 --- a/nexus/src/db/model/dataset.rs +++ b/nexus/src/db/model/dataset.rs @@ -75,14 +75,12 @@ impl Dataset { pub fn address_with_port(&self, port: u16) -> SocketAddrV6 { SocketAddrV6::new(Ipv6Addr::from(self.ip), port, 0, 0) } -} -impl internal_dns_client::multiclient::Service for Dataset { - fn aaaa(&self) -> AAAA { + pub fn aaaa(&self) -> AAAA { AAAA::Zone(self.id()) } - fn srv(&self) -> SRV { + pub fn srv(&self) -> SRV { match self.kind { DatasetKind::Crucible => { SRV::Backend(BackendName::Crucible, self.id()) @@ -91,10 +89,6 @@ impl internal_dns_client::multiclient::Service for Dataset { DatasetKind::Cockroach => SRV::Service(ServiceName::Cockroach), } } - - fn address(&self) -> SocketAddrV6 { - self.address() - } } // Datasets contain regions diff --git a/nexus/src/db/model/service.rs b/nexus/src/db/model/service.rs index 6f05011d415..9aeb3d0e873 100644 --- a/nexus/src/db/model/service.rs +++ b/nexus/src/db/model/service.rs @@ -40,14 +40,12 @@ impl Service { kind, } } -} -impl internal_dns_client::multiclient::Service for Service { - fn aaaa(&self) -> AAAA { + pub fn aaaa(&self) -> AAAA { AAAA::Zone(self.id()) } - fn srv(&self) -> SRV { + pub fn srv(&self) -> SRV { match self.kind { ServiceKind::InternalDNS => SRV::Service(ServiceName::InternalDNS), ServiceKind::Nexus => SRV::Service(ServiceName::Nexus), @@ -55,7 +53,7 @@ impl internal_dns_client::multiclient::Service for Service { } } - fn address(&self) -> SocketAddrV6 { + pub fn address(&self) -> SocketAddrV6 { let port = match self.kind { ServiceKind::InternalDNS => DNS_SERVER_PORT, ServiceKind::Nexus => NEXUS_INTERNAL_PORT, From 364efb39be87a6822174a98de0ab8b7497aa0d06 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 23 Jun 2022 23:14:59 -0400 Subject: [PATCH 64/88] rack ID to test interfaces --- nexus/src/app/mod.rs | 4 ---- nexus/src/app/test_interfaces.rs | 7 +++++++ nexus/tests/integration_tests/rack.rs | 1 + 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 921fce51790..bf19528dd61 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -202,10 +202,6 @@ impl Nexus { &self.tunables } - pub fn rack_id(&self) -> Uuid { - self.rack_id - } - pub async fn wait_for_populate(&self) -> Result<(), anyhow::Error> { let mut my_rx = self.populate_status.clone(); loop { diff --git a/nexus/src/app/test_interfaces.rs b/nexus/src/app/test_interfaces.rs index a15f46096a8..40faaae5e1c 100644 --- a/nexus/src/app/test_interfaces.rs +++ b/nexus/src/app/test_interfaces.rs @@ -14,6 +14,9 @@ use uuid::Uuid; /// Exposes additional [`Nexus`] interfaces for use by the test suite #[async_trait] pub trait TestInterfaces { + /// Access the Rack ID of the currently executing Nexus. + fn rack_id(&self) -> Uuid; + /// Returns the SledAgentClient for an Instance from its id. We may also /// want to split this up into instance_lookup_by_id() and instance_sled(), /// but after all it's a test suite special to begin with. 
@@ -39,6 +42,10 @@ pub trait TestInterfaces { #[async_trait] impl TestInterfaces for super::Nexus { + fn rack_id(&self) -> Uuid { + self.rack_id + } + async fn instance_sled_by_id( &self, id: &Uuid, diff --git a/nexus/tests/integration_tests/rack.rs b/nexus/tests/integration_tests/rack.rs index dfcbde9740f..5a6e28ab70a 100644 --- a/nexus/tests/integration_tests/rack.rs +++ b/nexus/tests/integration_tests/rack.rs @@ -7,6 +7,7 @@ use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_nexus::external_api::views::Rack; +use omicron_nexus::TestInterfaces; #[nexus_test] async fn test_list_own_rack(cptestctx: &ControlPlaneTestContext) { From 19deb801548f9f1f5fdd4ab9579c6e19835f8637 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 24 Jun 2022 13:58:12 -0400 Subject: [PATCH 65/88] comments --- nexus/src/app/background/services.rs | 6 ++++++ nexus/src/app/rack.rs | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index f2815230362..e2c021bcefe 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -45,6 +45,12 @@ struct ExpectedService { redundancy: ServiceRedundancy, } +// NOTE: longer-term, when we integrate multi-rack support, +// it is expected that Nexus will manage multiple racks +// within the fleet, rather than simply per-rack services. +// +// When that happens, it is likely that many of the "per-rack" +// services will become "per-fleet", such as Nexus and CRDB. const EXPECTED_SERVICES: [ExpectedService; 3] = [ ExpectedService { kind: ServiceKind::InternalDNS, diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index dca24f078c6..8b81dc61fdb 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -74,7 +74,10 @@ impl super::Nexus { }) .collect(); - // TODO: Actually supply datasets provided from the sled agent. + // TODO(https://github.com/oxidecomputer/omicron/pull/1216): + // Actually supply datasets provided from the sled agent. + // + // This requires corresponding changes on the RSS side. 
self.db_datastore .rack_set_initialized(opctx, rack_id, services, vec![]) .await?; From c526ebae8c0d2554639ab33b3b90e06e0e1c09a8 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 27 Jun 2022 09:06:36 -0400 Subject: [PATCH 66/88] WIP - test skeleton --- nexus/src/app/background/mod.rs | 8 +- nexus/src/app/background/services.rs | 250 ++++++++++++++++++++++++--- 2 files changed, 236 insertions(+), 22 deletions(-) diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index 86e9f27300c..c5c94845d58 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -7,6 +7,7 @@ mod services; use crate::app::Nexus; +use internal_dns_client::multiclient::Updater as DnsUpdater; use std::sync::Arc; use tokio::task::{spawn, JoinHandle}; @@ -20,8 +21,13 @@ impl TaskRunner { pub fn new(nexus: Arc) -> Self { let handle = spawn(async move { let log = nexus.log.new(o!("component" => "BackgroundTaskRunner")); + + let dns_updater = DnsUpdater::new( + &nexus.az_subnet(), + log.new(o!("component" => "DNS Updater")), + ); let service_balancer = - services::ServiceBalancer::new(log.clone(), nexus.clone()); + services::ServiceBalancer::new(log.clone(), nexus.clone(), dns_updater); loop { // TODO: We may want triggers to exist here, to invoke this task diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index e2c021bcefe..b24bc373023 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -4,8 +4,9 @@ //! Task which ensures that expected Nexus services exist. +use async_trait::async_trait; use crate::context::OpContext; -use crate::db::datastore::DatasetRedundancy; +use crate::db::datastore::{DataStore, DatasetRedundancy}; use crate::db::identity::Asset; use crate::db::model::Dataset; use crate::db::model::DatasetKind; @@ -15,7 +16,14 @@ use crate::db::model::Sled; use crate::db::model::Zpool; use crate::Nexus; use futures::stream::{self, StreamExt, TryStreamExt}; -use internal_dns_client::multiclient::Updater as DnsUpdater; +use internal_dns_client::{ + multiclient::{ + AAAARecord, + DnsError, + Updater as DnsUpdater + }, + names::SRV, +}; use omicron_common::address::{ DNS_PORT, DNS_REDUNDANCY, DNS_SERVER_PORT, NEXUS_EXTERNAL_PORT, NEXUS_INTERNAL_PORT, @@ -26,6 +34,7 @@ use slog::Logger; use std::collections::{HashMap, HashSet}; use std::net::{Ipv6Addr, SocketAddrV6}; use std::sync::Arc; +use uuid::Uuid; // Policy for the number of services to be provisioned. #[derive(Debug)] @@ -87,6 +96,62 @@ const EXPECTED_DATASETS: [ExpectedDataset; 3] = [ }, ]; +// A trait intended to aid testing. +// +// The non-test implementation should be as simple as possible. +#[async_trait] +trait SledClientInterface { + async fn services_put(&self, body: &SledAgentTypes::ServiceEnsureBody) -> Result<(), Error>; +} + +#[async_trait] +impl SledClientInterface for sled_agent_client::Client { + async fn services_put(&self, body: &SledAgentTypes::ServiceEnsureBody) -> Result<(), Error> { + self.services_put(body).await?; + Ok(()) + } +} + +// A trait intended to aid testing. +// +// The non-test implementation should be as simple as possible. 
+#[async_trait] +trait NexusInterface { + fn rack_id(&self) -> Uuid; + fn datastore(&self) -> &Arc; + async fn sled_client(&self, id: &Uuid) -> Result, Error>; +} + +#[async_trait] +impl NexusInterface for Nexus { + fn rack_id(&self) -> Uuid { + self.rack_id + } + + fn datastore(&self) -> &Arc { + self.datastore() + } + + async fn sled_client(&self, id: &Uuid) -> Result, Error> { + self.sled_client(id).await + } +} + +// A trait intended to aid testing. +// +// The non-test implementation should be as simple as possible. +#[async_trait] +trait DnsUpdaterInterface { + async fn insert_dns_records(&self, records: &HashMap>) -> Result<(), DnsError>; +} + +#[async_trait] +impl DnsUpdaterInterface for DnsUpdater { + async fn insert_dns_records(&self, records: &HashMap>) -> Result<(), DnsError> { + self.insert_dns_records(records).await + } +} + pub struct ServiceBalancer { log: Logger, nexus: Arc, @@ -94,12 +159,7 @@ pub struct ServiceBalancer { } impl ServiceBalancer { - pub fn new(log: Logger, nexus: Arc) -> Self { - let dns_updater = DnsUpdater::new( - &nexus.az_subnet(), - log.new(o!("component" => "DNS Updater")), - ); - + pub fn new(log: Logger, nexus: Arc, dns_updater: DnsUpdater) -> Self { Self { log, nexus, dns_updater } } @@ -255,18 +315,14 @@ impl ServiceBalancer { .await } - // TODO: Consider using sagas to ensure the rollout of services. - // - // Not using sagas *happens* to be fine because these operations are - // re-tried periodically, but that's kind forcing a dependency on the - // caller. async fn ensure_services_provisioned( &self, opctx: &OpContext, + expected_services: &[ExpectedService], ) -> Result<(), Error> { // Provision services within the database. let mut svcs = vec![]; - for expected_svc in &EXPECTED_SERVICES { + for expected_svc in expected_services { info!(self.log, "Ensuring service {:?} exists", expected_svc); match expected_svc.redundancy { ServiceRedundancy::PerRack(desired_count) => { @@ -385,9 +441,10 @@ impl ServiceBalancer { async fn ensure_datasets_provisioned( &self, opctx: &OpContext, + expected_datasets: &[ExpectedDataset] ) -> Result<(), Error> { // Provision all dataset types concurrently. - stream::iter(&EXPECTED_DATASETS) + stream::iter(expected_datasets) .map(Ok::<_, Error>) .try_for_each_concurrent(None, |expected_dataset| async move { info!( @@ -405,17 +462,168 @@ impl ServiceBalancer { .await } - // Provides a single point-in-time evaluation and adjustment of - // the services provisioned within the rack. + /// Provides a single point-in-time evaluation and adjustment of + /// the services provisioned within the rack. + /// + /// May adjust the provisioned services to meet the redundancy of the + /// rack, if necessary. + // TODO: Consider using sagas to ensure the rollout of services. // - // May adjust the provisioned services to meet the redundancy of the - // rack, if necessary. + // Not using sagas *happens* to be fine because these operations are + // re-tried periodically, but that's kind forcing a dependency on the + // caller. 
pub async fn balance_services( &self, opctx: &OpContext, ) -> Result<(), Error> { - self.ensure_datasets_provisioned(opctx).await?; - self.ensure_services_provisioned(opctx).await?; + self.ensure_datasets_provisioned(opctx, &EXPECTED_DATASETS).await?; + self.ensure_services_provisioned(opctx, &EXPECTED_SERVICES).await?; Ok(()) } } + +#[cfg(test)] +mod test { + use super::*; + + use crate::{authn, authz}; + use dropshot::test_util::LogContext; + use nexus_test_utils::db::test_setup_database; + use omicron_test_utils::dev; + use std::sync::Arc; + + // TODO: maybe figure out what you *want* to test? + // I suspect we'll need to refactor this API for testability. + // + // - Dataset init: + // - Call to DB + // - For each new dataset... + // - Call to Sled (filesystem put) + // - Update DNS record + // + // - Service init: + // - Call to DB + // - For each sled... + // - List svcs + // - Put svcs + // - For each new service... + // - Update DNS record + // + // TODO: Also, idempotency check + + struct ProvisionTest { + logctx: LogContext, + opctx: OpContext, + db: dev::db::CockroachInstance, + datastore: Arc, + } + + impl ProvisionTest { + // Create the logger and setup the database. + async fn new(name: &str) -> Self { + let logctx = dev::test_setup_log(name); + let db = test_setup_database(&logctx.log).await; + let (_, datastore) = + crate::db::datastore::datastore_test(&logctx, &db).await; + let opctx = OpContext::for_background( + logctx.log.new(o!()), + Arc::new(authz::Authz::new(&logctx.log)), + authn::Context::internal_service_balancer(), + datastore.clone(), + ); + Self { + logctx, + opctx, + db, + datastore, + } + } + + async fn cleanup(mut self) { + self.db.cleanup().await.unwrap(); + self.logctx.cleanup_successful(); + } + } + + #[derive(Clone)] + struct FakeNexus { + datastore: Arc, + } + + #[derive(Clone)] + struct FakeDnsUpdater { + + } + + // TODO: interfaces: + // + // - Nexus + // - Datastore: ✔ + // - Sled Client: + // - Rack ID: easy + // - Rack Subnet: easy + // + // + // - DNS service + // - insert dns records + + #[tokio::test] + async fn test_provision_dataset_on_all() { + let test = ProvisionTest::new("test_provision_dataset_on_all").await; + + // TODO: move into "test"? + let nexus = Arc::new(FakeNexus { + datastore: test.datastore.clone(), + }); + let dns_updater = FakeDnsUpdater {}; + + let service_balancer = ServiceBalancer::new( + test.logctx.log.clone(), + nexus.clone(), + dns_updater.clone(), + ); + + // TODO: Upsert zpools? + // TODO: Also, maybe add a test when invoking this fn on "no zpools". + + + // Make the request to the service balancer for Crucibles on all Zpools. + let expected_datasets = [ + ExpectedDataset { + kind: DatasetKind::Crucible, + redundancy: DatasetRedundancy::OnAll, + } + ]; + service_balancer.ensure_datasets_provisioned( + &test.opctx, + &expected_datasets, + ).await.unwrap(); + + // TODO: Validate that: + // - That "filesystem_put" was invoked -> Store the calls? + // - That the DNS record was updated -> Store the records? 
+ + test.cleanup().await; + } + + #[tokio::test] + async fn test_provision_dataset_per_rack() { + let expected_datasets = [ + ExpectedDataset { + kind: DatasetKind::Crucible, + redundancy: DatasetRedundancy::PerRack(2), + } + ]; + todo!(); + } + + #[tokio::test] + async fn test_provision_service_per_rack() { + todo!(); + } + + #[tokio::test] + async fn test_provision_service_dns_per_az() { + todo!(); + } +} From f2584885189c2f87e23f58d05229a1a040b8cf09 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 27 Jun 2022 20:28:07 -0400 Subject: [PATCH 67/88] More work towards tests --- nexus/src/app/background/fakes.rs | 164 ++++++++++++++++ nexus/src/app/background/interfaces.rs | 92 +++++++++ nexus/src/app/background/mod.rs | 3 + nexus/src/app/background/services.rs | 247 ++++++++++++++----------- 4 files changed, 400 insertions(+), 106 deletions(-) create mode 100644 nexus/src/app/background/fakes.rs create mode 100644 nexus/src/app/background/interfaces.rs diff --git a/nexus/src/app/background/fakes.rs b/nexus/src/app/background/fakes.rs new file mode 100644 index 00000000000..cfbcce0f59a --- /dev/null +++ b/nexus/src/app/background/fakes.rs @@ -0,0 +1,164 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Test-only implementations of interfaces used by background tasks. + +use super::interfaces::{ + DnsUpdaterInterface, + NexusInterface, + SledClientInterface, +}; + +use async_trait::async_trait; +use crate::db::datastore::DataStore; +use internal_dns_client::{ + multiclient::{ + AAAARecord, + DnsError, + }, + names::SRV, +}; +use omicron_common::address::{ + RACK_PREFIX, Ipv6Subnet, +}; +use omicron_common::api::external::Error; +use sled_agent_client::types as SledAgentTypes; +use std::collections::HashMap; +use std::sync::{Arc, Mutex}; +use uuid::Uuid; + +/// A fake implementation of a Sled Agent client. +/// +/// In lieu of any networked requests, stores onto the requested services and +/// datasets for later inspection. +pub struct FakeSledClientInner { + service_request: Option, + dataset_requests: Vec, +} + +#[derive(Clone)] +pub struct FakeSledClient { + inner: Arc>, +} + +impl FakeSledClient { + fn new() -> Arc { + Arc::new( + Self { + inner: Arc::new(Mutex::new( + FakeSledClientInner { + service_request: None, + dataset_requests: vec![], + } + )) + } + ) + } + + pub fn service_requests(&self) -> Vec { + self.inner.lock().unwrap().service_request + .as_ref() + .map(|request| request.services.clone()) + .unwrap_or(vec![]) + } + + pub fn dataset_requests(&self) -> Vec { + self.inner.lock().unwrap().dataset_requests.clone() + } +} + +#[async_trait] +impl SledClientInterface for FakeSledClient { + async fn services_put(&self, body: &SledAgentTypes::ServiceEnsureBody) -> Result<(), Error> { + let old = self.inner.lock().unwrap().service_request.replace(body.clone()); + assert!(old.is_none(), "Should only set services once (was {old:?}, inserted {body:?})"); + Ok(()) + } + + async fn filesystem_put(&self, body: &SledAgentTypes::DatasetEnsureBody) -> Result<(), Error> { + self.inner.lock().unwrap().dataset_requests.push(body.clone()); + Ok(()) + } +} + +/// Provides an abstraction of Nexus which can be used by tests. +/// +/// Wraps a real datastore, but fakes out all networked requests. 
+#[derive(Clone)] +pub struct FakeNexus { + datastore: Arc, + rack_id: Uuid, + rack_subnet: Ipv6Subnet, + sleds: Arc>>>, +} + +impl FakeNexus { + pub fn new(datastore: Arc, rack_subnet: Ipv6Subnet) -> Arc { + Arc::new(Self { + datastore, + rack_id: Uuid::new_v4(), + rack_subnet, + sleds: Arc::new(Mutex::new(HashMap::new())), + }) + } +} + +#[async_trait] +impl NexusInterface for FakeNexus { + fn rack_id(&self) -> Uuid { + self.rack_id + } + + fn rack_subnet(&self) -> Ipv6Subnet { + self.rack_subnet + } + + fn datastore(&self) -> &Arc { + &self.datastore + } + + async fn sled_client(&self, id: &Uuid) -> Result, Error> { + let sled = self.sleds + .lock() + .unwrap() + .entry(*id) + .or_insert_with(|| FakeSledClient::new()) + .clone(); + Ok(sled) + } +} + +/// A fake implementation of the DNS updater. +/// +/// Avoids all networking, instead storing all outgoing requests for later +/// inspection. +#[derive(Clone)] +pub struct FakeDnsUpdater { + records: Arc>>>, +} + +impl FakeDnsUpdater { + pub fn new() -> Self { + Self { + records: Arc::new(Mutex::new(HashMap::new())), + } + } + + // Get a copy of all records. + pub fn records(&self) -> HashMap> { + self.records.lock().unwrap().clone() + } +} + +#[async_trait] +impl DnsUpdaterInterface for FakeDnsUpdater { + async fn insert_dns_records(&self, records: &HashMap>) -> Result<(), DnsError> { + let mut our_records = self.records.lock().unwrap(); + for (k, v) in records { + let old = our_records.insert(k.clone(), v.clone()); + assert!(old.is_none(), "Inserted key {k}, but found old value: {old:?}"); + } + Ok(()) + } +} diff --git a/nexus/src/app/background/interfaces.rs b/nexus/src/app/background/interfaces.rs new file mode 100644 index 00000000000..43549a52415 --- /dev/null +++ b/nexus/src/app/background/interfaces.rs @@ -0,0 +1,92 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Interfaces which can be faked out for testing. + +use async_trait::async_trait; +use crate::db::datastore::DataStore; +use crate::Nexus; +use internal_dns_client::{ + multiclient::{ + AAAARecord, + DnsError, + Updater as DnsUpdater + }, + names::SRV, +}; +use omicron_common::address::{ + RACK_PREFIX, Ipv6Subnet, +}; +use omicron_common::api::external::Error; +use sled_agent_client::types as SledAgentTypes; +use std::collections::HashMap; +use std::sync::Arc; +use uuid::Uuid; + +// A trait intended to aid testing. +// +// The non-test implementation should be as simple as possible. +#[async_trait] +pub trait SledClientInterface { + async fn services_put(&self, body: &SledAgentTypes::ServiceEnsureBody) -> Result<(), Error>; + async fn filesystem_put(&self, body: &SledAgentTypes::DatasetEnsureBody) -> Result<(), Error>; +} + +#[async_trait] +impl SledClientInterface for sled_agent_client::Client { + async fn services_put(&self, body: &SledAgentTypes::ServiceEnsureBody) -> Result<(), Error> { + self.services_put(body).await?; + Ok(()) + } + + async fn filesystem_put(&self, body: &SledAgentTypes::DatasetEnsureBody) -> Result<(), Error> { + self.filesystem_put(body).await?; + Ok(()) + } +} + +// A trait intended to aid testing. +// +// The non-test implementation should be as simple as possible. 
+#[async_trait] +pub trait NexusInterface { + fn rack_id(&self) -> Uuid; + fn rack_subnet(&self) -> Ipv6Subnet; + fn datastore(&self) -> &Arc; + async fn sled_client(&self, id: &Uuid) -> Result, Error>; +} + +#[async_trait] +impl NexusInterface for Nexus { + fn rack_id(&self) -> Uuid { + self.rack_id + } + + fn rack_subnet(&self) -> Ipv6Subnet { + self.rack_subnet + } + + fn datastore(&self) -> &Arc { + self.datastore() + } + + async fn sled_client(&self, id: &Uuid) -> Result, Error> { + self.sled_client(id).await + } +} + +// A trait intended to aid testing. +// +// The non-test implementation should be as simple as possible. +#[async_trait] +pub trait DnsUpdaterInterface { + async fn insert_dns_records(&self, records: &HashMap>) -> Result<(), DnsError>; +} + +#[async_trait] +impl DnsUpdaterInterface for DnsUpdater { + async fn insert_dns_records(&self, records: &HashMap>) -> Result<(), DnsError> { + self.insert_dns_records(records).await + } +} diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index c5c94845d58..684ba5f9cfd 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -4,6 +4,9 @@ //! Background tasks managed by Nexus. +#[cfg(test)] +mod fakes; +mod interfaces; mod services; use crate::app::Nexus; diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index b24bc373023..3023d9b8354 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -4,9 +4,8 @@ //! Task which ensures that expected Nexus services exist. -use async_trait::async_trait; use crate::context::OpContext; -use crate::db::datastore::{DataStore, DatasetRedundancy}; +use crate::db::datastore::DatasetRedundancy; use crate::db::identity::Asset; use crate::db::model::Dataset; use crate::db::model::DatasetKind; @@ -14,16 +13,7 @@ use crate::db::model::Service; use crate::db::model::ServiceKind; use crate::db::model::Sled; use crate::db::model::Zpool; -use crate::Nexus; use futures::stream::{self, StreamExt, TryStreamExt}; -use internal_dns_client::{ - multiclient::{ - AAAARecord, - DnsError, - Updater as DnsUpdater - }, - names::SRV, -}; use omicron_common::address::{ DNS_PORT, DNS_REDUNDANCY, DNS_SERVER_PORT, NEXUS_EXTERNAL_PORT, NEXUS_INTERNAL_PORT, @@ -32,9 +22,10 @@ use omicron_common::api::external::Error; use sled_agent_client::types as SledAgentTypes; use slog::Logger; use std::collections::{HashMap, HashSet}; +use std::marker::PhantomData; use std::net::{Ipv6Addr, SocketAddrV6}; use std::sync::Arc; -use uuid::Uuid; +use super::interfaces::{DnsUpdaterInterface, NexusInterface, SledClientInterface}; // Policy for the number of services to be provisioned. #[derive(Debug)] @@ -96,71 +87,31 @@ const EXPECTED_DATASETS: [ExpectedDataset; 3] = [ }, ]; -// A trait intended to aid testing. -// -// The non-test implementation should be as simple as possible. -#[async_trait] -trait SledClientInterface { - async fn services_put(&self, body: &SledAgentTypes::ServiceEnsureBody) -> Result<(), Error>; -} - -#[async_trait] -impl SledClientInterface for sled_agent_client::Client { - async fn services_put(&self, body: &SledAgentTypes::ServiceEnsureBody) -> Result<(), Error> { - self.services_put(body).await?; - Ok(()) - } -} - -// A trait intended to aid testing. -// -// The non-test implementation should be as simple as possible. 
-#[async_trait] -trait NexusInterface { - fn rack_id(&self) -> Uuid; - fn datastore(&self) -> &Arc; - async fn sled_client(&self, id: &Uuid) -> Result, Error>; -} - -#[async_trait] -impl NexusInterface for Nexus { - fn rack_id(&self) -> Uuid { - self.rack_id - } - - fn datastore(&self) -> &Arc { - self.datastore() - } - - async fn sled_client(&self, id: &Uuid) -> Result, Error> { - self.sled_client(id).await - } -} - -// A trait intended to aid testing. -// -// The non-test implementation should be as simple as possible. -#[async_trait] -trait DnsUpdaterInterface { - async fn insert_dns_records(&self, records: &HashMap>) -> Result<(), DnsError>; -} - -#[async_trait] -impl DnsUpdaterInterface for DnsUpdater { - async fn insert_dns_records(&self, records: &HashMap>) -> Result<(), DnsError> { - self.insert_dns_records(records).await - } -} - -pub struct ServiceBalancer { +/// Contains logic for balancing services within a fleet. +/// +/// This struct operates on generic parameters to easily permit +/// dependency injection via testing, but in production, practically +/// operates on the same concrete types. +pub struct ServiceBalancer +where + D: DnsUpdaterInterface, + N: NexusInterface, + S: SledClientInterface, +{ log: Logger, - nexus: Arc, - dns_updater: DnsUpdater, + nexus: Arc, + dns_updater: D, + phantom: PhantomData, } -impl ServiceBalancer { - pub fn new(log: Logger, nexus: Arc, dns_updater: DnsUpdater) -> Self { - Self { log, nexus, dns_updater } +impl ServiceBalancer +where + D: DnsUpdaterInterface, + N: NexusInterface, + S: SledClientInterface, +{ + pub fn new(log: Logger, nexus: Arc, dns_updater: D) -> Self { + Self { log, nexus, dns_updater, phantom: PhantomData } } // Reaches out to all sled agents implied in "services", and @@ -299,7 +250,7 @@ impl ServiceBalancer { ) -> Result, Error> { self.nexus .datastore() - .ensure_rack_service(opctx, self.nexus.rack_id, kind, desired_count) + .ensure_rack_service(opctx, self.nexus.rack_id(), kind, desired_count) .await } @@ -311,7 +262,7 @@ impl ServiceBalancer { ) -> Result, Error> { self.nexus .datastore() - .ensure_dns_service(opctx, self.nexus.rack_subnet, desired_count) + .ensure_dns_service(opctx, self.nexus.rack_subnet(), desired_count) .await } @@ -361,7 +312,7 @@ impl ServiceBalancer { let new_datasets = self .nexus .datastore() - .ensure_rack_dataset(opctx, self.nexus.rack_id, kind, redundancy) + .ensure_rack_dataset(opctx, self.nexus.rack_id(), kind, redundancy) .await?; // Actually instantiate those datasets. @@ -486,11 +437,17 @@ impl ServiceBalancer { mod test { use super::*; + use crate::app::background::fakes::{FakeDnsUpdater, FakeNexus}; use crate::{authn, authz}; + use crate::db::datastore::DataStore; use dropshot::test_util::LogContext; + use internal_dns_client::names::{AAAA, BackendName, SRV}; use nexus_test_utils::db::test_setup_database; + use omicron_common::address::Ipv6Subnet; + use omicron_common::api::external::ByteCount; use omicron_test_utils::dev; use std::sync::Arc; + use uuid::Uuid; // TODO: maybe figure out what you *want* to test? // I suspect we'll need to refactor this API for testability. 
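The generic ServiceBalancer introduced in this hunk is the heart of the refactor: the balancing logic is written once against the DnsUpdaterInterface, NexusInterface, and SledClientInterface traits (the D/N/S parameters), and the concrete types are chosen by the caller. A rough sketch of how the two instantiations are expected to line up; the calls mirror wiring that appears elsewhere in this patch series rather than introducing any new API:

    // Production wiring (background/mod.rs): real Nexus, real DNS updater,
    // with NexusInterface::sled_client handing back real sled agent clients.
    let balancer =
        ServiceBalancer::new(log.clone(), nexus.clone(), dns_updater);

    // Test wiring (services.rs tests): fakes satisfying the same traits,
    // so balance_services() and friends run with no network I/O at all.
    let nexus = FakeNexus::new(datastore.clone(), rack_subnet);
    let balancer =
        ServiceBalancer::new(log.clone(), nexus.clone(), FakeDnsUpdater::new());

Because the fakes record every services_put/filesystem_put/insert_dns_records call, the tests below can assert on exactly what the balancer asked for.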
@@ -545,47 +502,92 @@ mod test { } } - #[derive(Clone)] - struct FakeNexus { - datastore: Arc, + async fn create_test_sled(rack_id: Uuid, datastore: &DataStore) -> Uuid { + let bogus_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let sled = Sled::new(sled_id, bogus_addr.clone(), rack_id); + datastore.sled_upsert(sled).await.unwrap(); + sled_id } - #[derive(Clone)] - struct FakeDnsUpdater { - + async fn create_test_zpool(datastore: &DataStore, sled_id: Uuid) -> Uuid { + let zpool_id = Uuid::new_v4(); + let zpool = Zpool::new( + zpool_id, + sled_id, + &crate::internal_api::params::ZpoolPutRequest { + size: ByteCount::from_gibibytes_u32(10), + }, + ); + datastore.zpool_upsert(zpool).await.unwrap(); + zpool_id } - // TODO: interfaces: - // - // - Nexus - // - Datastore: ✔ - // - Sled Client: - // - Rack ID: easy - // - Rack Subnet: easy - // - // - // - DNS service - // - insert dns records + #[tokio::test] + async fn test_provision_dataset_on_all_no_zpools() { + let test = ProvisionTest::new("test_provision_dataset_on_all_no_zpools").await; + + let rack_subnet = Ipv6Subnet::new(Ipv6Addr::LOCALHOST); + let nexus = FakeNexus::new(test.datastore.clone(), rack_subnet); + let dns_updater = FakeDnsUpdater::new(); + let service_balancer = ServiceBalancer::new( + test.logctx.log.clone(), + nexus.clone(), + dns_updater.clone(), + ); + + // Setup: One sled, no zpools. + let sled_id = create_test_sled(nexus.rack_id(), &test.datastore).await; + + // Make the request to the service balancer for Crucibles on all Zpools. + let expected_datasets = [ + ExpectedDataset { + kind: DatasetKind::Crucible, + redundancy: DatasetRedundancy::OnAll, + } + ]; + service_balancer.ensure_datasets_provisioned( + &test.opctx, + &expected_datasets, + ).await.unwrap(); + + // Observe that nothing was requested at the sled. + let sled = nexus.sled_client(&sled_id).await.unwrap(); + assert!(sled.service_requests().is_empty()); + assert!(sled.dataset_requests().is_empty()); + + // Observe that no DNS records were updated. + let records = dns_updater.records(); + assert!(records.is_empty()); + + test.cleanup().await; + } #[tokio::test] async fn test_provision_dataset_on_all() { let test = ProvisionTest::new("test_provision_dataset_on_all").await; - // TODO: move into "test"? - let nexus = Arc::new(FakeNexus { - datastore: test.datastore.clone(), - }); - let dns_updater = FakeDnsUpdater {}; - + let rack_subnet = Ipv6Subnet::new(Ipv6Addr::LOCALHOST); + let nexus = FakeNexus::new(test.datastore.clone(), rack_subnet); + let dns_updater = FakeDnsUpdater::new(); let service_balancer = ServiceBalancer::new( test.logctx.log.clone(), nexus.clone(), dns_updater.clone(), ); - // TODO: Upsert zpools? - // TODO: Also, maybe add a test when invoking this fn on "no zpools". - + // Setup: One sled, multiple zpools + let sled_id = create_test_sled(nexus.rack_id(), &test.datastore).await; + const ZPOOL_COUNT: usize = 3; + let mut zpools = vec![]; + for _ in 0..ZPOOL_COUNT { + zpools.push(create_test_zpool(&test.datastore, sled_id).await); + } // Make the request to the service balancer for Crucibles on all Zpools. let expected_datasets = [ @@ -599,13 +601,45 @@ mod test { &expected_datasets, ).await.unwrap(); - // TODO: Validate that: - // - That "filesystem_put" was invoked -> Store the calls? - // - That the DNS record was updated -> Store the records? + // Observe that datasets were requested on each zpool. 
+ let sled = nexus.sled_client(&sled_id).await.unwrap(); + assert!(sled.service_requests().is_empty()); + let dataset_requests = sled.dataset_requests(); + assert_eq!(ZPOOL_COUNT, dataset_requests.len()); + for request in &dataset_requests { + assert!(zpools.contains(&request.zpool_id), "Dataset request for unexpected zpool"); + assert!(matches!(request.dataset_kind, SledAgentTypes::DatasetKind::Crucible)); + } + + // Observe that DNS records for each Crucible exist. + let records = dns_updater.records(); + assert_eq!(ZPOOL_COUNT, records.len()); + for (srv, aaaas) in &records { + assert_eq!(1, aaaas.len()); + match srv { + SRV::Backend(BackendName::Crucible, dataset_id) => { + let expected_address = dataset_requests.iter().find_map(|request| { + if request.id == *dataset_id { + Some(request.address) + } else { + None + } + }).unwrap(); + + let (aaaa_name, dns_addr) = aaaas[0]; + assert_eq!(dns_addr.to_string(), expected_address); + assert!(matches!(aaaa_name, AAAA::Zone(dataset_id))); + }, + _ => panic!("Unexpected SRV record"), + } + } test.cleanup().await; } + // TODO: test provision outside rack + + /* #[tokio::test] async fn test_provision_dataset_per_rack() { let expected_datasets = [ @@ -626,4 +660,5 @@ mod test { async fn test_provision_service_dns_per_az() { todo!(); } + */ } From c56a6fd0de731ebab0c21fa64ca527c7e2a1b68f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 5 Jul 2022 16:59:56 -0400 Subject: [PATCH 68/88] Fmt, expand on tests --- nexus/src/app/background/fakes.rs | 81 ++++++----- nexus/src/app/background/interfaces.rs | 47 ++++--- nexus/src/app/background/mod.rs | 7 +- nexus/src/app/background/services.rs | 186 +++++++++++++++++-------- nexus/src/db/datastore.rs | 2 +- 5 files changed, 213 insertions(+), 110 deletions(-) diff --git a/nexus/src/app/background/fakes.rs b/nexus/src/app/background/fakes.rs index cfbcce0f59a..375dd27b488 100644 --- a/nexus/src/app/background/fakes.rs +++ b/nexus/src/app/background/fakes.rs @@ -5,23 +5,16 @@ //! Test-only implementations of interfaces used by background tasks. 
use super::interfaces::{ - DnsUpdaterInterface, - NexusInterface, - SledClientInterface, + DnsUpdaterInterface, NexusInterface, SledClientInterface, }; -use async_trait::async_trait; use crate::db::datastore::DataStore; +use async_trait::async_trait; use internal_dns_client::{ - multiclient::{ - AAAARecord, - DnsError, - }, + multiclient::{AAAARecord, DnsError}, names::SRV, }; -use omicron_common::address::{ - RACK_PREFIX, Ipv6Subnet, -}; +use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; use omicron_common::api::external::Error; use sled_agent_client::types as SledAgentTypes; use std::collections::HashMap; @@ -44,20 +37,19 @@ pub struct FakeSledClient { impl FakeSledClient { fn new() -> Arc { - Arc::new( - Self { - inner: Arc::new(Mutex::new( - FakeSledClientInner { - service_request: None, - dataset_requests: vec![], - } - )) - } - ) + Arc::new(Self { + inner: Arc::new(Mutex::new(FakeSledClientInner { + service_request: None, + dataset_requests: vec![], + })), + }) } pub fn service_requests(&self) -> Vec { - self.inner.lock().unwrap().service_request + self.inner + .lock() + .unwrap() + .service_request .as_ref() .map(|request| request.services.clone()) .unwrap_or(vec![]) @@ -70,13 +62,23 @@ impl FakeSledClient { #[async_trait] impl SledClientInterface for FakeSledClient { - async fn services_put(&self, body: &SledAgentTypes::ServiceEnsureBody) -> Result<(), Error> { - let old = self.inner.lock().unwrap().service_request.replace(body.clone()); - assert!(old.is_none(), "Should only set services once (was {old:?}, inserted {body:?})"); + async fn services_put( + &self, + body: &SledAgentTypes::ServiceEnsureBody, + ) -> Result<(), Error> { + let old = + self.inner.lock().unwrap().service_request.replace(body.clone()); + assert!( + old.is_none(), + "Should only set services once (was {old:?}, inserted {body:?})" + ); Ok(()) } - async fn filesystem_put(&self, body: &SledAgentTypes::DatasetEnsureBody) -> Result<(), Error> { + async fn filesystem_put( + &self, + body: &SledAgentTypes::DatasetEnsureBody, + ) -> Result<(), Error> { self.inner.lock().unwrap().dataset_requests.push(body.clone()); Ok(()) } @@ -94,7 +96,10 @@ pub struct FakeNexus { } impl FakeNexus { - pub fn new(datastore: Arc, rack_subnet: Ipv6Subnet) -> Arc { + pub fn new( + datastore: Arc, + rack_subnet: Ipv6Subnet, + ) -> Arc { Arc::new(Self { datastore, rack_id: Uuid::new_v4(), @@ -118,8 +123,12 @@ impl NexusInterface for FakeNexus { &self.datastore } - async fn sled_client(&self, id: &Uuid) -> Result, Error> { - let sled = self.sleds + async fn sled_client( + &self, + id: &Uuid, + ) -> Result, Error> { + let sled = self + .sleds .lock() .unwrap() .entry(*id) @@ -140,9 +149,7 @@ pub struct FakeDnsUpdater { impl FakeDnsUpdater { pub fn new() -> Self { - Self { - records: Arc::new(Mutex::new(HashMap::new())), - } + Self { records: Arc::new(Mutex::new(HashMap::new())) } } // Get a copy of all records. 
@@ -153,11 +160,17 @@ impl FakeDnsUpdater { #[async_trait] impl DnsUpdaterInterface for FakeDnsUpdater { - async fn insert_dns_records(&self, records: &HashMap>) -> Result<(), DnsError> { + async fn insert_dns_records( + &self, + records: &HashMap>, + ) -> Result<(), DnsError> { let mut our_records = self.records.lock().unwrap(); for (k, v) in records { let old = our_records.insert(k.clone(), v.clone()); - assert!(old.is_none(), "Inserted key {k}, but found old value: {old:?}"); + assert!( + old.is_none(), + "Inserted key {k}, but found old value: {old:?}" + ); } Ok(()) } diff --git a/nexus/src/app/background/interfaces.rs b/nexus/src/app/background/interfaces.rs index 43549a52415..daf0e849e44 100644 --- a/nexus/src/app/background/interfaces.rs +++ b/nexus/src/app/background/interfaces.rs @@ -4,20 +4,14 @@ //! Interfaces which can be faked out for testing. -use async_trait::async_trait; use crate::db::datastore::DataStore; use crate::Nexus; +use async_trait::async_trait; use internal_dns_client::{ - multiclient::{ - AAAARecord, - DnsError, - Updater as DnsUpdater - }, + multiclient::{AAAARecord, DnsError, Updater as DnsUpdater}, names::SRV, }; -use omicron_common::address::{ - RACK_PREFIX, Ipv6Subnet, -}; +use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; use omicron_common::api::external::Error; use sled_agent_client::types as SledAgentTypes; use std::collections::HashMap; @@ -29,18 +23,30 @@ use uuid::Uuid; // The non-test implementation should be as simple as possible. #[async_trait] pub trait SledClientInterface { - async fn services_put(&self, body: &SledAgentTypes::ServiceEnsureBody) -> Result<(), Error>; - async fn filesystem_put(&self, body: &SledAgentTypes::DatasetEnsureBody) -> Result<(), Error>; + async fn services_put( + &self, + body: &SledAgentTypes::ServiceEnsureBody, + ) -> Result<(), Error>; + async fn filesystem_put( + &self, + body: &SledAgentTypes::DatasetEnsureBody, + ) -> Result<(), Error>; } #[async_trait] impl SledClientInterface for sled_agent_client::Client { - async fn services_put(&self, body: &SledAgentTypes::ServiceEnsureBody) -> Result<(), Error> { + async fn services_put( + &self, + body: &SledAgentTypes::ServiceEnsureBody, + ) -> Result<(), Error> { self.services_put(body).await?; Ok(()) } - async fn filesystem_put(&self, body: &SledAgentTypes::DatasetEnsureBody) -> Result<(), Error> { + async fn filesystem_put( + &self, + body: &SledAgentTypes::DatasetEnsureBody, + ) -> Result<(), Error> { self.filesystem_put(body).await?; Ok(()) } @@ -71,7 +77,10 @@ impl NexusInterface for Nexus { self.datastore() } - async fn sled_client(&self, id: &Uuid) -> Result, Error> { + async fn sled_client( + &self, + id: &Uuid, + ) -> Result, Error> { self.sled_client(id).await } } @@ -81,12 +90,18 @@ impl NexusInterface for Nexus { // The non-test implementation should be as simple as possible. 
#[async_trait] pub trait DnsUpdaterInterface { - async fn insert_dns_records(&self, records: &HashMap>) -> Result<(), DnsError>; + async fn insert_dns_records( + &self, + records: &HashMap>, + ) -> Result<(), DnsError>; } #[async_trait] impl DnsUpdaterInterface for DnsUpdater { - async fn insert_dns_records(&self, records: &HashMap>) -> Result<(), DnsError> { + async fn insert_dns_records( + &self, + records: &HashMap>, + ) -> Result<(), DnsError> { self.insert_dns_records(records).await } } diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index 684ba5f9cfd..d7604f849d7 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -29,8 +29,11 @@ impl TaskRunner { &nexus.az_subnet(), log.new(o!("component" => "DNS Updater")), ); - let service_balancer = - services::ServiceBalancer::new(log.clone(), nexus.clone(), dns_updater); + let service_balancer = services::ServiceBalancer::new( + log.clone(), + nexus.clone(), + dns_updater, + ); loop { // TODO: We may want triggers to exist here, to invoke this task diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index 3023d9b8354..28cb618e083 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -4,6 +4,9 @@ //! Task which ensures that expected Nexus services exist. +use super::interfaces::{ + DnsUpdaterInterface, NexusInterface, SledClientInterface, +}; use crate::context::OpContext; use crate::db::datastore::DatasetRedundancy; use crate::db::identity::Asset; @@ -25,7 +28,6 @@ use std::collections::{HashMap, HashSet}; use std::marker::PhantomData; use std::net::{Ipv6Addr, SocketAddrV6}; use std::sync::Arc; -use super::interfaces::{DnsUpdaterInterface, NexusInterface, SledClientInterface}; // Policy for the number of services to be provisioned. #[derive(Debug)] @@ -250,7 +252,12 @@ where ) -> Result, Error> { self.nexus .datastore() - .ensure_rack_service(opctx, self.nexus.rack_id(), kind, desired_count) + .ensure_rack_service( + opctx, + self.nexus.rack_id(), + kind, + desired_count, + ) .await } @@ -392,7 +399,7 @@ where async fn ensure_datasets_provisioned( &self, opctx: &OpContext, - expected_datasets: &[ExpectedDataset] + expected_datasets: &[ExpectedDataset], ) -> Result<(), Error> { // Provision all dataset types concurrently. 
stream::iter(expected_datasets) @@ -438,10 +445,10 @@ mod test { use super::*; use crate::app::background::fakes::{FakeDnsUpdater, FakeNexus}; - use crate::{authn, authz}; use crate::db::datastore::DataStore; + use crate::{authn, authz}; use dropshot::test_util::LogContext; - use internal_dns_client::names::{AAAA, BackendName, SRV}; + use internal_dns_client::names::{BackendName, AAAA, SRV}; use nexus_test_utils::db::test_setup_database; use omicron_common::address::Ipv6Subnet; use omicron_common::api::external::ByteCount; @@ -488,12 +495,7 @@ mod test { authn::Context::internal_service_balancer(), datastore.clone(), ); - Self { - logctx, - opctx, - db, - datastore, - } + Self { logctx, opctx, db, datastore } } async fn cleanup(mut self) { @@ -530,7 +532,8 @@ mod test { #[tokio::test] async fn test_provision_dataset_on_all_no_zpools() { - let test = ProvisionTest::new("test_provision_dataset_on_all_no_zpools").await; + let test = + ProvisionTest::new("test_provision_dataset_on_all_no_zpools").await; let rack_subnet = Ipv6Subnet::new(Ipv6Addr::LOCALHOST); let nexus = FakeNexus::new(test.datastore.clone(), rack_subnet); @@ -545,16 +548,16 @@ mod test { let sled_id = create_test_sled(nexus.rack_id(), &test.datastore).await; // Make the request to the service balancer for Crucibles on all Zpools. - let expected_datasets = [ - ExpectedDataset { - kind: DatasetKind::Crucible, - redundancy: DatasetRedundancy::OnAll, - } - ]; - service_balancer.ensure_datasets_provisioned( - &test.opctx, - &expected_datasets, - ).await.unwrap(); + // + // However, with no zpools, this is a no-op. + let expected_datasets = [ExpectedDataset { + kind: DatasetKind::Crucible, + redundancy: DatasetRedundancy::OnAll, + }]; + service_balancer + .ensure_datasets_provisioned(&test.opctx, &expected_datasets) + .await + .unwrap(); // Observe that nothing was requested at the sled. let sled = nexus.sled_client(&sled_id).await.unwrap(); @@ -569,8 +572,9 @@ mod test { } #[tokio::test] - async fn test_provision_dataset_on_all() { - let test = ProvisionTest::new("test_provision_dataset_on_all").await; + async fn test_provision_dataset_on_all_zpools() { + let test = + ProvisionTest::new("test_provision_dataset_on_all_zpools").await; let rack_subnet = Ipv6Subnet::new(Ipv6Addr::LOCALHOST); let nexus = FakeNexus::new(test.datastore.clone(), rack_subnet); @@ -590,16 +594,14 @@ mod test { } // Make the request to the service balancer for Crucibles on all Zpools. - let expected_datasets = [ - ExpectedDataset { - kind: DatasetKind::Crucible, - redundancy: DatasetRedundancy::OnAll, - } - ]; - service_balancer.ensure_datasets_provisioned( - &test.opctx, - &expected_datasets, - ).await.unwrap(); + let expected_datasets = [ExpectedDataset { + kind: DatasetKind::Crucible, + redundancy: DatasetRedundancy::OnAll, + }]; + service_balancer + .ensure_datasets_provisioned(&test.opctx, &expected_datasets) + .await + .unwrap(); // Observe that datasets were requested on each zpool. 
let sled = nexus.sled_client(&sled_id).await.unwrap(); @@ -607,29 +609,47 @@ mod test { let dataset_requests = sled.dataset_requests(); assert_eq!(ZPOOL_COUNT, dataset_requests.len()); for request in &dataset_requests { - assert!(zpools.contains(&request.zpool_id), "Dataset request for unexpected zpool"); - assert!(matches!(request.dataset_kind, SledAgentTypes::DatasetKind::Crucible)); + assert!( + zpools.contains(&request.zpool_id), + "Dataset request for unexpected zpool" + ); + assert!(matches!( + request.dataset_kind, + SledAgentTypes::DatasetKind::Crucible + )); } // Observe that DNS records for each Crucible exist. let records = dns_updater.records(); assert_eq!(ZPOOL_COUNT, records.len()); - for (srv, aaaas) in &records { - assert_eq!(1, aaaas.len()); + for (srv, aaaas) in records { match srv { SRV::Backend(BackendName::Crucible, dataset_id) => { - let expected_address = dataset_requests.iter().find_map(|request| { - if request.id == *dataset_id { - Some(request.address) + let expected_address = dataset_requests + .iter() + .find_map(|request| { + if request.id == dataset_id { + Some(request.address.clone()) } else { None } - }).unwrap(); - - let (aaaa_name, dns_addr) = aaaas[0]; - assert_eq!(dns_addr.to_string(), expected_address); - assert!(matches!(aaaa_name, AAAA::Zone(dataset_id))); - }, + }) + .unwrap(); + + assert_eq!(1, aaaas.len()); + let (aaaa_name, dns_addr) = &aaaas[0]; + assert_eq!(dns_addr.to_string(), expected_address); + if let AAAA::Zone(zone_id) = aaaa_name { + assert_eq!( + *zone_id, dataset_id, + "Expected AAAA UUID to match SRV record", + ); + } else { + panic!( + "Expected AAAA record for Zone from {aaaa_name}" + ); + } + } _ => panic!("Unexpected SRV record"), } } @@ -637,20 +657,72 @@ mod test { test.cleanup().await; } - // TODO: test provision outside rack - - /* #[tokio::test] async fn test_provision_dataset_per_rack() { - let expected_datasets = [ - ExpectedDataset { - kind: DatasetKind::Crucible, - redundancy: DatasetRedundancy::PerRack(2), - } - ]; - todo!(); + let test = ProvisionTest::new("test_provision_dataset_per_rack").await; + + let rack_subnet = Ipv6Subnet::new(Ipv6Addr::LOCALHOST); + let nexus = FakeNexus::new(test.datastore.clone(), rack_subnet); + let dns_updater = FakeDnsUpdater::new(); + let service_balancer = ServiceBalancer::new( + test.logctx.log.clone(), + nexus.clone(), + dns_updater.clone(), + ); + + // Setup: Create a couple sleds on the first rack, and create a third + // sled on a "different rack". + // + // Each sled gets a single zpool. + let mut zpools = vec![]; + + let sled1_id = create_test_sled(nexus.rack_id(), &test.datastore).await; + zpools.push(create_test_zpool(&test.datastore, sled1_id).await); + + let sled2_id = create_test_sled(nexus.rack_id(), &test.datastore).await; + zpools.push(create_test_zpool(&test.datastore, sled2_id).await); + + let other_rack_id = Uuid::new_v4(); + let other_rack_sled_id = + create_test_sled(other_rack_id, &test.datastore).await; + zpools + .push(create_test_zpool(&test.datastore, other_rack_sled_id).await); + + // Ask for one dataset per rack. + let expected_datasets = [ExpectedDataset { + kind: DatasetKind::Cockroach, + redundancy: DatasetRedundancy::PerRack(1), + }]; + service_balancer + .ensure_datasets_provisioned(&test.opctx, &expected_datasets) + .await + .unwrap(); + + // Observe that the datasets were requested on each rack. 
+ let sled = nexus.sled_client(&sled1_id).await.unwrap(); + let requests = sled.dataset_requests(); + assert_eq!(1, requests.len()); + assert_eq!(zpools[0], requests[0].zpool_id); + let sled = nexus.sled_client(&sled2_id).await.unwrap(); + let requests = sled.dataset_requests(); + assert_eq!(0, requests.len()); + + // TODO: This is currently failing, because the API to + // "ensure_rack_dataset" takes a single rack ID. + // + // I think "ensure_rack_service" would likely suffer from a similar + // issue; namely, that the requests will be scoped to a single rack. + // + // TODO: We could iterate over racks IDs? Would that be so awful? + let sled = nexus.sled_client(&other_rack_sled_id).await.unwrap(); + let requests = sled.dataset_requests(); + assert_eq!(1, requests.len()); + assert_eq!(zpools[2], requests[0].zpool_id); + + test.cleanup().await; } + /* #[tokio::test] async fn test_provision_service_per_rack() { todo!(); diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index f19f69058d7..f08bc92f01d 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -5046,7 +5046,7 @@ mod test { }; use omicron_test_utils::dev; use std::collections::{HashMap, HashSet}; - use std::net::{Ipv6Addr, SocketAddrV6}; + use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddrV6}; use std::sync::Arc; use uuid::Uuid; From 7e4a1edd6bc99479a5209719bfcedccc5328d4fc Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 11 Jul 2022 09:04:03 -0400 Subject: [PATCH 69/88] Merge with dendrite --- nexus/src/app/background/services.rs | 6 ++++++ nexus/src/db/model/service.rs | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index 28cb618e083..f74aed63a17 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -240,6 +240,12 @@ where ServiceKind::Oximeter => { ("oximeter".to_string(), SledAgentTypes::ServiceType::Oximeter) } + ServiceKind::Dendrite => ( + "dendrite".to_string(), + SledAgentTypes::ServiceType::Dendrite { + asic: SledAgentTypes::DendriteAsic::TofinoStub, + }, + ), } } diff --git a/nexus/src/db/model/service.rs b/nexus/src/db/model/service.rs index 9aeb3d0e873..1e41fcf2e98 100644 --- a/nexus/src/db/model/service.rs +++ b/nexus/src/db/model/service.rs @@ -9,7 +9,7 @@ use crate::db::schema::service; use db_macros::Asset; use internal_dns_client::names::{ServiceName, AAAA, SRV}; use omicron_common::address::{ - DNS_SERVER_PORT, NEXUS_INTERNAL_PORT, OXIMETER_PORT, + DENDRITE_PORT, DNS_SERVER_PORT, NEXUS_INTERNAL_PORT, OXIMETER_PORT, }; use std::net::{Ipv6Addr, SocketAddrV6}; use uuid::Uuid; @@ -50,6 +50,7 @@ impl Service { ServiceKind::InternalDNS => SRV::Service(ServiceName::InternalDNS), ServiceKind::Nexus => SRV::Service(ServiceName::Nexus), ServiceKind::Oximeter => SRV::Service(ServiceName::Oximeter), + ServiceKind::Dendrite => SRV::Service(ServiceName::Dendrite), } } @@ -58,6 +59,7 @@ impl Service { ServiceKind::InternalDNS => DNS_SERVER_PORT, ServiceKind::Nexus => NEXUS_INTERNAL_PORT, ServiceKind::Oximeter => OXIMETER_PORT, + ServiceKind::Dendrite => DENDRITE_PORT, }; SocketAddrV6::new(Ipv6Addr::from(self.ip), port, 0, 0) } From 6047b93b4ed98950653c819576cce63881838689 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 11 Jul 2022 09:08:38 -0400 Subject: [PATCH 70/88] remove unused --- sled-agent/src/config.rs | 2 +- sled-agent/src/sled_agent.rs | 1 - sled-agent/src/sp/simulated.rs | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff 
--git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index c3bf3e26a52..3c454fdb637 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -13,7 +13,7 @@ use serde::Deserialize; use serde_with::serde_as; use serde_with::DisplayFromStr; use serde_with::PickFirst; -use std::net::{Ipv4Addr, SocketAddr}; +use std::net::Ipv4Addr; use std::path::{Path, PathBuf}; /// Configuration for a sled agent diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 8d0af9aed73..2d1662b6ed1 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -28,7 +28,6 @@ use omicron_common::api::{ use slog::Logger; use std::net::SocketAddrV6; use std::process::Command; -use std::sync::Arc; use uuid::Uuid; #[cfg(not(test))] diff --git a/sled-agent/src/sp/simulated.rs b/sled-agent/src/sp/simulated.rs index f642b6189df..fe3b3cbac2a 100644 --- a/sled-agent/src/sp/simulated.rs +++ b/sled-agent/src/sp/simulated.rs @@ -5,7 +5,6 @@ //! Implementation of a simulated SP / RoT. use super::SpError; -use crate::config::Config as SledConfig; use crate::illumos::dladm::Dladm; use crate::zone::Zones; use slog::Logger; From eba4486a72cbc87896e48b90aa0e8145aa9fb4d2 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 11 Jul 2022 09:46:20 -0400 Subject: [PATCH 71/88] Finish merge --- sled-agent/src/rack_setup/config.rs | 7 +- sled-agent/src/rack_setup/plan/service.rs | 8 +- sled-agent/src/rack_setup/service.rs | 3 + smf/sled-agent/config-rss.toml | 127 +--------------------- 4 files changed, 13 insertions(+), 132 deletions(-) diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 3d389ed7dd1..60687a16764 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -10,7 +10,7 @@ use omicron_common::address::{ }; use serde::Deserialize; use serde::Serialize; -use std::net::Ipv6Addr; +use std::net::{IpAddr, Ipv6Addr}; use std::path::Path; /// Configuration for the "rack setup service", which is controlled during @@ -32,6 +32,10 @@ pub struct SetupServiceConfig { /// If this value is less than 2, no rack secret will be created on startup; /// this is the typical case for single-server test/development. pub rack_secret_threshold: usize, + + /// The address on which Nexus should serve an external interface. + // TODO: Eventually, this should be pulled from a pool of addresses. 
+ pub nexus_external_address: IpAddr, } impl SetupServiceConfig { @@ -68,6 +72,7 @@ mod test { let cfg = SetupServiceConfig { rack_subnet: "fd00:1122:3344:0100::".parse().unwrap(), rack_secret_threshold: 0, + nexus_external_address: "192.168.1.20".parse().unwrap(), }; assert_eq!( diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 0bdf332d748..429d612d62e 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -19,7 +19,7 @@ use sled_agent_client::{ }; use slog::Logger; use std::collections::HashMap; -use std::net::{Ipv6Addr, SocketAddrV6}; +use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::path::{Path, PathBuf}; use thiserror::Error; use uuid::Uuid; @@ -193,11 +193,9 @@ impl Plan { 0, 0, ), - external_address: SocketAddrV6::new( - address, + external_address: SocketAddr::new( + config.nexus_external_address, NEXUS_EXTERNAL_PORT, - 0, - 0, ), }, }) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index cbf424b14af..4bc314c33e4 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -416,6 +416,9 @@ impl ServiceInner { NexusTypes::ServiceKind::InternalDNS } ServiceType::Oximeter => NexusTypes::ServiceKind::Oximeter, + ServiceType::Dendrite { .. } => { + NexusTypes::ServiceKind::Dendrite + } }; services.push(NexusTypes::ServicePutRequest { diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index 33aa0dc7f6b..0adcc276f67 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -10,131 +10,6 @@ rack_subnet = "fd00:1122:3344:0100::" # # For values less than 2, no rack secret will be generated. rack_secret_threshold = 1 -<<<<<<< HEAD -||||||| c56a6fd0 -[[request]] - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate crucible datasets. -[[request.dataset]] -id = "09a9a25f-2602-4e2f-9630-31af9c492c3e" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::6]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "2713b37a-3043-4ed5-aaff-f38200e45cfb" -zpool_id = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::7]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "ffd16cad-e5d5-495e-9c59-4312a3857d91" -zpool_id = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::8]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "4d08fc19-3d5f-4f6b-9c48-925f8eac7255" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::2]:32221" -dataset_kind.type = "cockroach_db" -dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate clickhouse datasets. -[[request.dataset]] -id = "a3505b41-a592-420b-84f2-3d76bf0e0a81" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::5]:8123" -dataset_kind.type = "clickhouse" - -[[request.service]] -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" -name = "nexus" -addresses = [ "fd00:1122:3344:0101::3" ] -gz_addresses = [] -[request.service.service_type] -type = "nexus" -internal_address = "[fd00:1122:3344:0101::3]:12221" -external_address = "[fd00:1122:3344:0101::3]:12220" - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate Oximeter services. 
-[[request.service]] -id = "1da65e5b-210c-4859-a7d7-200c1e659972" -name = "oximeter" -addresses = [ "fd00:1122:3344:0101::4" ] -gz_addresses = [] -[request.service.service_type] -type = "oximeter" -======= - -[[request]] - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate crucible datasets. -[[request.dataset]] -id = "09a9a25f-2602-4e2f-9630-31af9c492c3e" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::6]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "2713b37a-3043-4ed5-aaff-f38200e45cfb" -zpool_id = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::7]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "ffd16cad-e5d5-495e-9c59-4312a3857d91" -zpool_id = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::8]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "4d08fc19-3d5f-4f6b-9c48-925f8eac7255" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::2]:32221" -dataset_kind.type = "cockroach_db" -dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate clickhouse datasets. -[[request.dataset]] -id = "a3505b41-a592-420b-84f2-3d76bf0e0a81" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::5]:8123" -dataset_kind.type = "clickhouse" - -[[request.service]] -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" -name = "nexus" -addresses = [ "fd00:1122:3344:0101::3" ] -gz_addresses = [] -[request.service.service_type] -type = "nexus" -internal_address = "[fd00:1122:3344:0101::3]:12221" # NOTE: In the lab, use "172.20.15.226" -external_address = "192.168.1.20:80" - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate Oximeter services. -[[request.service]] -id = "1da65e5b-210c-4859-a7d7-200c1e659972" -name = "oximeter" -addresses = [ "fd00:1122:3344:0101::4" ] -gz_addresses = [] -[request.service.service_type] -type = "oximeter" - -[[request.service]] -id = "a0fe5ebc-9261-6f77-acc1-972481755789" -name = "dendrite" -addresses = [ "fd00:1122:3344:0101::9" ] -gz_addresses = [] -[request.service.service_type] -type = "dendrite" -asic = "tofino_stub" ->>>>>>> f11200e2534cc252c0ab8db5ab85112ae6d79e3c +nexus_external_address = "192.168.1.20" From 65ac8e557852625daa32935b34699225a35ed086 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 27 Jul 2022 20:37:36 -0400 Subject: [PATCH 72/88] Fix multi-rack test --- nexus/src/app/background/fakes.rs | 2 ++ nexus/src/app/background/services.rs | 45 +++++++++++++++++++--------- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/nexus/src/app/background/fakes.rs b/nexus/src/app/background/fakes.rs index 375dd27b488..2096c9235ac 100644 --- a/nexus/src/app/background/fakes.rs +++ b/nexus/src/app/background/fakes.rs @@ -45,6 +45,7 @@ impl FakeSledClient { }) } + /// Returns the requests to create services on the sled. pub fn service_requests(&self) -> Vec { self.inner .lock() @@ -55,6 +56,7 @@ impl FakeSledClient { .unwrap_or(vec![]) } + /// Returns the requests to create datasets on the sled. 
pub fn dataset_requests(&self) -> Vec { self.inner.lock().unwrap().dataset_requests.clone() } diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index f74aed63a17..a1d84f59320 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -663,6 +663,10 @@ mod test { test.cleanup().await; } + // Observe that "per-rack" dataset provisions can be completed. + // + // This test uses multiple racks, and verifies that a provision occurs + // on each one. #[tokio::test] async fn test_provision_dataset_per_rack() { let test = ProvisionTest::new("test_provision_dataset_per_rack").await; @@ -705,25 +709,38 @@ mod test { .unwrap(); // Observe that the datasets were requested on each rack. + + // Rack 1: One of the two sleds should have a dataset. let sled = nexus.sled_client(&sled1_id).await.unwrap(); - let requests = sled.dataset_requests(); - assert_eq!(1, requests.len()); - assert_eq!(zpools[0], requests[0].zpool_id); + let requests1 = sled.dataset_requests(); + if !requests1.is_empty() { + assert_eq!(1, requests1.len()); + assert_eq!(zpools[0], requests1[0].zpool_id); + } let sled = nexus.sled_client(&sled2_id).await.unwrap(); - let requests = sled.dataset_requests(); - assert_eq!(0, requests.len()); + let requests2 = sled.dataset_requests(); + if !requests2.is_empty() { + assert_eq!(1, requests2.len()); + assert_eq!(zpools[1], requests2[0].zpool_id); + } + assert!( + requests1.is_empty() ^ requests2.is_empty(), + "One of the sleds should have a dataset, the other should not" + ); - // TODO: This is currently failing, because the API to - // "ensure_rack_dataset" takes a single rack ID. - // - // I think "ensure_rack_service" would likely suffer from a similar - // issue; namely, that the requests will be scoped to a single rack. - // - // TODO: We could iterate over racks IDs? Would that be so awful? + // Rack 2: The sled should have a dataset. let sled = nexus.sled_client(&other_rack_sled_id).await.unwrap(); let requests = sled.dataset_requests(); - assert_eq!(1, requests.len()); - assert_eq!(zpools[2], requests[0].zpool_id); + // TODO(https://github.com/oxidecomputer/omicron/issues/1276): + // We should see a request to the "other rack" when multi-rack + // is supported. + // + // At the moment, however, all requests for service-balancing are + // "rack-local". + assert_eq!(0, requests.len()); + + // We should be able to assert this when multi-rack is supported. + // assert_eq!(zpools[2], requests[0].zpool_id); test.cleanup().await; } From c33119b0f52f5428ab1a89cfe6d0a7a54b385751 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 27 Jul 2022 21:47:49 -0400 Subject: [PATCH 73/88] Reduce hacks, docs cleanup --- common/src/address.rs | 5 +++ nexus/src/app/background/fakes.rs | 7 +++- nexus/src/app/background/services.rs | 55 +++++++++++----------------- nexus/src/db/datastore/service.rs | 5 --- 4 files changed, 32 insertions(+), 40 deletions(-) diff --git a/common/src/address.rs b/common/src/address.rs index 5f9fcd0af01..6fc14964686 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -87,6 +87,11 @@ pub struct DnsSubnet { } impl DnsSubnet { + /// Creates a subnet, given an address for the DNS server itself. + pub fn from_dns_address(address: Ipv6Addr) -> Self { + Self { subnet: Ipv6Subnet::new(address) } + } + /// Returns the DNS server address within the subnet. /// /// This is the first address within the subnet. 
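The DnsSubnet::from_dns_address helper added above lets a caller recover the whole DNS subnet, and therefore the matching global-zone address, from nothing more than the address a DNS server is bound to. A small usage sketch (the literal address is illustrative only):

    use omicron_common::address::DnsSubnet;
    use std::net::Ipv6Addr;

    // Illustrative address for an internal DNS service.
    let dns_address: Ipv6Addr = "fd00:1122:3344:0001::2".parse().unwrap();

    // Recover the subnet and derive the global-zone address that should be
    // configured alongside the DNS zone, as the services.rs change below does.
    let gz_ip = DnsSubnet::from_dns_address(dns_address).gz_address().ip();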
diff --git a/nexus/src/app/background/fakes.rs b/nexus/src/app/background/fakes.rs index 2096c9235ac..03f13e04fd3 100644 --- a/nexus/src/app/background/fakes.rs +++ b/nexus/src/app/background/fakes.rs @@ -23,7 +23,7 @@ use uuid::Uuid; /// A fake implementation of a Sled Agent client. /// -/// In lieu of any networked requests, stores onto the requested services and +/// In lieu of any networked requests, stores the requested services and /// datasets for later inspection. pub struct FakeSledClientInner { service_request: Option, @@ -70,6 +70,11 @@ impl SledClientInterface for FakeSledClient { ) -> Result<(), Error> { let old = self.inner.lock().unwrap().service_request.replace(body.clone()); + + // NOTE: This is technically a limitation of the fake. + // + // We can relax this constraint if it's useful, but we should + // deal with conflicts of prior invocations. assert!( old.is_none(), "Should only set services once (was {old:?}, inserted {body:?})" diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index a1d84f59320..a0cfd49c20d 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -18,7 +18,7 @@ use crate::db::model::Sled; use crate::db::model::Zpool; use futures::stream::{self, StreamExt, TryStreamExt}; use omicron_common::address::{ - DNS_PORT, DNS_REDUNDANCY, DNS_SERVER_PORT, NEXUS_EXTERNAL_PORT, + DnsSubnet, DNS_PORT, DNS_REDUNDANCY, DNS_SERVER_PORT, NEXUS_EXTERNAL_PORT, NEXUS_INTERNAL_PORT, }; use omicron_common::api::external::Error; @@ -37,7 +37,9 @@ enum ServiceRedundancy { PerRack(u32), // This service must exist on at least this many sleds - // within the availability zone. + // within the availability zone. Note that this is specific + // for the DNS service, as some expectations surrounding + // addressing are specific to that service. DnsPerAz(u32), } @@ -47,7 +49,8 @@ struct ExpectedService { redundancy: ServiceRedundancy, } -// NOTE: longer-term, when we integrate multi-rack support, +// TODO(https://github.com/oxidecomputer/omicron/issues/1276): +// Longer-term, when we integrate multi-rack support, // it is expected that Nexus will manage multiple racks // within the fleet, rather than simply per-rack services. // @@ -81,6 +84,8 @@ const EXPECTED_DATASETS: [ExpectedDataset; 3] = [ }, ExpectedDataset { kind: DatasetKind::Cockroach, + // TODO(https://github.com/oxidecomputer/omicron/issues/727): + // Update this to more than one. redundancy: DatasetRedundancy::PerRack(1), }, ExpectedDataset { @@ -133,10 +138,12 @@ where stream::iter(&sled_ids) .map(Ok::<_, Error>) .try_for_each_concurrent(None, |sled_id| async { - // TODO: This interface kinda sucks; ideally we would - // only insert the *new* services. + // Query for all services that should be running on a Sled, + // and notify Sled Agent about all of them. // - // Inserting the old ones too is costing us an extra query. + // TODO: This interface could be better; ideally we would only + // insert the *new* services. Inserting the old ones too is + // costing us an extra query. let services = self .nexus .datastore() @@ -156,14 +163,13 @@ where Self::get_service_name_and_type( address, s.kind, ); - - // TODO: This is hacky, specifically to inject - // global zone addresses in the DNS service. 
let gz_addresses = match &s.kind { ServiceKind::InternalDNS => { - let mut octets = address.octets(); - octets[15] = octets[15] + 1; - vec![Ipv6Addr::from(octets)] + vec![DnsSubnet::from_dns_address( + address, + ) + .gz_address() + .ip()] } _ => vec![], }; @@ -359,9 +365,9 @@ where let sled_client = sled_clients.get(&sled.id()).unwrap(); let dataset_kind = match kind { - // TODO: This set of "all addresses" isn't right. - // TODO: ... should we even be using "all addresses" to contact CRDB? - // Can it just rely on DNS, somehow? + // TODO(https://github.com/oxidecomputer/omicron/issues/727): + // This set of "all addresses" isn't right. We'll need to + // deal with that before supporting multi-node CRDB. DatasetKind::Cockroach => { SledAgentTypes::DatasetKind::CockroachDb(vec![]) } @@ -462,25 +468,6 @@ mod test { use std::sync::Arc; use uuid::Uuid; - // TODO: maybe figure out what you *want* to test? - // I suspect we'll need to refactor this API for testability. - // - // - Dataset init: - // - Call to DB - // - For each new dataset... - // - Call to Sled (filesystem put) - // - Update DNS record - // - // - Service init: - // - Call to DB - // - For each sled... - // - List svcs - // - Put svcs - // - For each new service... - // - Update DNS record - // - // TODO: Also, idempotency check - struct ProvisionTest { logctx: LogContext, opctx: OpContext, diff --git a/nexus/src/db/datastore/service.rs b/nexus/src/db/datastore/service.rs index a9fd19137a8..358eda4f8c5 100644 --- a/nexus/src/db/datastore/service.rs +++ b/nexus/src/db/datastore/service.rs @@ -299,11 +299,6 @@ impl DataStore { TxnError::CustomError(ServiceError::NotEnoughIps) })?; let address = dns_subnet.dns_address().ip(); - - // TODO: How are we tracking the GZ address that must be - // allocated? They're tracked by the "DnsSubnet" object - // in address.rs, but I don't think they're getting - // propagated out of here. let service = db::model::Service::new( svc_id, sled.id(), From 0becaa541530f0a2491897a8c5424b1f5bfbea52 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 27 Jul 2022 22:01:56 -0400 Subject: [PATCH 74/88] Add expectation for Dendrite - will rely on isScrimletOrNot functionality --- nexus/src/app/background/services.rs | 9 ++++++- nexus/src/db/datastore/service.rs | 37 ++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index a0cfd49c20d..d2160c9e8b0 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -36,6 +36,9 @@ enum ServiceRedundancy { // within the rack. PerRack(u32), + // This service must exist on all Scrimlets within the rack. + AllScrimlets, + // This service must exist on at least this many sleds // within the availability zone. Note that this is specific // for the DNS service, as some expectations surrounding @@ -56,7 +59,7 @@ struct ExpectedService { // // When that happens, it is likely that many of the "per-rack" // services will become "per-fleet", such as Nexus and CRDB. 
-const EXPECTED_SERVICES: [ExpectedService; 3] = [ +const EXPECTED_SERVICES: [ExpectedService; 4] = [ ExpectedService { kind: ServiceKind::InternalDNS, redundancy: ServiceRedundancy::DnsPerAz(DNS_REDUNDANCY), @@ -69,6 +72,10 @@ const EXPECTED_SERVICES: [ExpectedService; 3] = [ kind: ServiceKind::Oximeter, redundancy: ServiceRedundancy::PerRack(1), }, + ExpectedService { + kind: ServiceKind::Dendrite, + redundancy: ServiceRedundancy::AllScrimlets, + }, ]; #[derive(Debug)] diff --git a/nexus/src/db/datastore/service.rs b/nexus/src/db/datastore/service.rs index 358eda4f8c5..d30f039cac3 100644 --- a/nexus/src/db/datastore/service.rs +++ b/nexus/src/db/datastore/service.rs @@ -158,6 +158,43 @@ impl DataStore { .get_results(conn) } + /// Ensures that all Scrimlets in `rack_id` have the `kind` service + /// provisioned. + pub async fn ensure_scrimlet_service( + &self, + opctx: &OpContext, + rack_id: Uuid, + kind: ServiceKind, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + + #[derive(Debug)] + enum ServiceError { + NotEnoughSleds, + Other(Error), + } + type TxnError = TransactionError; + + self.pool() + .transaction(move |conn| { + // TODO: We should implement this once we have a way of + // identifying sleds as scrimlets or not. + todo!() + }) + .await + .map_err(|e| match e { + TxnError::CustomError(ServiceError::NotEnoughSleds) => { + Error::unavail("Not enough sleds for service allocation") + } + TxnError::CustomError(ServiceError::Other(e)) => e, + TxnError::Pool(e) => { + public_error_from_diesel_pool(e, ErrorHandler::Server) + } + }) + } + + /// Ensures that `redundancy` sleds within `rack_id` have the `kind` service + /// provisioned. pub async fn ensure_rack_service( &self, opctx: &OpContext, From 029603dbcbef0f8b48da63bf0343aeb998121736 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 1 Aug 2022 12:44:54 -0400 Subject: [PATCH 75/88] Start plumbing auxiliary nexus info through RSS handoff --- nexus/db-model/src/service_kind.rs | 4 +- nexus/src/app/rack.rs | 2 + nexus/types/src/internal_api/params.rs | 37 ++++++------- openapi/nexus-internal.json | 76 ++++++++++++++++++++++++-- sled-agent/src/rack_setup/config.rs | 3 +- sled-agent/src/rack_setup/service.rs | 7 ++- 6 files changed, 100 insertions(+), 29 deletions(-) diff --git a/nexus/db-model/src/service_kind.rs b/nexus/db-model/src/service_kind.rs index 661bde449a6..4f6fada7fa9 100644 --- a/nexus/db-model/src/service_kind.rs +++ b/nexus/db-model/src/service_kind.rs @@ -28,7 +28,9 @@ impl From for ServiceKind { internal_api::params::ServiceKind::InternalDNS => { ServiceKind::InternalDNS } - internal_api::params::ServiceKind::Nexus => ServiceKind::Nexus, + internal_api::params::ServiceKind::Nexus { .. } => { + ServiceKind::Nexus + } internal_api::params::ServiceKind::Oximeter => { ServiceKind::Oximeter } diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 5abf7b4ea99..aa4ebba7e4a 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -75,6 +75,8 @@ impl super::Nexus { }) .collect(); + // TODO: If nexus, add a pool? 
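The TODO above is the rack-handoff side of the problem the later patches tackle from the database side: Nexus needs a rack-scoped pool of external addresses to allocate from. A rough sketch of one shape the follow-up could take; `ip_pool_service_ensure` is hypothetical, and the `rack_id` binding, import paths, and field names are assumptions based on the surrounding types rather than anything this change adds:

    for svc in &request.services {
        if let ServiceKind::Nexus { external_address } = svc.kind {
            // Remember a rack-associated IP pool covering the address RSS
            // handed us, so later service allocations have a source.
            self.db_datastore
                .ip_pool_service_ensure(opctx, rack_id, external_address)
                .await?;
        }
    }
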
+ let datasets: Vec<_> = request .datasets .into_iter() diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs index 5e70d0fe8fa..5efcddffbc8 100644 --- a/nexus/types/src/internal_api/params.rs +++ b/nexus/types/src/internal_api/params.rs @@ -7,6 +7,7 @@ use omicron_common::api::external::ByteCount; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::fmt; +use std::net::IpAddr; use std::net::Ipv6Addr; use std::net::SocketAddr; use std::net::SocketAddrV6; @@ -118,10 +119,16 @@ pub struct DatasetPutResponse { /// Describes the purpose of the service. #[derive(Debug, Serialize, Deserialize, JsonSchema, Clone, Copy, PartialEq)] -#[serde(rename_all = "snake_case")] +#[serde(rename_all = "snake_case", tag = "type", content = "content")] pub enum ServiceKind { InternalDNS, - Nexus, + Nexus { + // TODO(https://github.com/oxidecomputer/omicron/issues/1530): + // While it's true that Nexus will only run with a single address, + // we want to convey information about the available pool of addresses + // when handing off from RSS -> Nexus. + external_address: IpAddr, + }, Oximeter, Dendrite, } @@ -131,7 +138,7 @@ impl fmt::Display for ServiceKind { use ServiceKind::*; let s = match self { InternalDNS => "internal_dns", - Nexus => "nexus", + Nexus { .. } => "nexus", Oximeter => "oximeter", Dendrite => "dendrite", }; @@ -139,23 +146,6 @@ impl fmt::Display for ServiceKind { } } -impl FromStr for ServiceKind { - type Err = omicron_common::api::external::Error; - - fn from_str(s: &str) -> Result { - use ServiceKind::*; - match s { - "nexus" => Ok(Nexus), - "oximeter" => Ok(Oximeter), - "internal_dns" => Ok(InternalDNS), - "dendrite" => Ok(Dendrite), - _ => Err(Self::Err::InternalError { - internal_message: format!("Unknown service kind: {}", s), - }), - } - } -} - /// Describes a service on a sled #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct ServicePutRequest { @@ -180,6 +170,13 @@ pub struct DatasetCreateRequest { pub struct RackInitializationRequest { pub services: Vec, pub datasets: Vec, + // TODO(https://github.com/oxidecomputer/omicron/issues/1530): + // While it's true that Nexus will only run with a single address, + // we want to convey information about the available pool of addresses + // when handing off from RSS -> Nexus. + + // TODO(https://github.com/oxidecomputer/omicron/issues/1528): + // Support passing x509 cert info. } /// Message used to notify Nexus that this oximeter instance is up and running. 
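With the enum now adjacently tagged (`tag = "type"`, `content = "content"`), service kinds cross the wire as small objects instead of bare strings, which is what the regenerated OpenAPI document below describes. A minimal sketch of the resulting JSON, assuming only the serde derives shown above and that `serde_json` is available:

    let nexus = ServiceKind::Nexus {
        external_address: "192.0.2.10".parse().unwrap(),
    };
    assert_eq!(
        serde_json::to_value(&nexus).unwrap(),
        serde_json::json!({
            "type": "nexus",
            "content": { "external_address": "192.0.2.10" }
        }),
    );
    // Unit variants carry only the tag.
    assert_eq!(
        serde_json::to_value(&ServiceKind::Oximeter).unwrap(),
        serde_json::json!({ "type": "oximeter" }),
    );
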
diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index cde21d9e2f8..373108e8989 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -1781,12 +1781,76 @@ }, "ServiceKind": { "description": "Describes the purpose of the service.", - "type": "string", - "enum": [ - "internal_d_n_s", - "nexus", - "oximeter", - "dendrite" + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "internal_d_n_s" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "content": { + "type": "object", + "properties": { + "external_address": { + "type": "string", + "format": "ip" + } + }, + "required": [ + "external_address" + ] + }, + "type": { + "type": "string", + "enum": [ + "nexus" + ] + } + }, + "required": [ + "content", + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "oximeter" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "dendrite" + ] + } + }, + "required": [ + "type" + ] + } ] }, "ServicePutRequest": { diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 60687a16764..a576e7e52fe 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -34,7 +34,8 @@ pub struct SetupServiceConfig { pub rack_secret_threshold: usize, /// The address on which Nexus should serve an external interface. - // TODO: Eventually, this should be pulled from a pool of addresses. + // TODO(https://github.com/oxidecomputer/omicron/issues/1530): Eventually, + // this should be pulled from a pool of addresses. pub nexus_external_address: IpAddr, } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 1cb245a6314..552bfa76fd0 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -411,7 +411,12 @@ impl ServiceInner { .chain(service_request.dns_services.iter()) { let kind = match svc.service_type { - ServiceType::Nexus { .. } => NexusTypes::ServiceKind::Nexus, + ServiceType::Nexus { + external_address, + internal_address: _, + } => NexusTypes::ServiceKind::Nexus { + external_address: external_address.ip(), + }, ServiceType::InternalDns { .. } => { NexusTypes::ServiceKind::InternalDNS } From 75ea3d680537dbd04cc66f20a9f14f17be3728cf Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 17 Aug 2022 01:28:42 -0400 Subject: [PATCH 76/88] Add support for allocating external service IP addresses --- common/src/sql/dbinit.sql | 16 +- nexus/db-model/src/external_ip.rs | 59 ++++- nexus/db-model/src/schema.rs | 2 +- nexus/db-model/src/service.rs | 7 + nexus/src/app/background/services.rs | 106 +++++--- .../src/db/datastore/instance_external_ip.rs | 18 ++ nexus/src/db/datastore/mod.rs | 6 +- nexus/src/db/queries/external_ip.rs | 246 ++++++++++++++++-- sled-agent/src/rack_setup/plan/service.rs | 6 - sled-agent/src/rack_setup/plan/sled.rs | 2 +- sled-agent/src/rack_setup/service.rs | 11 +- sled-agent/src/services.rs | 10 +- 12 files changed, 387 insertions(+), 102 deletions(-) diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index f983c7f52dd..0257e861eaa 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -1122,7 +1122,12 @@ CREATE TYPE omicron.public.ip_kind AS ENUM ( * known address that can be moved between instances. Its lifetime is not * fixed to any instance. 
*/ - 'floating' + 'floating', + + /* + * A service IP is an IP not attached to a project or instance. + */ + 'service' ); /* @@ -1149,7 +1154,7 @@ CREATE TABLE omicron.public.instance_external_ip ( ip_pool_range_id UUID NOT NULL, /* FK to the `project` table. */ - project_id UUID NOT NULL, + project_id UUID, /* FK to the `instance` table. See the constraints below. */ instance_id UUID, @@ -1179,12 +1184,13 @@ CREATE TABLE omicron.public.instance_external_ip ( ), /* - * Only nullable if this is a floating IP, which may exist not attached - * to any instance. + * Only nullable if this is a floating/service IP, which may exist not + * attached to any instance. */ CONSTRAINT null_non_fip_instance_id CHECK ( (kind != 'floating' AND instance_id IS NOT NULL) OR - (kind = 'floating') + (kind = 'floating') OR + (kind = 'service') ) ); diff --git a/nexus/db-model/src/external_ip.rs b/nexus/db-model/src/external_ip.rs index 969ba50bf80..4d16a74c50b 100644 --- a/nexus/db-model/src/external_ip.rs +++ b/nexus/db-model/src/external_ip.rs @@ -32,6 +32,7 @@ impl_enum_type!( SNat => b"snat" Ephemeral => b"ephemeral" Floating => b"floating" + Service => b"service" ); /// The main model type for external IP addresses for instances. @@ -56,7 +57,7 @@ pub struct InstanceExternalIp { pub time_deleted: Option>, pub ip_pool_id: Uuid, pub ip_pool_range_id: Uuid, - pub project_id: Uuid, + pub project_id: Option, // This is Some(_) for: // - all instance SNAT IPs // - all ephemeral IPs @@ -78,6 +79,18 @@ impl From for sled_agent_client::types::SourceNatConfig { } } +/// Describes where the IP candidates for allocation come from: either +/// from an IP pool, or from a project. +/// +/// This ensures that a source is always specified, and a caller cannot +/// request an external IP allocation without providing at least one of +/// these options. +#[derive(Debug, Clone, Copy)] +pub enum IpSource { + Pool(Uuid), + Project(Uuid), +} + /// An incomplete external IP, used to store state required for issuing the /// database query that selects an available IP and stores the resulting record. 
#[derive(Debug, Clone)] @@ -87,9 +100,9 @@ pub struct IncompleteInstanceExternalIp { description: Option, time_created: DateTime, kind: IpKind, - project_id: Uuid, + project_id: Option, instance_id: Option, - pool_id: Option, + source: IpSource, } impl IncompleteInstanceExternalIp { @@ -99,15 +112,18 @@ impl IncompleteInstanceExternalIp { instance_id: Uuid, pool_id: Option, ) -> Self { + let source = pool_id + .map(|id| IpSource::Pool(id)) + .unwrap_or_else(|| IpSource::Project(project_id)); Self { id, name: None, description: None, time_created: Utc::now(), kind: IpKind::SNat, - project_id, + project_id: Some(project_id), instance_id: Some(instance_id), - pool_id, + source, } } @@ -117,15 +133,18 @@ impl IncompleteInstanceExternalIp { instance_id: Uuid, pool_id: Option, ) -> Self { + let source = pool_id + .map(|id| IpSource::Pool(id)) + .unwrap_or_else(|| IpSource::Project(project_id)); Self { id, name: None, description: None, time_created: Utc::now(), kind: IpKind::Ephemeral, - project_id, + project_id: Some(project_id), instance_id: Some(instance_id), - pool_id, + source, } } @@ -136,15 +155,31 @@ impl IncompleteInstanceExternalIp { project_id: Uuid, pool_id: Option, ) -> Self { + let source = pool_id + .map(|id| IpSource::Pool(id)) + .unwrap_or_else(|| IpSource::Project(project_id)); Self { id, name: Some(name.clone()), description: Some(description.to_string()), time_created: Utc::now(), kind: IpKind::Floating, - project_id, + project_id: Some(project_id), + instance_id: None, + source, + } + } + + pub fn for_service(id: Uuid, pool_id: Uuid) -> Self { + Self { + id, + name: None, + description: None, + time_created: Utc::now(), + kind: IpKind::Service, + project_id: None, instance_id: None, - pool_id, + source: IpSource::Pool(pool_id), } } @@ -168,7 +203,7 @@ impl IncompleteInstanceExternalIp { &self.kind } - pub fn project_id(&self) -> &Uuid { + pub fn project_id(&self) -> &Option { &self.project_id } @@ -176,8 +211,8 @@ impl IncompleteInstanceExternalIp { &self.instance_id } - pub fn pool_id(&self) -> &Option { - &self.pool_id + pub fn source(&self) -> &IpSource { + &self.source } } diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index ae0f355edb7..cfa74483ad5 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -173,7 +173,7 @@ table! { time_deleted -> Nullable, ip_pool_id -> Uuid, ip_pool_range_id -> Uuid, - project_id -> Uuid, + project_id -> Nullable, instance_id -> Nullable, kind -> crate::IpKindEnum, ip -> Inet, diff --git a/nexus/db-model/src/service.rs b/nexus/db-model/src/service.rs index 9deaeef2c37..ffa53681dc4 100644 --- a/nexus/db-model/src/service.rs +++ b/nexus/db-model/src/service.rs @@ -24,6 +24,13 @@ pub struct Service { pub sled_id: Uuid, pub ip: ipv6::Ipv6Addr, pub kind: ServiceKind, + // TODO: Nexus needs to store: + // - External IP + // - Cert info. + // Where's that coming from? + // + // Could be in-line (forced on all services that aren't nexus) + // or out-of-line (forces extra query for Nexus) } impl Service { diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs index 1c425a66c35..3ad550cc8ff 100644 --- a/nexus/src/app/background/services.rs +++ b/nexus/src/app/background/services.rs @@ -127,6 +127,30 @@ where Self { log, nexus, dns_updater, phantom: PhantomData } } + // If necessary, allocates an external IP for the service. 
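The building block behind that comment is the `for_service` constructor defined above: unlike the instance-oriented constructors, which fall back to `IpSource::Project` when no pool is named, a service IP always draws from an explicit pool and never references a project or instance. A small sketch of the caller-visible contract, with the UUIDs obviously placeholders:

    let pool_id = Uuid::new_v4(); // placeholder; normally the rack's pool
    let incomplete =
        IncompleteInstanceExternalIp::for_service(Uuid::new_v4(), pool_id);

    // Candidates come from the named pool, and the eventual row keeps
    // project_id and instance_id NULL, matching the relaxed CHECK
    // constraint added to dbinit.sql above.
    assert!(matches!(incomplete.source(), IpSource::Pool(_)));
    assert!(incomplete.project_id().is_none());
    assert!(incomplete.instance_id().is_none());
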
+ async fn allocate_external_ip( + &self, + service_kind: ServiceKind, + ) -> Result, Error> { + match service_kind { + ServiceKind::Nexus => { + // TODO: does this need to be in a txn somewhere? + // TODO: THIS SHOULDN'T BE DOING THE ALLOCATION HERE. + // let ip_id = uuid::Uuid::new_v4(); + // IncompleteInstanceExternalIp::for_service( + // ip_id, + // pool_id, + // ); + + // TODO: NO! + Ok(Some(IpAddr::V4(std::net::Ipv4Addr::LOCALHOST))) + } + ServiceKind::InternalDNS + | ServiceKind::Oximeter + | ServiceKind::Dendrite => Ok(None), + } + } + // Reaches out to all sled agents implied in "services", and // requests that the desired services are executing. async fn instantiate_services( @@ -155,36 +179,38 @@ where info!(self.log, "instantiate_services: {:?}", services); + let mut service_requests = vec![]; + for service in &services { + let internal_address = Ipv6Addr::from(service.ip); + let external_address = + self.allocate_external_ip(service.kind).await?; + + let (name, service_type) = Self::get_service_name_and_type( + service.kind, + internal_address, + external_address, + ); + let gz_addresses = match &service.kind { + ServiceKind::InternalDNS => { + vec![DnsSubnet::from_dns_address(internal_address) + .gz_address() + .ip()] + } + _ => vec![], + }; + + service_requests.push(SledAgentTypes::ServiceRequest { + id: service.id(), + name, + addresses: vec![internal_address], + gz_addresses, + service_type, + }); + } + sled_client .services_put(&SledAgentTypes::ServiceEnsureBody { - services: services - .iter() - .map(|s| { - let address = Ipv6Addr::from(s.ip); - let (name, service_type) = - Self::get_service_name_and_type( - address, s.kind, - ); - let gz_addresses = match &s.kind { - ServiceKind::InternalDNS => { - vec![DnsSubnet::from_dns_address( - address, - ) - .gz_address() - .ip()] - } - _ => vec![], - }; - - SledAgentTypes::ServiceRequest { - id: s.id(), - name, - addresses: vec![address], - gz_addresses, - service_type, - } - }) - .collect(), + services: service_requests, }) .await?; Ok(()) @@ -208,30 +234,37 @@ where // Translates (address, db kind) to Sled Agent client types. fn get_service_name_and_type( - address: Ipv6Addr, kind: ServiceKind, + internal_address: Ipv6Addr, + external_address: Option, ) -> (String, SledAgentTypes::ServiceType) { match kind { ServiceKind::Nexus => ( "nexus".to_string(), SledAgentTypes::ServiceType::Nexus { - internal_ip: address, + internal_ip: internal_address, // TODO: This is wrong! needs a separate address for Nexus - external_ip: IpAddr::V6(address), + external_ip: external_address + .expect("Nexus needs an external address"), }, ), ServiceKind::InternalDNS => ( "internal-dns".to_string(), SledAgentTypes::ServiceType::InternalDns { server_address: SocketAddrV6::new( - address, + internal_address, DNS_SERVER_PORT, 0, 0, ) .to_string(), - dns_address: SocketAddrV6::new(address, DNS_PORT, 0, 0) - .to_string(), + dns_address: SocketAddrV6::new( + internal_address, + DNS_PORT, + 0, + 0, + ) + .to_string(), }, ), ServiceKind::Oximeter => { @@ -829,10 +862,7 @@ mod test { // TODO: This is currently failing! We need to make // the Nexus external IP come from an IP pool for // external addresses. - assert_ne!( - internal_ip, - external_ip, - ); + assert_ne!(internal_ip, external_ip,); // TODO: check ports too, maybe? 
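The `LOCALHOST` placeholder and the currently failing `assert_ne!` the test comment calls out both point at the same missing piece: the Nexus external address should come out of a rack-scoped IP pool rather than being synthesized from the internal one. A hedged sketch of what `allocate_external_ip` could look like once it calls the datastore helper added in the next hunk; threading `opctx` and `rack_id` into the balancer is assumed here, not something this patch does yet:

    async fn allocate_external_ip(
        &self,
        opctx: &OpContext,
        rack_id: Uuid,
        service_kind: ServiceKind,
    ) -> Result<Option<IpAddr>, Error> {
        match service_kind {
            ServiceKind::Nexus => {
                // Take an unused address from the rack's service pool; the
                // resulting record has kind = 'service' and no project or
                // instance. A real implementation would want a stable UUID
                // here so retries stay idempotent (see the tests below).
                let external_ip = self
                    .nexus
                    .datastore()
                    .allocate_service_ip(opctx, Uuid::new_v4(), rack_id)
                    .await?;
                Ok(Some(external_ip.ip.ip()))
            }
            ServiceKind::InternalDNS
            | ServiceKind::Oximeter
            | ServiceKind::Dendrite => Ok(None),
        }
    }
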
} diff --git a/nexus/src/db/datastore/instance_external_ip.rs b/nexus/src/db/datastore/instance_external_ip.rs index 66574d215ea..7e42bd90021 100644 --- a/nexus/src/db/datastore/instance_external_ip.rs +++ b/nexus/src/db/datastore/instance_external_ip.rs @@ -20,6 +20,7 @@ use crate::db::update_and_check::UpdateStatus; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use nexus_types::identity::Resource; use omicron_common::api::external::CreateResult; use omicron_common::api::external::Error; use omicron_common::api::external::LookupResult; @@ -101,6 +102,23 @@ impl DataStore { self.allocate_instance_external_ip(opctx, data).await } + // TODO-correctness: This should be made idempotent. + // + // It mostly *is* idemptent, but fails when there are no + // addresses left. + pub async fn allocate_service_ip( + &self, + opctx: &OpContext, + ip_id: Uuid, + rack_id: Uuid, + ) -> CreateResult { + let (.., pool) = + self.ip_pools_lookup_by_rack_id(opctx, rack_id).await?; + + let data = IncompleteInstanceExternalIp::for_service(ip_id, pool.id()); + self.allocate_instance_external_ip(opctx, data).await + } + async fn allocate_instance_external_ip( &self, opctx: &OpContext, diff --git a/nexus/src/db/datastore/mod.rs b/nexus/src/db/datastore/mod.rs index 025be68ba3c..efcefa5ec7f 100644 --- a/nexus/src/db/datastore/mod.rs +++ b/nexus/src/db/datastore/mod.rs @@ -1574,7 +1574,7 @@ mod test { time_deleted: None, ip_pool_id: Uuid::new_v4(), ip_pool_range_id: Uuid::new_v4(), - project_id: Uuid::new_v4(), + project_id: Some(Uuid::new_v4()), instance_id: Some(instance_id), kind: IpKind::Ephemeral, ip: ipnetwork::IpNetwork::from(IpAddr::from(Ipv4Addr::new( @@ -1634,7 +1634,7 @@ mod test { time_deleted: None, ip_pool_id: Uuid::new_v4(), ip_pool_range_id: Uuid::new_v4(), - project_id: Uuid::new_v4(), + project_id: Some(Uuid::new_v4()), instance_id: Some(Uuid::new_v4()), kind: IpKind::SNat, ip: ipnetwork::IpNetwork::from(IpAddr::from(Ipv4Addr::new( @@ -1709,7 +1709,7 @@ mod test { time_deleted: None, ip_pool_id: Uuid::new_v4(), ip_pool_range_id: Uuid::new_v4(), - project_id: Uuid::new_v4(), + project_id: Some(Uuid::new_v4()), instance_id: Some(Uuid::new_v4()), kind: IpKind::Floating, ip: addresses.next().unwrap().into(), diff --git a/nexus/src/db/queries/external_ip.rs b/nexus/src/db/queries/external_ip.rs index 7e032e38a65..7b42aa9af95 100644 --- a/nexus/src/db/queries/external_ip.rs +++ b/nexus/src/db/queries/external_ip.rs @@ -9,6 +9,7 @@ use crate::db::model::IncompleteInstanceExternalIp; use crate::db::model::InstanceExternalIp; use crate::db::model::IpKind; use crate::db::model::IpKindEnum; +use crate::db::model::IpSource; use crate::db::model::Name; use crate::db::pool::DbConnection; use crate::db::schema; @@ -122,7 +123,7 @@ const MAX_PORT: i32 = u16::MAX as _; /// (ip, first_port, time_deleted IS NULL) = /// (candidate_ip, candidate_first_port, TRUE) /// WHERE -/// ip IS NULL +/// (ip IS NULL) OR (id = ) /// ORDER BY /// candidate_ip, candidate_first_port /// LIMIT 1 @@ -134,8 +135,8 @@ const MAX_PORT: i32 = u16::MAX as _; /// -- possible on replay of a saga node. 
/// INSERT INTO /// instance_external_ip -/// (SELECT * FROM external_ip) -/// ON CONFLICT +/// (SELECT * FROM next_external_ip) +/// ON CONFLICT (id) /// DO UPDATE SET /// time_created = excluded.time_created, /// time_modified = excluded.time_modified, @@ -295,7 +296,7 @@ impl NextExternalIp { out.push_sql(", "); // Project ID - out.push_bind_param::(self.ip.project_id())?; + out.push_bind_param::, Option>(self.ip.project_id())?; out.push_sql(" AS "); out.push_identifier(dsl::project_id::NAME)?; out.push_sql(", "); @@ -390,10 +391,14 @@ impl NextExternalIp { // and possibly first port, but since it's been soft-deleted, it's not a // match. In that case, we can get away with _only_ filtering the join // results on the IP from the `instance_external_ip` table being NULL. - out.push_sql(" WHERE "); + out.push_sql(" WHERE ("); out.push_identifier(dsl::ip::NAME)?; + out.push_sql(" IS NULL) OR ("); + out.push_identifier(dsl::id::NAME)?; + out.push_sql(" = "); + out.push_bind_param::(self.ip.id())?; out.push_sql( - " IS NULL \ + ") \ ORDER BY candidate_ip, candidate_first_port \ LIMIT 1", ); @@ -437,18 +442,21 @@ impl NextExternalIp { out.push_sql(") AS candidate_ip FROM "); IP_POOL_RANGE_FROM_CLAUSE.walk_ast(out.reborrow())?; out.push_sql(" WHERE "); - if let Some(ref pool_id) = self.ip.pool_id() { - out.push_identifier(dsl::ip_pool_id::NAME)?; - out.push_sql(" = "); - out.push_bind_param::(pool_id)?; - } else { - out.push_sql("("); - out.push_identifier(dsl::project_id::NAME)?; - out.push_sql(" = "); - out.push_bind_param::(self.ip.project_id())?; - out.push_sql(" OR "); - out.push_identifier(dsl::project_id::NAME)?; - out.push_sql(" IS NULL)"); + match self.ip.source() { + IpSource::Pool(pool_id) => { + out.push_identifier(dsl::ip_pool_id::NAME)?; + out.push_sql(" = "); + out.push_bind_param::(pool_id)?; + } + IpSource::Project(project_id) => { + out.push_sql("("); + out.push_identifier(dsl::project_id::NAME)?; + out.push_sql(" = "); + out.push_bind_param::(project_id)?; + out.push_sql(" OR "); + out.push_identifier(dsl::project_id::NAME)?; + out.push_sql(" IS NULL)"); + } } out.push_sql(" AND "); out.push_identifier(dsl::time_deleted::NAME)?; @@ -620,6 +628,7 @@ mod tests { use async_bb8_diesel::AsyncRunQueryDsl; use dropshot::test_util::LogContext; use nexus_test_utils::db::test_setup_database; + use nexus_test_utils::RACK_UUID; use omicron_common::api::external::Error; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_test_utils::dev; @@ -641,6 +650,7 @@ mod tests { let logctx = dev::test_setup_log(test_name); let log = logctx.log.new(o!()); let db = test_setup_database(&log).await; + crate::db::datastore::datastore_test(&logctx, &db).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; let pool = Arc::new(crate::db::Pool::new(&cfg)); let db_datastore = @@ -650,22 +660,21 @@ mod tests { Self { logctx, opctx, db, db_datastore } } - async fn create_ip_pool( + async fn create_ip_pool_internal( &self, name: &str, range: IpRange, project_id: Option, + rack_id: Option, ) { - // Create with no org/project name, set project_id manually. 
- let mut pool = IpPool::new( + let pool = IpPool::new( &IdentityMetadataCreateParams { name: String::from(name).parse().unwrap(), description: format!("ip pool {}", name), }, - /* project_id= */ None, - /* rack_id= */ None, + project_id, + rack_id, ); - pool.project_id = project_id; diesel::insert_into(crate::db::schema::ip_pool::dsl::ip_pool) .values(pool.clone()) @@ -690,6 +699,33 @@ mod tests { .expect("Failed to create IP Pool range"); } + async fn create_rack_ip_pool( + &self, + name: &str, + range: IpRange, + rack_id: Uuid, + ) { + self.create_ip_pool_internal( + name, + range, + /* project_id= */ None, + Some(rack_id), + ) + .await; + } + + async fn create_ip_pool( + &self, + name: &str, + range: IpRange, + project_id: Option, + ) { + self.create_ip_pool_internal( + name, range, project_id, /* rack_id= */ None, + ) + .await; + } + async fn success(mut self) { self.db.cleanup().await.unwrap(); self.logctx.cleanup_successful(); @@ -943,7 +979,7 @@ mod tests { assert_eq!(ip.ip.ip(), second_range.first_address()); assert_eq!(ip.first_port.0, 0); assert_eq!(ip.last_port.0, u16::MAX); - assert_eq!(ip.project_id, instance_project_id); + assert_eq!(ip.project_id.unwrap(), instance_project_id); // Allocating an address on an instance in the same project should get // an address from the first pool. @@ -966,7 +1002,161 @@ mod tests { assert_eq!(ip.ip.ip(), first_range.first_address()); assert_eq!(ip.first_port.0, 0); assert_eq!(ip.last_port.0, u16::MAX); - assert_eq!(ip.project_id, project_id); + assert_eq!(ip.project_id.unwrap(), project_id); + + context.success().await; + } + + #[tokio::test] + async fn test_next_external_ip_for_service() { + let context = + TestContext::new("test_next_external_ip_for_service").await; + + // Create an IP pool without an associated project. + let rack_id = Uuid::parse_str(RACK_UUID).unwrap(); + let ip_range = IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 2), + )) + .unwrap(); + context.create_rack_ip_pool("p0", ip_range, rack_id).await; + + // Allocate an IP address as we would for an external, rack-associated + // service. + let id1 = Uuid::new_v4(); + let ip1 = context + .db_datastore + .allocate_service_ip(&context.opctx, id1, rack_id) + .await + .expect("Failed to allocate service IP address"); + assert_eq!(ip1.kind, IpKind::Service); + assert_eq!(ip1.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))); + assert_eq!(ip1.first_port.0, 0); + assert_eq!(ip1.last_port.0, u16::MAX); + assert!(ip1.instance_id.is_none()); + assert!(ip1.project_id.is_none()); + + // Allocate the next (last) IP address + let id2 = Uuid::new_v4(); + let ip2 = context + .db_datastore + .allocate_service_ip(&context.opctx, id2, rack_id) + .await + .expect("Failed to allocate service IP address"); + assert_eq!(ip2.kind, IpKind::Service); + assert_eq!(ip2.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 2))); + assert_eq!(ip2.first_port.0, 0); + assert_eq!(ip2.last_port.0, u16::MAX); + assert!(ip2.instance_id.is_none()); + assert!(ip2.project_id.is_none()); + + // Once we're out of IP addresses, test that we see the right error. + let id3 = Uuid::new_v4(); + let err = context + .db_datastore + .allocate_service_ip(&context.opctx, id3, rack_id) + .await + .expect_err("Should have failed to allocate after pool exhausted"); + assert_eq!( + err, + Error::InvalidRequest { + message: String::from( + // TODO: The error is a bit misleading; this isn't an IP + // intended for an instance necessarily. 
+ "No external IP addresses available for new instance" + ), + } + ); + + context.success().await; + } + + #[tokio::test] + async fn test_insert_external_ip_for_service_is_idempoent() { + let context = TestContext::new( + "test_insert_external_ip_for_service_is_idempotent", + ) + .await; + + // Create an IP pool without an associated project. + let rack_id = Uuid::parse_str(RACK_UUID).unwrap(); + let ip_range = IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 2), + )) + .unwrap(); + context.create_rack_ip_pool("p0", ip_range, rack_id).await; + + // Allocate an IP address as we would for an external, rack-associated + // service. + let id = Uuid::new_v4(); + let ip = context + .db_datastore + .allocate_service_ip(&context.opctx, id, rack_id) + .await + .expect("Failed to allocate service IP address"); + assert_eq!(ip.kind, IpKind::Service); + assert_eq!(ip.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))); + assert_eq!(ip.first_port.0, 0); + assert_eq!(ip.last_port.0, u16::MAX); + assert!(ip.instance_id.is_none()); + assert!(ip.project_id.is_none()); + + let ip_again = context + .db_datastore + .allocate_service_ip(&context.opctx, id, rack_id) + .await + .expect("Failed to allocate service IP address"); + + assert_eq!(ip.id, ip_again.id); + assert_eq!(ip.ip.ip(), ip_again.ip.ip()); + + context.success().await; + } + + // This test is identical to "test_insert_external_ip_is_idempotent", + // but tries to make an idempotent allocation after all addresses in the + // pool have been allocated. + #[tokio::test] + async fn test_insert_external_ip_for_service_is_idempotent_even_when_full() + { + let context = TestContext::new( + "test_insert_external_ip_is_idempotent_even_when_full", + ) + .await; + + // Create an IP pool without an associated project. + let rack_id = Uuid::parse_str(RACK_UUID).unwrap(); + let ip_range = IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 1), + )) + .unwrap(); + context.create_rack_ip_pool("p0", ip_range, rack_id).await; + + // Allocate an IP address as we would for an external, rack-associated + // service. + let id = Uuid::new_v4(); + let ip = context + .db_datastore + .allocate_service_ip(&context.opctx, id, rack_id) + .await + .expect("Failed to allocate service IP address"); + assert_eq!(ip.kind, IpKind::Service); + assert_eq!(ip.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))); + assert_eq!(ip.first_port.0, 0); + assert_eq!(ip.last_port.0, u16::MAX); + assert!(ip.instance_id.is_none()); + assert!(ip.project_id.is_none()); + + let ip_again = context + .db_datastore + .allocate_service_ip(&context.opctx, id, rack_id) + .await + .expect("Failed to allocate service IP address"); + + assert_eq!(ip.id, ip_again.id); + assert_eq!(ip.ip.ip(), ip_again.ip.ip()); context.success().await; } @@ -1005,7 +1195,7 @@ mod tests { usize::from(ip.last_port.0), super::NUM_SOURCE_NAT_PORTS - 1 ); - assert_eq!(ip.project_id, project_id); + assert_eq!(ip.project_id.unwrap(), project_id); // Create a new IP, with the _same_ ID, and ensure we get back the same // value. 
@@ -1079,7 +1269,7 @@ mod tests { assert_eq!(ip.ip.ip(), second_range.first_address()); assert_eq!(ip.first_port.0, 0); assert_eq!(ip.last_port.0, u16::MAX); - assert_eq!(ip.project_id, project_id); + assert_eq!(ip.project_id.unwrap(), project_id); context.success().await; } diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 3ab9f8e342f..3268b3a511f 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -188,13 +188,7 @@ impl Plan { gz_addresses: vec![], service_type: ServiceType::Nexus { internal_ip: address, -// NEXUS_INTERNAL_PORT, -// 0, -// 0, external_ip: config.nexus_external_address, - // config.nexus_external_address, -// NEXUS_EXTERNAL_PORT, -// ), }, }) } diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index 59c5a2f3ce1..cde70deb690 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -139,7 +139,7 @@ impl Plan { id: Uuid::new_v4(), subnet, gateway: config.gateway.clone(), - rack_id + rack_id, }, ) }); diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 1b73d31e19f..493bb43bd27 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -411,12 +411,11 @@ impl ServiceInner { .chain(service_request.dns_services.iter()) { let kind = match svc.service_type { - ServiceType::Nexus { - external_ip, - internal_ip: _, - } => NexusTypes::ServiceKind::Nexus { - external_address: external_ip, - }, + ServiceType::Nexus { external_ip, internal_ip: _ } => { + NexusTypes::ServiceKind::Nexus { + external_address: external_ip, + } + } ServiceType::InternalDns { .. } => { NexusTypes::ServiceKind::InternalDNS } diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 62a7741d9ca..182be829eb3 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -415,7 +415,10 @@ impl ServiceManager { dropshot::ConfigDropshot { bind_address: SocketAddr::new(external_ip, 443), request_body_max_bytes: 1048576, - tls: Some(dropshot::ConfigTls { cert_file, key_file }), + tls: Some(dropshot::ConfigTls { + cert_file, + key_file, + }), }, dropshot::ConfigDropshot { bind_address: SocketAddr::new(external_ip, 80), @@ -424,7 +427,10 @@ impl ServiceManager { }, ], dropshot_internal: dropshot::ConfigDropshot { - bind_address: SocketAddr::new(IpAddr::V6(internal_ip), NEXUS_INTERNAL_PORT), + bind_address: SocketAddr::new( + IpAddr::V6(internal_ip), + NEXUS_INTERNAL_PORT, + ), request_body_max_bytes: 1048576, ..Default::default() }, From c19c217262fc3db1a22a13237e9788648f7e663b Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 18 Aug 2022 14:10:07 -0400 Subject: [PATCH 77/88] nexus config in DB, starting to move service allocation to saga --- common/src/sql/dbinit.sql | 27 ++ nexus/db-model/src/schema.rs | 17 ++ nexus/db-model/src/service.rs | 28 +- nexus/src/app/sagas/mod.rs | 5 +- nexus/src/app/sagas/service_balance.rs | 260 ++++++++++++++++++ .../src/db/datastore/instance_external_ip.rs | 28 +- nexus/src/db/datastore/ip_pool.rs | 14 + nexus/src/db/datastore/service.rs | 86 +++++- 8 files changed, 452 insertions(+), 13 deletions(-) create mode 100644 nexus/src/app/sagas/service_balance.rs diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index 0257e861eaa..edf66995746 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -130,6 +130,33 @@ CREATE INDEX ON omicron.public.service ( 
kind ); +/* + * Additional context for services of "kind = nexus" + * This table should be treated as an optional extension + * of the service table itself. + */ +CREATE TABLE omicron.public.nexus_service ( + id UUID PRIMARY KEY, + + /* FK to the service table */ + service_id UUID NOT NULL, + /* FK to the instance_external_ip table */ + external_ip_id UUID NOT NULL, + /* FK to the nexus_certificate table */ + certificate_id UUID NOT NULL +); + +/* + * Information about x509 certificates used to serve Nexus' external interface. + * These certificates may be used by multiple instantiations of the Nexus + * service simultaneously. + */ +CREATE TABLE omicron.public.nexus_certificate ( + id UUID PRIMARY KEY, + public_cert BYTES NOT NULL, + private_key BYTES NOT NULL +); + /* * ZPools of Storage, attached to Sleds. * Typically these are backed by a single physical disk. diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index cfa74483ad5..d7eb70a4478 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -390,6 +390,23 @@ table! { } } +table! { + nexus_service (id) { + id -> Uuid, + service_id -> Uuid, + external_ip_id -> Uuid, + certificate_id -> Uuid, + } +} + +table! { + nexus_certificate (id) { + id -> Uuid, + public_cert -> Binary, + private_key -> Binary, + } +} + table! { zpool (id) { id -> Uuid, diff --git a/nexus/db-model/src/service.rs b/nexus/db-model/src/service.rs index ffa53681dc4..e2f7b784614 100644 --- a/nexus/db-model/src/service.rs +++ b/nexus/db-model/src/service.rs @@ -4,6 +4,8 @@ use super::ServiceKind; use crate::ipv6; +use crate::schema::nexus_certificate; +use crate::schema::nexus_service; use crate::schema::service; use db_macros::Asset; use internal_dns_client::names::{ServiceName, AAAA, SRV}; @@ -24,13 +26,25 @@ pub struct Service { pub sled_id: Uuid, pub ip: ipv6::Ipv6Addr, pub kind: ServiceKind, - // TODO: Nexus needs to store: - // - External IP - // - Cert info. - // Where's that coming from? - // - // Could be in-line (forced on all services that aren't nexus) - // or out-of-line (forces extra query for Nexus) +} + +#[derive(Queryable, Insertable, Debug, Clone, Selectable, PartialEq)] +#[diesel(table_name = nexus_service)] +pub struct NexusService { + id: Uuid, + + service_id: Uuid, + external_ip_id: Uuid, + certificate_id: Uuid, +} + +#[derive(Queryable, Insertable, Debug, Clone, Selectable, PartialEq)] +#[diesel(table_name = nexus_certificate)] +pub struct NexusCertificate { + id: Uuid, + + public_cert: Vec, + private_key: Vec, } impl Service { diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index ad5460c20c4..2032691703f 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -23,6 +23,7 @@ pub mod disk_create; pub mod disk_delete; pub mod instance_create; pub mod instance_migrate; +pub mod service_balance; #[derive(Debug)] pub struct NexusSagaType; @@ -93,7 +94,9 @@ fn make_action_registry() -> ActionRegistry { ::register_actions( &mut registry, ); - + ::register_actions( + &mut registry, + ); registry } diff --git a/nexus/src/app/sagas/service_balance.rs b/nexus/src/app/sagas/service_balance.rs new file mode 100644 index 00000000000..57c02f66c8a --- /dev/null +++ b/nexus/src/app/sagas/service_balance.rs @@ -0,0 +1,260 @@ +// This Source Ccode Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use super::{NexusActionContext, NexusSaga, SagaInitError, ACTION_GENERATE_ID}; +use crate::app::sagas::NexusAction; +use crate::context::OpContext; +use crate::db::identity::Resource; +use crate::db::model::ServiceKind; +use crate::authn; +use chrono::Utc; +use lazy_static::lazy_static; +use omicron_common::address::Ipv6Subnet; +use omicron_common::address::RACK_PREFIX; +use omicron_common::api::external::Error; +use serde::Deserialize; +use serde::Serialize; +use std::fmt::Debug; +use std::sync::Arc; +use steno::new_action_noop_undo; +use steno::ActionError; +use steno::ActionFunc; +use steno::Node; +use steno::{DagBuilder, SagaName}; +use uuid::Uuid; + +// service balance saga: input parameters + +/// Describes the target location where the services should +/// eventually be running. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub enum ServiceDestination { + Scrimlet, + Rack, +} + +/// Parameters used to balance many services. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct Params { + pub serialized_authn: authn::saga::Serialized, + pub destination: ServiceDestination, + pub kind: ServiceKind, + pub rack_id: Uuid, + pub redundancy: u32, +} + +/// Parameters used to instantiate a single service. +/// +/// This is used within a sub-saga. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct ServiceParams { + which: u32, + kind: ServiceKind, + rack_id: Uuid, +} + +lazy_static! { + static ref MARK_RACK_BALANCING: NexusAction = ActionFunc::new_action( + "service-balance.mark-rack-balancing", + mark_rack_balancing, + mark_rack_balancing_undo, + ); + + static ref PICK_DESTINATION_SLEDS: NexusAction = new_action_noop_undo( + "service-balance.pick-destination-sleds", + pick_destination_sleds, + ); + + static ref CREATE_SERVICE_RECORD: NexusAction = ActionFunc::new_action( + "service-balance.create-service-record", + create_service_record, + create_service_record_undo, + ); + + static ref CREATE_INTERNAL_IP: NexusAction = ActionFunc::new_action( + "service-balance.create-internal-ip", + create_internal_ip, + create_internal_ip_undo, + ); + + static ref CREATE_EXTERNAL_IP: NexusAction = ActionFunc::new_action( + "service-balance.create-external-ip", + create_external_ip, + destroy_external_ip, + ); + + static ref UNMARK_RACK_BALANCING: NexusAction = new_action_noop_undo( + "service-balance.unmark-rack-balancing", + unmark_rack_balancing, + ); +} + +// Helper function for appending subsagas to our parent saga. +fn subsaga_append( + node_basename: &'static str, + subsaga_builder: steno::DagBuilder, + parent_builder: &mut steno::DagBuilder, + params: S, + which: u32, +) -> Result<(), SagaInitError> { + // The "parameter" node is a constant node that goes into the outer saga. 
+ let params_node_name = format!("{}_params{}", node_basename, which); + parent_builder.append(Node::constant( + ¶ms_node_name, + serde_json::to_value(¶ms).map_err(|e| { + SagaInitError::SerializeError(params_node_name.clone(), e) + })?, + )); + + let output_name = format!("{}{}", node_basename, which); + parent_builder.append(Node::subsaga( + output_name.as_str(), + subsaga_builder.build()?, + params_node_name, + )); + Ok(()) +} + +#[derive(Debug)] +pub struct SagaServiceBalance; +impl NexusSaga for SagaServiceBalance { + const NAME: &'static str = "service-balance"; + type Params = Params; + + fn register_actions(registry: &mut super::ActionRegistry) { + registry.register(Arc::clone(&*MARK_RACK_BALANCING)); + registry.register(Arc::clone(&*PICK_DESTINATION_SLEDS)); + registry.register(Arc::clone(&*CREATE_SERVICE_RECORD)); + registry.register(Arc::clone(&*CREATE_INTERNAL_IP)); + registry.register(Arc::clone(&*CREATE_EXTERNAL_IP)); + registry.register(Arc::clone(&*UNMARK_RACK_BALANCING)); + } + + fn make_saga_dag( + params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + let instance_id = Uuid::new_v4(); + + builder.append(Node::action( + "mark_balancing", + "MarkBalancing", + MARK_RACK_BALANCING.as_ref(), + )); + + builder.append(Node::action( + "destination_sleds", + "PickDestinationSleds", + PICK_DESTINATION_SLEDS.as_ref(), + )); + + // After selecting destination sleds for our desired number of services, + // we need to actually provision the services themselves. + // + // We do so by creating subsagas for each potential to-be-allocated + // service. + for i in 0..params.redundancy { + let repeat_params = ServiceParams { + which: i, + kind: params.kind, + rack_id: params.rack_id, + }; + let subsaga_name = + SagaName::new(&format!("create-service{i}")); + let mut subsaga_builder = DagBuilder::new(subsaga_name); + subsaga_builder.append(Node::action( + "internal_ip{i}", + format!("CreateServiceIp{i}").as_str(), + CREATE_INTERNAL_IP.as_ref(), + )); + subsaga_append( + "network_interface", + subsaga_builder, + &mut builder, + repeat_params, + i, + )?; + } + + builder.append(Node::action( + "unmark_balancing", + "UnmarkBalancing", + UNMARK_RACK_BALANCING.as_ref(), + )); + + Ok(builder.build()?) 
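// A hedged sketch (not part of this change) of how the saga might be
// launched once the stub actions below are filled in. `for_opctx` and
// `execute_saga` mirror how other Nexus sagas are started; the exact call
// site and parameter values here are assumptions.
//
//     let params = Params {
//         serialized_authn: authn::saga::Serialized::for_opctx(&opctx),
//         destination: ServiceDestination::Rack,
//         kind: ServiceKind::Nexus,
//         rack_id,
//         redundancy: 1,
//     };
//     nexus.execute_saga::<SagaServiceBalance>(params).await?;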
+ } + +} + +async fn mark_rack_balancing( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + // TODO + Ok(()) +} + +async fn mark_rack_balancing_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + // TODO + Ok(()) +} + +async fn unmark_rack_balancing( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + // TODO + Ok(()) +} + +async fn create_service_record( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + // TODO + Ok(()) +} + +async fn create_service_record_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + // TODO + Ok(()) +} + +async fn create_internal_ip( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + // TODO + Ok(()) +} + +async fn create_internal_ip_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + // TODO + Ok(()) +} + +async fn create_external_ip( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + // TODO + Ok(()) +} + +async fn destroy_external_ip( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + // TODO + Ok(()) +} + +async fn pick_destination_sleds( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + // TODO + Ok(()) +} + diff --git a/nexus/src/db/datastore/instance_external_ip.rs b/nexus/src/db/datastore/instance_external_ip.rs index 7e42bd90021..89a1c1e4752 100644 --- a/nexus/src/db/datastore/instance_external_ip.rs +++ b/nexus/src/db/datastore/instance_external_ip.rs @@ -102,10 +102,6 @@ impl DataStore { self.allocate_instance_external_ip(opctx, data).await } - // TODO-correctness: This should be made idempotent. - // - // It mostly *is* idemptent, but fails when there are no - // addresses left. pub async fn allocate_service_ip( &self, opctx: &OpContext, @@ -119,6 +115,30 @@ impl DataStore { self.allocate_instance_external_ip(opctx, data).await } + // TODO: it's a little quirky that the async version looks up the pool + // based on your rack, but this version doesn't. + // maybe we should always leave it up to the caller? + pub fn allocate_service_ip_sync( + conn: &mut crate::db::pool::DbConnection, + ip_id: Uuid, + pool_id: Uuid, + ) -> CreateResult { + let data = IncompleteInstanceExternalIp::for_service(ip_id, pool_id); + NextExternalIp::new(data) + .get_result(conn) + .map_err(|e| { + use diesel::result::Error::NotFound; + match e { + NotFound => Error::invalid_request( + "No external IP addresses available for new instance", + ), + _ => Error::internal_error(&format!( + "Unknown diesel error allocating external IP: {:#}", e + )), + } + }) + } + async fn allocate_instance_external_ip( &self, opctx: &OpContext, diff --git a/nexus/src/db/datastore/ip_pool.rs b/nexus/src/db/datastore/ip_pool.rs index 7d3a40ae390..0850d740fd1 100644 --- a/nexus/src/db/datastore/ip_pool.rs +++ b/nexus/src/db/datastore/ip_pool.rs @@ -125,6 +125,20 @@ impl DataStore { Ok((authz_pool, pool)) } + pub fn ip_pools_lookup_by_rack_id_sync( + conn: &mut crate::db::pool::DbConnection, + rack_id: Uuid, + ) -> Result { + use db::schema::ip_pool::dsl; + + // Look up this IP pool by rack ID. + dsl::ip_pool + .filter(dsl::rack_id.eq(Some(rack_id))) + .filter(dsl::time_deleted.is_null()) + .select(IpPool::as_select()) + .get_result(conn) + } + /// Creates a new IP pool. 
/// /// - If `rack_id` is provided, this IP pool is used for Oxide diff --git a/nexus/src/db/datastore/service.rs b/nexus/src/db/datastore/service.rs index b7abd1c4650..4e33ea51123 100644 --- a/nexus/src/db/datastore/service.rs +++ b/nexus/src/db/datastore/service.rs @@ -24,6 +24,7 @@ use async_bb8_diesel::{AsyncConnection, AsyncRunQueryDsl}; use chrono::Utc; use diesel::prelude::*; use diesel::upsert::excluded; +use nexus_types::identity::Resource; use omicron_common::address::Ipv6Subnet; use omicron_common::address::ReservedRackSubnet; use omicron_common::address::DNS_REDUNDANCY; @@ -245,6 +246,7 @@ impl DataStore { redundancy: u32, ) -> Result, Error> { opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + opctx.authorize(authz::Action::ListChildren, &authz::IP_POOL_LIST).await?; #[derive(Debug)] enum ServiceError { @@ -253,6 +255,63 @@ impl DataStore { } type TxnError = TransactionError; + // NOTE: We could also make parts of this a saga? + // + // - Mark rack as "rebalancing" + // - List sleds + services, return sleds with/without services + // - Pick sleds that are targets probably all up-front + // - FOR EACH + // - Provision IPv6 + // - Upsert service record + // - IF NEXUS + // - Provision external IP + // - Find cert + // - Upsert nexus service record + // - Unmark rack as "rebalancing" + + // NOTE: It's probably possible to do this without the transaction. + // + // Something like this - heavily inspired by the external IP allocation + // CTE: + // + // WITH + // existing_count AS ( + // SELECT COUNT(1) FROM services WHERE allocated AND not deleted + // ), + // new_count AS ( + // -- Use "GREATEST" to avoid underflow if we've somehow + // -- over-allocated services beyond the redundancy. + // GREATEST(, existing_count) - existing_count + // ), + // candidate_sleds AS ( + // SELECT all sleds in the allocation scope (in the rack?) + // LEFT OUTER JOIN with allocated services + // ON service_type + // WHERE service_type IS NULL (svc not allocated to the sled) + // LIMIT new_count + // ), + // new_internal_ips AS ( + // UPDATE sled + // SET + // last_used_address = last_used_address + 1 + // WHERE + // sled_id IN candidate_sleds + // RETURNING + // last_used_address + // ), + // new_external_ips AS ( + // (need to insert the external IP allocation CTE here somehow) + // ), + // candidate_services AS ( + // JOIN all the sleds with the IPs they need + // ), + // new_services AS ( + // INSERT INTO services + // SELECT * FROM candidate_services + // ON CONFLICT (id) + // DO NOTHING + // RETURNING * + // ), self.pool() .transaction(move |conn| { let sleds_and_maybe_svcs = @@ -290,11 +349,36 @@ impl DataStore { TxnError::CustomError(ServiceError::NotEnoughSleds) })?; let svc_id = Uuid::new_v4(); + + // Always allocate an internal IP address to this service. let address = Self::next_ipv6_address_sync(conn, sled.id()) .map_err(|e| { TxnError::CustomError(ServiceError::Other(e)) })?; + // If requested, allocate an external IP address for this + // service too. 
+ let external_ip = if matches!(kind, ServiceKind::Nexus) { + let pool = Self::ip_pools_lookup_by_rack_id_sync(conn, rack_id)?; + + let external_ip = Self::allocate_service_ip_sync( + conn, + Uuid::new_v4(), + pool.id(), + ).map_err(|e| { + TxnError::CustomError(ServiceError::Other(e)) + })?; + + Some(external_ip) + } else { + None + }; + + // TODO: We actually have to *use* the external_ip + // - Use the NexusCertificate table (look up by UUID) + // - Create a NexusService table (reference service, ip, + // certs) + let service = db::model::Service::new( svc_id, sled.id(), @@ -315,7 +399,7 @@ impl DataStore { .map_err(|e| match e { TxnError::CustomError(ServiceError::NotEnoughSleds) => { Error::unavail("Not enough sleds for service allocation") - } + }, TxnError::CustomError(ServiceError::Other(e)) => e, TxnError::Pool(e) => { public_error_from_diesel_pool(e, ErrorHandler::Server) From e9944ba3066a28e04bc680448870b2a0346f8230 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 11 Sep 2022 23:35:38 -0400 Subject: [PATCH 78/88] partway through patching sync errors --- Cargo.lock | 2 +- nexus/Cargo.toml | 2 +- nexus/src/app/sagas/service_balance.rs | 14 +-- nexus/src/db/datastore/external_ip.rs | 32 +++++++ nexus/src/db/datastore/ip_pool.rs | 9 +- nexus/src/db/datastore/mod.rs | 45 +++------- nexus/src/db/datastore/service.rs | 117 +++++++++++++------------ nexus/src/db/error.rs | 7 ++ 8 files changed, 129 insertions(+), 99 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5c0035d971b..6757f0223dd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -98,7 +98,7 @@ checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" [[package]] name = "async-bb8-diesel" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/async-bb8-diesel?rev=51de79fe02b334899be5d5fd8b469f9d140ea887#51de79fe02b334899be5d5fd8b469f9d140ea887" +source = "git+https://github.com/oxidecomputer/async-bb8-diesel?rev=7944dafc8a36dc6e20a1405eca59d04662de2bb7#7944dafc8a36dc6e20a1405eca59d04662de2bb7" dependencies = [ "async-trait", "bb8", diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index de3c5086604..61fecaac7e3 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -9,7 +9,7 @@ path = "../rpaths" [dependencies] anyhow = "1.0" -async-bb8-diesel = { git = "https://github.com/oxidecomputer/async-bb8-diesel", rev = "51de79fe02b334899be5d5fd8b469f9d140ea887" } +async-bb8-diesel = { git = "https://github.com/oxidecomputer/async-bb8-diesel", rev = "7944dafc8a36dc6e20a1405eca59d04662de2bb7" } async-trait = "0.1.56" base64 = "0.13.0" bb8 = "0.8.0" diff --git a/nexus/src/app/sagas/service_balance.rs b/nexus/src/app/sagas/service_balance.rs index aee087c0896..431563b28fa 100644 --- a/nexus/src/app/sagas/service_balance.rs +++ b/nexus/src/app/sagas/service_balance.rs @@ -2,17 +2,17 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-use super::{NexusActionContext, NexusSaga, SagaInitError, ACTION_GENERATE_ID}; +use super::{NexusActionContext, NexusSaga, SagaInitError}; use crate::app::sagas::NexusAction; use crate::authn; -use crate::context::OpContext; -use crate::db::identity::Resource; +//use crate::context::OpContext; +//use crate::db::identity::Resource; use crate::db::model::ServiceKind; -use chrono::Utc; +//use chrono::Utc; use lazy_static::lazy_static; -use omicron_common::address::Ipv6Subnet; -use omicron_common::address::RACK_PREFIX; -use omicron_common::api::external::Error; +//use omicron_common::address::Ipv6Subnet; +//use omicron_common::address::RACK_PREFIX; +//use omicron_common::api::external::Error; use serde::Deserialize; use serde::Serialize; use std::fmt::Debug; diff --git a/nexus/src/db/datastore/external_ip.rs b/nexus/src/db/datastore/external_ip.rs index 4c908461924..a4618734283 100644 --- a/nexus/src/db/datastore/external_ip.rs +++ b/nexus/src/db/datastore/external_ip.rs @@ -115,6 +115,18 @@ impl DataStore { self.allocate_external_ip(opctx, data).await } + pub async fn allocate_service_ip_on_connection( + conn: &async_bb8_diesel::Connection, + ip_id: Uuid, + rack_id: Uuid, + ) -> Result { + let pool = + Self::ip_pools_lookup_by_rack_id_on_connection(conn, rack_id).await?; + + let data = IncompleteExternalIp::for_service(ip_id, pool.id()); + Self::allocate_external_ip_on_connection(conn, data).await + } + async fn allocate_external_ip( &self, opctx: &OpContext, @@ -136,6 +148,26 @@ impl DataStore { }) } + async fn allocate_external_ip_on_connection( + conn: &async_bb8_diesel::Connection, + data: IncompleteExternalIp, + ) -> Result { + NextExternalIp::new(data) + .get_result_async(&conn) + .await + .map_err(|e| { + use async_bb8_diesel::ConnectionError::Query; + use async_bb8_diesel::PoolError::Connection; + use diesel::result::Error::NotFound; + match e { + Connection(Query(NotFound)) => Error::invalid_request( + "No external IP addresses available", + ), + _ => public_error_from_diesel_pool(e, ErrorHandler::Server), + } + }) + } + /// Deallocate the external IP address with the provided ID. /// /// To support idempotency, such as in saga operations, this method returns diff --git a/nexus/src/db/datastore/ip_pool.rs b/nexus/src/db/datastore/ip_pool.rs index 182ae6d3e5e..d8ff1e0b3b1 100644 --- a/nexus/src/db/datastore/ip_pool.rs +++ b/nexus/src/db/datastore/ip_pool.rs @@ -125,10 +125,10 @@ impl DataStore { Ok((authz_pool, pool)) } - pub fn ip_pools_lookup_by_rack_id_sync( - conn: &mut crate::db::pool::DbConnection, + pub async fn ip_pools_lookup_by_rack_id_on_connection( + conn: &async_bb8_diesel::Connection, rack_id: Uuid, - ) -> Result { + ) -> Result { use db::schema::ip_pool::dsl; // Look up this IP pool by rack ID. @@ -136,7 +136,8 @@ impl DataStore { .filter(dsl::rack_id.eq(Some(rack_id))) .filter(dsl::time_deleted.is_null()) .select(IpPool::as_select()) - .get_result(conn) + .get_result_async(conn) + .await } /// Creates a new IP pool. 
diff --git a/nexus/src/db/datastore/mod.rs b/nexus/src/db/datastore/mod.rs index 0671acdd995..68a377a5282 100644 --- a/nexus/src/db/datastore/mod.rs +++ b/nexus/src/db/datastore/mod.rs @@ -25,7 +25,7 @@ use crate::context::OpContext; use crate::db::{ self, error::{ - public_error_from_diesel_lookup, public_error_from_diesel_pool, + public_error_from_diesel_pool, ErrorHandler, }, }; @@ -150,21 +150,24 @@ impl DataStore { .returning(dsl::last_used_address) } - pub fn next_ipv6_address_sync( - conn: &mut DbConnection, + pub async fn next_ipv6_address_on_connection( + conn: &async_bb8_diesel::Connection, sled_id: Uuid, ) -> Result { let net = Self::next_ipv6_address_query(sled_id) - .get_result(conn) + .get_result_async(conn) + .await .map_err(|e| { - public_error_from_diesel_lookup( - e, - ResourceType::Sled, - &LookupType::ById(sled_id), + public_error_from_diesel_pool( + async_bb8_diesel::PoolError::Connection(e), + ErrorHandler::NotFoundByLookup( + ResourceType::Sled, + LookupType::ById(sled_id), + ), ) })?; - // TODO-correctness: We could ensure that this address is actually + // TODO-correctness: We need to ensure that this address is actually // within the sled's underlay prefix, once that's included in the // database record. match net { @@ -182,28 +185,8 @@ impl DataStore { opctx: &OpContext, sled_id: Uuid, ) -> Result { - let net = Self::next_ipv6_address_query(sled_id) - .get_result_async(self.pool_authorized(opctx).await?) - .await - .map_err(|e| { - public_error_from_diesel_pool( - e, - ErrorHandler::NotFoundByLookup( - ResourceType::Sled, - LookupType::ById(sled_id), - ), - ) - })?; - - // TODO-correctness: We need to ensure that this address is actually - // within the sled's underlay prefix, once that's included in the - // database record. 
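// A hedged sketch of the check this TODO asks for, once the sled row
// carries its underlay subnet. The `sled_subnet` value and its type are
// assumptions; nothing in this patch adds them.
//
//     use omicron_common::address::{Ipv6Subnet, SLED_PREFIX};
//     let sled_subnet: Ipv6Subnet<SLED_PREFIX> = todo!("read from sled row");
//     // With `addr` being the Ipv6Addr extracted from `net` below:
//     if !sled_subnet.net().contains(addr) {
//         return Err(Error::internal_error(
//             "allocated address is outside the sled's underlay prefix",
//         ));
//     }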
- match net { - ipnetwork::IpNetwork::V6(net) => Ok(net.ip()), - _ => Err(Error::InternalError { - internal_message: String::from("Sled IP address must be IPv6"), - }), - } + let conn = self.pool_authorized(opctx).await?; + Self::next_ipv6_address_on_connection(&conn).await } // Test interfaces diff --git a/nexus/src/db/datastore/service.rs b/nexus/src/db/datastore/service.rs index b9d33c6e9f7..910c43b5f07 100644 --- a/nexus/src/db/datastore/service.rs +++ b/nexus/src/db/datastore/service.rs @@ -10,13 +10,10 @@ use crate::context::OpContext; use crate::db; use crate::db::collection_insert::AsyncInsertError; use crate::db::collection_insert::DatastoreCollection; -use crate::db::collection_insert::SyncInsertError; -use crate::db::error::public_error_from_diesel_create; use crate::db::error::public_error_from_diesel_pool; use crate::db::error::ErrorHandler; use crate::db::error::TransactionError; use crate::db::identity::Asset; -use crate::db::model::ExternalIp; use crate::db::model::Service; use crate::db::model::ServiceKind; use crate::db::model::Sled; @@ -79,10 +76,10 @@ impl DataStore { }) } - fn service_upsert_sync( - conn: &mut DbConnection, + async fn service_upsert_on_connection( + conn: &async_bb8_diesel::Connection, service: Service, - ) -> CreateResult { + ) -> Result { use db::schema::service::dsl; let sled_id = service.sled_id; @@ -99,32 +96,36 @@ impl DataStore { dsl::kind.eq(excluded(dsl::kind)), )), ) - .insert_and_get_result(conn) + .insert_and_get_result_async(conn) + .await .map_err(|e| match e { - SyncInsertError::CollectionNotFound => Error::ObjectNotFound { + AsyncInsertError::CollectionNotFound => Error::ObjectNotFound { type_name: ResourceType::Sled, lookup_type: LookupType::ById(sled_id), }, - SyncInsertError::DatabaseError(e) => { - public_error_from_diesel_create( + AsyncInsertError::DatabaseError(e) => { + public_error_from_diesel_pool( e, - ResourceType::Service, - &service.id().to_string(), + ErrorHandler::Conflict( + ResourceType::Service, + &service.id().to_string(), + ), ) } }) } - fn sled_list_with_limit_sync( - conn: &mut DbConnection, + async fn sled_list_with_limit_on_connection( + conn: &async_bb8_diesel::Connection, limit: u32, - ) -> Result, diesel::result::Error> { + ) -> Result, async_bb8_diesel::ConnectionError> { use db::schema::sled::dsl; dsl::sled .filter(dsl::time_deleted.is_null()) .limit(limit as i64) .select(Sled::as_select()) - .load(conn) + .load_async(conn) + .await } pub async fn service_list( @@ -144,11 +145,11 @@ impl DataStore { // List all sleds on a rack, with info about provisioned services of a // particular type. 
- fn sled_and_service_list_sync( - conn: &mut DbConnection, + async fn sled_and_service_list( + conn: &async_bb8_diesel::Connection, rack_id: Uuid, kind: ServiceKind, - ) -> Result)>, diesel::result::Error> { + ) -> Result)>, async_bb8_diesel::ConnectionError> { use db::schema::service::dsl as svc_dsl; use db::schema::sled::dsl as sled_dsl; @@ -159,7 +160,8 @@ impl DataStore { svc_dsl::sled_id.eq(sled_dsl::id).and(svc_dsl::kind.eq(kind)), )) .select(<(Sled, Option)>::as_select()) - .get_results(conn) + .get_results_async(conn) + .await } /// Ensures that all Scrimlets in `rack_id` have the `kind` service @@ -177,9 +179,9 @@ impl DataStore { type TxnError = TransactionError; self.pool() - .transaction(move |conn| { + .transaction_async(|conn| async move { let sleds_and_maybe_svcs = - Self::sled_and_service_list_sync(conn, rack_id, kind)?; + Self::sled_and_service_list(&conn, rack_id, kind).await?; // Split the set of returned sleds into "those with" and "those // without" the requested service. @@ -208,7 +210,8 @@ impl DataStore { if sled.is_scrimlet() { let svc_id = Uuid::new_v4(); let address = - Self::next_ipv6_address_sync(conn, sled.id()) + Self::next_ipv6_address_on_connection(&conn, sled.id()) + .await .map_err(|e| TxnError::CustomError(e))?; let service = db::model::Service::new( @@ -218,7 +221,8 @@ impl DataStore { kind, ); - let svc = Self::service_upsert_sync(conn, service) + let svc = Self::service_upsert_on_connection(&conn, service) + .await .map_err(|e| TxnError::CustomError(e))?; svcs.push(svc); } @@ -316,9 +320,11 @@ impl DataStore { // RETURNING * // ), self.pool() - .transaction(move |conn| { + .transaction_async(|conn| async move { let sleds_and_maybe_svcs = - Self::sled_and_service_list_sync(conn, rack_id, kind)?; + Self::sled_and_service_list(&conn, rack_id, kind) + .await + .map_err(|e| TxnError::Pool(e.into()))?; // Split the set of returned sleds into "those with" and "those // without" the requested service. @@ -354,30 +360,31 @@ impl DataStore { let svc_id = Uuid::new_v4(); // Always allocate an internal IP address to this service. - let address = Self::next_ipv6_address_sync(conn, sled.id()) + let address = Self::next_ipv6_address_on_connection(&conn, sled.id()) + .await .map_err(|e| { - TxnError::CustomError(ServiceError::Other(e)) + TxnError::CustomError(ServiceError::Other(e.into())) })?; // If requested, allocate an external IP address for this // service too. 
- let external_ip: Option = if matches!(kind, ServiceKind::Nexus) { - let pool = Self::ip_pools_lookup_by_rack_id_sync( - conn, rack_id, - )?; - - todo!("We are deleting the sync version, right?"); - /* - let external_ip = Self::allocate_service_ip_sync( - conn, + let external_ip = if matches!(kind, ServiceKind::Nexus) { + let pool = Self::ip_pools_lookup_by_rack_id_on_connection( + &conn, rack_id, + ).await + .map_err(|e| { + TxnError::Pool(e.into()) + })?; + + let external_ip = Self::allocate_service_ip_on_connection( + &conn, Uuid::new_v4(), pool.id(), - ).map_err(|e| { - TxnError::CustomError(ServiceError::Other(e)) + ).await.map_err(|e| { + TxnError::Pool(e.into()) })?; Some(external_ip) - */ } else { None }; @@ -394,10 +401,9 @@ impl DataStore { kind, ); - let svc = Self::service_upsert_sync(conn, service) - .map_err(|e| { - TxnError::CustomError(ServiceError::Other(e)) - })?; + let svc = Self::service_upsert_on_connection(&conn, service) + .await + .map_err(|e| TxnError::Pool(e.into()))?; svcs.push(svc); } @@ -436,8 +442,8 @@ impl DataStore { type TxnError = TransactionError; self.pool() - .transaction(move |conn| { - let mut svcs = Self::dns_service_list_sync(conn)?; + .transaction_async(|conn| async move { + let mut svcs = Self::dns_service_list(&conn).await?; // Get all subnets not allocated to existing services. let mut usable_dns_subnets = ReservedRackSubnet(rack_subnet) @@ -455,7 +461,8 @@ impl DataStore { // Get all sleds which aren't already running DNS services. let mut target_sleds = - Self::sled_list_with_limit_sync(conn, redundancy)? + Self::sled_list_with_limit_on_connection(&conn, redundancy) + .await? .into_iter() .filter(|sled| { // The target sleds are only considered if they aren't already @@ -482,10 +489,9 @@ impl DataStore { ServiceKind::InternalDNS, ); - let svc = Self::service_upsert_sync(conn, service) - .map_err(|e| { - TxnError::CustomError(ServiceError::Other(e)) - })?; + let svc = Self::service_upsert_on_connection(&conn, service) + .await + .map_err(|e| TxnError::Pool(e.into()))?; svcs.push(svc); } @@ -508,15 +514,16 @@ impl DataStore { }) } - fn dns_service_list_sync( - conn: &mut DbConnection, - ) -> Result, diesel::result::Error> { + async fn dns_service_list( + conn: &async_bb8_diesel::Connection, + ) -> Result, async_bb8_diesel::ConnectionError> { use db::schema::service::dsl as svc; svc::service .filter(svc::kind.eq(ServiceKind::InternalDNS)) .limit(DNS_REDUNDANCY.into()) .select(Service::as_select()) - .get_results(conn) + .get_results_async(conn) + .await } } diff --git a/nexus/src/db/error.rs b/nexus/src/db/error.rs index dfaa1922e66..6d0131a4456 100644 --- a/nexus/src/db/error.rs +++ b/nexus/src/db/error.rs @@ -43,6 +43,13 @@ impl From for TransactionError { } } +impl From for TransactionError { + fn from(err: async_bb8_diesel::ConnectionError) -> Self { + TransactionError::Pool(async_bb8_diesel::PoolError::Connection(err)) + } +} + + /// Summarizes details provided with a database error. 
fn format_database_error( kind: DieselErrorKind, From 80f9ffd5217841807f18840e9b68be7e4bea6aed Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 12 Sep 2022 00:23:34 -0400 Subject: [PATCH 79/88] Welp, it compiles --- nexus/src/db/datastore/dataset.rs | 72 ++++++++++++---------- nexus/src/db/datastore/external_ip.rs | 34 +++++------ nexus/src/db/datastore/ip_pool.rs | 20 ++++++- nexus/src/db/datastore/mod.rs | 20 ++++--- nexus/src/db/datastore/rack.rs | 25 ++++---- nexus/src/db/datastore/service.rs | 86 +++++++++++++++++---------- nexus/src/db/error.rs | 7 --- 7 files changed, 158 insertions(+), 106 deletions(-) diff --git a/nexus/src/db/datastore/dataset.rs b/nexus/src/db/datastore/dataset.rs index 90abb0501a3..9325dfc6d6e 100644 --- a/nexus/src/db/datastore/dataset.rs +++ b/nexus/src/db/datastore/dataset.rs @@ -11,10 +11,8 @@ use crate::authz; use crate::db; use crate::db::collection_insert::AsyncInsertError; use crate::db::collection_insert::DatastoreCollection; -use crate::db::collection_insert::SyncInsertError; use crate::db::datastore::DatasetRedundancy; use crate::db::datastore::OpContext; -use crate::db::error::public_error_from_diesel_create; use crate::db::error::public_error_from_diesel_pool; use crate::db::error::ErrorHandler; use crate::db::error::TransactionError; @@ -79,8 +77,8 @@ impl DataStore { } /// Stores a new dataset in the database. - fn dataset_upsert_sync( - conn: &mut DbConnection, + async fn dataset_upsert_on_connection( + conn: &async_bb8_diesel::Connection, dataset: Dataset, ) -> CreateResult { use db::schema::dataset::dsl; @@ -100,18 +98,15 @@ impl DataStore { dsl::kind.eq(excluded(dsl::kind)), )), ) - .insert_and_get_result(conn) + .insert_and_get_result_async(conn) + .await .map_err(|e| match e { - SyncInsertError::CollectionNotFound => Error::ObjectNotFound { + AsyncInsertError::CollectionNotFound => Error::ObjectNotFound { type_name: ResourceType::Zpool, lookup_type: LookupType::ById(zpool_id), }, - SyncInsertError::DatabaseError(e) => { - public_error_from_diesel_create( - e, - ResourceType::Dataset, - &dataset.id().to_string(), - ) + AsyncInsertError::DatabaseError(e) => { + public_error_from_diesel_pool(e, ErrorHandler::Server) } }) } @@ -146,12 +141,11 @@ impl DataStore { .limit(REGION_REDUNDANCY_THRESHOLD.try_into().unwrap()) } - fn sled_zpool_and_dataset_list_sync( - conn: &mut DbConnection, + async fn sled_zpool_and_dataset_list_on_connection( + conn: &async_bb8_diesel::Connection, rack_id: Uuid, kind: DatasetKind, - ) -> Result)>, diesel::result::Error> - { + ) -> Result)>, Error> { use db::schema::dataset::dsl as dataset_dsl; use db::schema::sled::dsl as sled_dsl; use db::schema::zpool::dsl as zpool_dsl; @@ -171,7 +165,11 @@ impl DataStore { .and(dataset_dsl::time_deleted.is_null())), ) .select(<(Sled, Zpool, Option)>::as_select()) - .get_results(conn) + .get_results_async(conn) + .await + .map_err(|e| { + public_error_from_diesel_pool(e.into(), ErrorHandler::Server) + }) } pub async fn ensure_rack_dataset( @@ -191,11 +189,15 @@ impl DataStore { type TxnError = TransactionError; self.pool() - .transaction(move |conn| { + .transaction_async(|conn| async move { let sleds_zpools_and_maybe_datasets = - Self::sled_zpool_and_dataset_list_sync( - conn, rack_id, kind, - )?; + Self::sled_zpool_and_dataset_list_on_connection( + &conn, rack_id, kind, + ) + .await + .map_err(|e| { + TxnError::CustomError(DatasetError::Other(e.into())) + })?; // Split the set of returned zpools into "those with" and "those // without" the requested dataset. 
@@ -245,11 +247,17 @@ impl DataStore { TxnError::CustomError(DatasetError::NotEnoughZpools) })?; let dataset_id = Uuid::new_v4(); - let address = Self::next_ipv6_address_sync(conn, sled.id()) - .map_err(|e| { - TxnError::CustomError(DatasetError::Other(e)) - }) - .map(|ip| SocketAddrV6::new(ip, kind.port(), 0, 0))?; + let address = + Self::next_ipv6_address_on_connection(&conn, sled.id()) + .await + .map_err(|e| { + TxnError::CustomError(DatasetError::Other( + e.into(), + )) + }) + .map(|ip| { + SocketAddrV6::new(ip, kind.port(), 0, 0) + })?; let dataset = db::model::Dataset::new( dataset_id, @@ -258,10 +266,14 @@ impl DataStore { kind, ); - let dataset = Self::dataset_upsert_sync(conn, dataset) - .map_err(|e| { - TxnError::CustomError(DatasetError::Other(e)) - })?; + let dataset = + Self::dataset_upsert_on_connection(&conn, dataset) + .await + .map_err(|e| { + TxnError::CustomError(DatasetError::Other( + e.into(), + )) + })?; datasets.push((sled, zpool, dataset)); } diff --git a/nexus/src/db/datastore/external_ip.rs b/nexus/src/db/datastore/external_ip.rs index a4618734283..58dc696c975 100644 --- a/nexus/src/db/datastore/external_ip.rs +++ b/nexus/src/db/datastore/external_ip.rs @@ -119,9 +119,10 @@ impl DataStore { conn: &async_bb8_diesel::Connection, ip_id: Uuid, rack_id: Uuid, - ) -> Result { - let pool = - Self::ip_pools_lookup_by_rack_id_on_connection(conn, rack_id).await?; + ) -> CreateResult { + let (.., pool) = + Self::ip_pools_lookup_by_rack_id_on_connection(conn, rack_id) + .await?; let data = IncompleteExternalIp::for_service(ip_id, pool.id()); Self::allocate_external_ip_on_connection(conn, data).await @@ -151,21 +152,20 @@ impl DataStore { async fn allocate_external_ip_on_connection( conn: &async_bb8_diesel::Connection, data: IncompleteExternalIp, - ) -> Result { - NextExternalIp::new(data) - .get_result_async(&conn) - .await - .map_err(|e| { - use async_bb8_diesel::ConnectionError::Query; - use async_bb8_diesel::PoolError::Connection; - use diesel::result::Error::NotFound; - match e { - Connection(Query(NotFound)) => Error::invalid_request( - "No external IP addresses available", - ), - _ => public_error_from_diesel_pool(e, ErrorHandler::Server), + ) -> CreateResult { + NextExternalIp::new(data).get_result_async(conn).await.map_err(|e| { + use async_bb8_diesel::ConnectionError::Query; + use diesel::result::Error::NotFound; + match e { + Query(NotFound) => { + Error::invalid_request("No external IP addresses available") } - }) + _ => public_error_from_diesel_pool( + e.into(), + ErrorHandler::Server, + ), + } + }) } /// Deallocate the external IP address with the provided ID. diff --git a/nexus/src/db/datastore/ip_pool.rs b/nexus/src/db/datastore/ip_pool.rs index d8ff1e0b3b1..00b82f21b74 100644 --- a/nexus/src/db/datastore/ip_pool.rs +++ b/nexus/src/db/datastore/ip_pool.rs @@ -128,16 +128,32 @@ impl DataStore { pub async fn ip_pools_lookup_by_rack_id_on_connection( conn: &async_bb8_diesel::Connection, rack_id: Uuid, - ) -> Result { + ) -> LookupResult<(authz::IpPool, IpPool)> { use db::schema::ip_pool::dsl; // Look up this IP pool by rack ID. 
- dsl::ip_pool + let (authz_pool, pool) = dsl::ip_pool .filter(dsl::rack_id.eq(Some(rack_id))) .filter(dsl::time_deleted.is_null()) .select(IpPool::as_select()) .get_result_async(conn) .await + .map_err(|e| { + public_error_from_diesel_pool(e.into(), ErrorHandler::Server) + }) + .map(|ip_pool| { + ( + authz::IpPool::new( + authz::FLEET, + ip_pool.id(), + LookupType::ByCompositeId(format!( + "Rack ID: {rack_id}" + )), + ), + ip_pool, + ) + })?; + Ok((authz_pool, pool)) } /// Creates a new IP pool. diff --git a/nexus/src/db/datastore/mod.rs b/nexus/src/db/datastore/mod.rs index 68a377a5282..36aecdafa42 100644 --- a/nexus/src/db/datastore/mod.rs +++ b/nexus/src/db/datastore/mod.rs @@ -24,10 +24,7 @@ use crate::authz; use crate::context::OpContext; use crate::db::{ self, - error::{ - public_error_from_diesel_pool, - ErrorHandler, - }, + error::{public_error_from_diesel_pool, ErrorHandler}, }; use async_bb8_diesel::{AsyncRunQueryDsl, ConnectionManager}; use diesel::pg::Pg; @@ -150,16 +147,21 @@ impl DataStore { .returning(dsl::last_used_address) } - pub async fn next_ipv6_address_on_connection( - conn: &async_bb8_diesel::Connection, + pub async fn next_ipv6_address_on_connection( + conn: &(impl async_bb8_diesel::AsyncConnection + + Sync), sled_id: Uuid, - ) -> Result { + ) -> Result + where + ConnErr: From + Send + 'static, + async_bb8_diesel::PoolError: From, + { let net = Self::next_ipv6_address_query(sled_id) .get_result_async(conn) .await .map_err(|e| { public_error_from_diesel_pool( - async_bb8_diesel::PoolError::Connection(e), + async_bb8_diesel::PoolError::from(e), ErrorHandler::NotFoundByLookup( ResourceType::Sled, LookupType::ById(sled_id), @@ -186,7 +188,7 @@ impl DataStore { sled_id: Uuid, ) -> Result { let conn = self.pool_authorized(opctx).await?; - Self::next_ipv6_address_on_connection(&conn).await + Self::next_ipv6_address_on_connection(conn, sled_id).await } // Test interfaces diff --git a/nexus/src/db/datastore/rack.rs b/nexus/src/db/datastore/rack.rs index 62940886c02..3b851975909 100644 --- a/nexus/src/db/datastore/rack.rs +++ b/nexus/src/db/datastore/rack.rs @@ -91,8 +91,16 @@ impl DataStore { #[derive(Debug)] enum RackInitError { - ServiceInsert { err: AsyncInsertError, sled_id: Uuid, svc_id: Uuid }, - DatasetInsert { err: AsyncInsertError, zpool_id: Uuid, dataset_id: Uuid, }, + ServiceInsert { + err: AsyncInsertError, + sled_id: Uuid, + svc_id: Uuid, + }, + DatasetInsert { + err: AsyncInsertError, + zpool_id: Uuid, + dataset_id: Uuid, + }, RackUpdate(PoolError), } type TxnError = TransactionError; @@ -164,7 +172,8 @@ impl DataStore { dsl::kind.eq(excluded(dsl::kind)), )), ) - .insert_and_get_result(conn) + .insert_and_get_result_async(&conn) + .await .map_err(|err| { TxnError::CustomError(RackInitError::DatasetInsert { err, @@ -199,18 +208,14 @@ impl DataStore { zpool_id, dataset_id, }) => match err { - SyncInsertError::CollectionNotFound => { + AsyncInsertError::CollectionNotFound => { Error::ObjectNotFound { type_name: ResourceType::Zpool, lookup_type: LookupType::ById(zpool_id), } } - SyncInsertError::DatabaseError(e) => { - public_error_from_diesel_create( - e, - ResourceType::Dataset, - &dataset_id.to_string(), - ) + AsyncInsertError::DatabaseError(e) => { + public_error_from_diesel_pool(e, ErrorHandler::Server) } }, TxnError::CustomError(RackInitError::ServiceInsert { diff --git a/nexus/src/db/datastore/service.rs b/nexus/src/db/datastore/service.rs index 910c43b5f07..fe4306799b1 100644 --- a/nexus/src/db/datastore/service.rs +++ b/nexus/src/db/datastore/service.rs 
@@ -79,7 +79,7 @@ impl DataStore { async fn service_upsert_on_connection( conn: &async_bb8_diesel::Connection, service: Service, - ) -> Result { + ) -> Result { use db::schema::service::dsl; let sled_id = service.sled_id; @@ -149,7 +149,8 @@ impl DataStore { conn: &async_bb8_diesel::Connection, rack_id: Uuid, kind: ServiceKind, - ) -> Result)>, async_bb8_diesel::ConnectionError> { + ) -> Result)>, async_bb8_diesel::ConnectionError> + { use db::schema::service::dsl as svc_dsl; use db::schema::sled::dsl as sled_dsl; @@ -209,10 +210,12 @@ impl DataStore { for sled in sleds_without_svc { if sled.is_scrimlet() { let svc_id = Uuid::new_v4(); - let address = - Self::next_ipv6_address_on_connection(&conn, sled.id()) - .await - .map_err(|e| TxnError::CustomError(e))?; + let address = Self::next_ipv6_address_on_connection( + &conn, + sled.id(), + ) + .await + .map_err(|e| TxnError::CustomError(e))?; let service = db::model::Service::new( svc_id, @@ -221,9 +224,10 @@ impl DataStore { kind, ); - let svc = Self::service_upsert_on_connection(&conn, service) - .await - .map_err(|e| TxnError::CustomError(e))?; + let svc = + Self::service_upsert_on_connection(&conn, service) + .await + .map_err(|e| TxnError::CustomError(e))?; svcs.push(svc); } } @@ -360,29 +364,41 @@ impl DataStore { let svc_id = Uuid::new_v4(); // Always allocate an internal IP address to this service. - let address = Self::next_ipv6_address_on_connection(&conn, sled.id()) - .await - .map_err(|e| { - TxnError::CustomError(ServiceError::Other(e.into())) - })?; + let address = + Self::next_ipv6_address_on_connection(&conn, sled.id()) + .await + .map_err(|e| { + TxnError::CustomError(ServiceError::Other( + e.into(), + )) + })?; // If requested, allocate an external IP address for this // service too. 
let external_ip = if matches!(kind, ServiceKind::Nexus) { - let pool = Self::ip_pools_lookup_by_rack_id_on_connection( - &conn, rack_id, - ).await + let (.., pool) = + Self::ip_pools_lookup_by_rack_id_on_connection( + &conn, rack_id, + ) + .await .map_err(|e| { - TxnError::Pool(e.into()) + TxnError::CustomError(ServiceError::Other( + e.into(), + )) })?; - let external_ip = Self::allocate_service_ip_on_connection( - &conn, - Uuid::new_v4(), - pool.id(), - ).await.map_err(|e| { - TxnError::Pool(e.into()) - })?; + let external_ip = + Self::allocate_service_ip_on_connection( + &conn, + Uuid::new_v4(), + pool.id(), + ) + .await + .map_err(|e| { + TxnError::CustomError(ServiceError::Other( + e.into(), + )) + })?; Some(external_ip) } else { @@ -401,9 +417,14 @@ impl DataStore { kind, ); - let svc = Self::service_upsert_on_connection(&conn, service) - .await - .map_err(|e| TxnError::Pool(e.into()))?; + let svc = + Self::service_upsert_on_connection(&conn, service) + .await + .map_err(|e| { + TxnError::CustomError(ServiceError::Other( + e.into(), + )) + })?; svcs.push(svc); } @@ -489,9 +510,12 @@ impl DataStore { ServiceKind::InternalDNS, ); - let svc = Self::service_upsert_on_connection(&conn, service) - .await - .map_err(|e| TxnError::Pool(e.into()))?; + let svc = + Self::service_upsert_on_connection(&conn, service) + .await + .map_err(|e| { + TxnError::CustomError(ServiceError::Other(e)) + })?; svcs.push(svc); } diff --git a/nexus/src/db/error.rs b/nexus/src/db/error.rs index 992ff30c00f..348daf2737d 100644 --- a/nexus/src/db/error.rs +++ b/nexus/src/db/error.rs @@ -51,13 +51,6 @@ impl From for TransactionError { } } -impl From for TransactionError { - fn from(err: async_bb8_diesel::ConnectionError) -> Self { - TransactionError::Pool(async_bb8_diesel::PoolError::Connection(err)) - } -} - - /// Summarizes details provided with a database error. fn format_database_error( kind: DieselErrorKind, From 7bb170e816d4018c41db3e279cdd754ac7efbd88 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 12 Sep 2022 13:05:20 -0400 Subject: [PATCH 80/88] Add service provision CTE sketch --- nexus/src/app/sagas/service_balance.rs | 60 +++----------------------- nexus/src/db/datastore/rack.rs | 2 +- nexus/src/db/datastore/service.rs | 21 ++++++++- 3 files changed, 27 insertions(+), 56 deletions(-) diff --git a/nexus/src/app/sagas/service_balance.rs b/nexus/src/app/sagas/service_balance.rs index 431563b28fa..bf5040f29f6 100644 --- a/nexus/src/app/sagas/service_balance.rs +++ b/nexus/src/app/sagas/service_balance.rs @@ -55,11 +55,6 @@ pub struct ServiceParams { } lazy_static! { - static ref MARK_RACK_BALANCING: NexusAction = ActionFunc::new_action( - "service-balance.mark-rack-balancing", - mark_rack_balancing, - mark_rack_balancing_undo, - ); static ref PICK_DESTINATION_SLEDS: NexusAction = new_action_noop_undo( "service-balance.pick-destination-sleds", pick_destination_sleds, @@ -79,10 +74,6 @@ lazy_static! { create_external_ip, destroy_external_ip, ); - static ref UNMARK_RACK_BALANCING: NexusAction = new_action_noop_undo( - "service-balance.unmark-rack-balancing", - unmark_rack_balancing, - ); } // Helper function for appending subsagas to our parent saga. 
@@ -118,26 +109,16 @@ impl NexusSaga for SagaServiceBalance { type Params = Params; fn register_actions(registry: &mut super::ActionRegistry) { - registry.register(Arc::clone(&*MARK_RACK_BALANCING)); registry.register(Arc::clone(&*PICK_DESTINATION_SLEDS)); registry.register(Arc::clone(&*CREATE_SERVICE_RECORD)); registry.register(Arc::clone(&*CREATE_INTERNAL_IP)); registry.register(Arc::clone(&*CREATE_EXTERNAL_IP)); - registry.register(Arc::clone(&*UNMARK_RACK_BALANCING)); } fn make_saga_dag( params: &Self::Params, mut builder: steno::DagBuilder, ) -> Result { - let instance_id = Uuid::new_v4(); - - builder.append(Node::action( - "mark_balancing", - "MarkBalancing", - MARK_RACK_BALANCING.as_ref(), - )); - builder.append(Node::action( "destination_sleds", "PickDestinationSleds", @@ -171,81 +152,54 @@ impl NexusSaga for SagaServiceBalance { )?; } - builder.append(Node::action( - "unmark_balancing", - "UnmarkBalancing", - UNMARK_RACK_BALANCING.as_ref(), - )); - Ok(builder.build()?) } } -async fn mark_rack_balancing( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - // TODO - Ok(()) -} - -async fn mark_rack_balancing_undo( - sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { - // TODO - Ok(()) -} - -async fn unmark_rack_balancing( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - // TODO - Ok(()) -} - async fn create_service_record( - sagactx: NexusActionContext, + _sagactx: NexusActionContext, ) -> Result<(), ActionError> { // TODO Ok(()) } async fn create_service_record_undo( - sagactx: NexusActionContext, + _sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { // TODO Ok(()) } async fn create_internal_ip( - sagactx: NexusActionContext, + _sagactx: NexusActionContext, ) -> Result<(), ActionError> { // TODO Ok(()) } async fn create_internal_ip_undo( - sagactx: NexusActionContext, + _sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { // TODO Ok(()) } async fn create_external_ip( - sagactx: NexusActionContext, + _sagactx: NexusActionContext, ) -> Result<(), ActionError> { // TODO Ok(()) } async fn destroy_external_ip( - sagactx: NexusActionContext, + _sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { // TODO Ok(()) } async fn pick_destination_sleds( - sagactx: NexusActionContext, + _sagactx: NexusActionContext, ) -> Result<(), ActionError> { // TODO Ok(()) diff --git a/nexus/src/db/datastore/rack.rs b/nexus/src/db/datastore/rack.rs index 3b851975909..ca237aecb55 100644 --- a/nexus/src/db/datastore/rack.rs +++ b/nexus/src/db/datastore/rack.rs @@ -206,7 +206,7 @@ impl DataStore { TxnError::CustomError(RackInitError::DatasetInsert { err, zpool_id, - dataset_id, + dataset_id: _, }) => match err { AsyncInsertError::CollectionNotFound => { Error::ObjectNotFound { diff --git a/nexus/src/db/datastore/service.rs b/nexus/src/db/datastore/service.rs index fe4306799b1..5a7a2032a6d 100644 --- a/nexus/src/db/datastore/service.rs +++ b/nexus/src/db/datastore/service.rs @@ -268,7 +268,15 @@ impl DataStore { // NOTE: We could also make parts of this a saga? // - // - Mark rack as "rebalancing" + // - TODO: DON'T mark/unmark as rebalancing!!!!!! + // - Use rcgen!!! It's what it's for - optimistic concurrency control. + // - Basically, create a new rcgen for Nexus to use, bail if + // somone else increments past us? *That* can be stored on the rack + // table. + // TODO: alternatively, this whole thing is happening in the DB. + // We *could* issue a CTE. 
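+        //   As a rough sketch of that check (assuming a hypothetical `rcgen`
+        //   column on the rack table, which does not exist yet):
+        //
+        //     UPDATE rack
+        //       SET rcgen = rcgen + 1
+        //       WHERE id = <rack_id> AND rcgen = <rcgen observed at start>;
+        //
+        //   Zero rows updated would mean another Nexus rebalanced
+        //   concurrently, so we'd bail out and retry from a fresh view of
+        //   the rack.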
+ // + // // - List sleds + services, return sleds with/without services // - Pick sleds that are targets probably all up-front // - FOR EACH @@ -278,7 +286,6 @@ impl DataStore { // - Provision external IP // - Find cert // - Upsert nexus service record - // - Unmark rack as "rebalancing" // NOTE: It's probably possible to do this without the transaction. // @@ -320,6 +327,7 @@ impl DataStore { // INSERT INTO services // SELECT * FROM candidate_services // ON CONFLICT (id) + // --- This doesn't actually work for the 'already exists' case, fyi // DO NOTHING // RETURNING * // ), @@ -363,6 +371,15 @@ impl DataStore { })?; let svc_id = Uuid::new_v4(); + // TODO: With some work, you can get rid of the + // "...on_connection" versions of functions. + // + // See: https://github.com/oxidecomputer/omicron/pull/1621#discussion_r949796959 + // + // TODO: I *strongly* believe this means Connection vs Pool + // error unification in async_bb8_diesel. *always* return + // the pool error; keep it simple. + // Always allocate an internal IP address to this service. let address = Self::next_ipv6_address_on_connection(&conn, sled.id()) From 19dde72a348d2f2d4b9b35e2ed3201c0a19c0cce Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 14 Sep 2022 11:47:23 -0400 Subject: [PATCH 81/88] add sql proof-of-concept --- nexus/src/db/datastore/service.rs | 43 +-- services.sql | 432 ++++++++++++++++++++++++++++++ 2 files changed, 433 insertions(+), 42 deletions(-) create mode 100644 services.sql diff --git a/nexus/src/db/datastore/service.rs b/nexus/src/db/datastore/service.rs index 5a7a2032a6d..e782f7ee9e2 100644 --- a/nexus/src/db/datastore/service.rs +++ b/nexus/src/db/datastore/service.rs @@ -289,48 +289,7 @@ impl DataStore { // NOTE: It's probably possible to do this without the transaction. // - // Something like this - heavily inspired by the external IP allocation - // CTE: - // - // WITH - // existing_count AS ( - // SELECT COUNT(1) FROM services WHERE allocated AND not deleted - // ), - // new_count AS ( - // -- Use "GREATEST" to avoid underflow if we've somehow - // -- over-allocated services beyond the redundancy. - // GREATEST(, existing_count) - existing_count - // ), - // candidate_sleds AS ( - // SELECT all sleds in the allocation scope (in the rack?) 
- // LEFT OUTER JOIN with allocated services - // ON service_type - // WHERE service_type IS NULL (svc not allocated to the sled) - // LIMIT new_count - // ), - // new_internal_ips AS ( - // UPDATE sled - // SET - // last_used_address = last_used_address + 1 - // WHERE - // sled_id IN candidate_sleds - // RETURNING - // last_used_address - // ), - // new_external_ips AS ( - // (need to insert the external IP allocation CTE here somehow) - // ), - // candidate_services AS ( - // JOIN all the sleds with the IPs they need - // ), - // new_services AS ( - // INSERT INTO services - // SELECT * FROM candidate_services - // ON CONFLICT (id) - // --- This doesn't actually work for the 'already exists' case, fyi - // DO NOTHING - // RETURNING * - // ), + // See: services.sql self.pool() .transaction_async(|conn| async move { let sleds_and_maybe_svcs = diff --git a/services.sql b/services.sql new file mode 100644 index 00000000000..a22f18e3611 --- /dev/null +++ b/services.sql @@ -0,0 +1,432 @@ +/* + * + * TAKEN FROM DBINIT.SQL + * + */ + +/* dbwipe.sql */ +CREATE DATABASE IF NOT EXISTS omicron; +CREATE USER IF NOT EXISTS omicron; +ALTER DEFAULT PRIVILEGES FOR ROLE root REVOKE ALL ON TABLES FROM omicron; +DROP DATABASE IF EXISTS omicron; +DROP USER IF EXISTS omicron; + +/* dbinit.sql */ +CREATE DATABASE IF NOT EXISTS omicron; +CREATE USER IF NOT EXISTS omicron; +ALTER DEFAULT PRIVILEGES GRANT INSERT, SELECT, UPDATE, DELETE ON TABLES to omicron; + +set disallow_full_table_scans = on; +set large_full_scan_rows = 0; + +/* + * Racks + */ +CREATE TABLE omicron.public.rack ( + /* Identity metadata (asset) */ + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + + /* + * Identifies if rack management has been transferred from RSS -> Nexus. + * If "false", RSS is still managing sleds, services, and DNS records. + * + * This value is set to "true" when RSS calls the + * "rack_initialization_complete" endpoint on Nexus' internal interface. + * + * See RFD 278 for more detail. + */ + initialized BOOL NOT NULL, + + /* Used to configure the updates service URL */ + tuf_base_url STRING(512) +); + +/* + * Sleds + */ + +CREATE TABLE omicron.public.sled ( + /* Identity metadata (asset) */ + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ, + rcgen INT NOT NULL, + + /* FK into the Rack table */ + rack_id UUID NOT NULL, + + /* Idenfities if this Sled is a Scrimlet */ + is_scrimlet BOOL NOT NULL, + + /* The IP address and bound port of the sled agent server. */ + ip INET NOT NULL, + port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, + + /* The last address allocated to an Oxide service on this sled. */ + last_used_address INET NOT NULL +); + +/* Add an index which lets us look up the sleds on a rack */ +CREATE INDEX ON omicron.public.sled ( + rack_id +) WHERE + time_deleted IS NULL; + +CREATE INDEX ON omicron.public.sled ( + id +) WHERE + time_deleted IS NULL; + +/* + * Services + */ + +CREATE TYPE omicron.public.service_kind AS ENUM ( + 'internal_dns', + 'nexus', + 'oximeter' +); + +CREATE TABLE omicron.public.service ( + /* Identity metadata (asset) */ + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + + /* FK into the Sled table */ + sled_id UUID NOT NULL, + /* The IP address of the service. */ + ip INET NOT NULL, + /* Indicates the type of service. 
*/ + kind omicron.public.service_kind NOT NULL +); + +/* Add an index which lets us look up the services on a sled */ +CREATE INDEX ON omicron.public.service ( + sled_id, + kind +); + +/* Add an index which lets us look up services of a particular kind on a sled */ +CREATE INDEX ON omicron.public.service ( + kind +); + +/* + * Additional context for services of "kind = nexus" + * This table should be treated as an optional extension + * of the service table itself. + */ +CREATE TABLE omicron.public.nexus_service ( + id UUID PRIMARY KEY, + + /* FK to the service table */ + service_id UUID NOT NULL, + /* FK to the instance_external_ip table */ + external_ip_id UUID NOT NULL, + /* FK to the nexus_certificate table */ + certificate_id UUID NOT NULL +); + +/* + * Information about x509 certificates used to serve Nexus' external interface. + * These certificates may be used by multiple instantiations of the Nexus + * service simultaneously. + */ +CREATE TABLE omicron.public.nexus_certificate ( + id UUID PRIMARY KEY, + public_cert BYTES NOT NULL, + private_key BYTES NOT NULL +); + +/* + * ZPools of Storage, attached to Sleds. + * Typically these are backed by a single physical disk. + */ +CREATE TABLE omicron.public.Zpool ( + /* Identity metadata (asset) */ + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ, + rcgen INT NOT NULL, + + /* FK into the Sled table */ + sled_id UUID NOT NULL, + + /* TODO: Could also store physical disk FK here */ + + total_size INT NOT NULL +); + +CREATE TYPE omicron.public.dataset_kind AS ENUM ( + 'crucible', + 'cockroach', + 'clickhouse' +); + +/* + * A dataset of allocated space within a zpool. + */ +CREATE TABLE omicron.public.Dataset ( + /* Identity metadata (asset) */ + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ, + rcgen INT NOT NULL, + + /* FK into the Pool table */ + pool_id UUID NOT NULL, + + /* Contact information for the dataset */ + ip INET NOT NULL, + port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, + + kind omicron.public.dataset_kind NOT NULL, + + /* An upper bound on the amount of space that is allowed to be in-use */ + quota INT NOT NULL, + reservation INT NOT NULL, + + /* An upper bound on the amount of space that might be in-use */ + size_used INT, + + /* A quota smaller than a reservation would reserve unusable space */ + CONSTRAINT reservation_less_than_or_equal_to_quota CHECK ( + reservation <= quota + ), + + /* Crucible must make use of 'size_used'; other datasets manage their own storage */ + CONSTRAINT size_used_column_set_for_crucible CHECK ( + (kind != 'crucible') OR + (kind = 'crucible' AND size_used IS NOT NULL) + ), + + /* Validate that the size usage is less than the quota */ + CONSTRAINT size_used_less_than_or_equal_to_quota CHECK ( + (size_used IS NULL) OR + (size_used IS NOT NULL AND size_used <= quota) + ) +); + +/* Create an index on the size usage for Crucible's allocation */ +CREATE INDEX on omicron.public.Dataset ( + size_used +) WHERE size_used IS NOT NULL AND time_deleted IS NULL AND kind = 'crucible'; + +/* Create an index on the size usage for any dataset */ +CREATE INDEX on omicron.public.Dataset ( + size_used +) WHERE size_used IS NOT NULL AND time_deleted IS NULL; + +-- TODO: Obviously, there's more stuff here. But this is a proxy. 
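+
+-- As a sketch of how nexus_service is intended to be used (it is not
+-- exercised by the provisioning CTE below): each Nexus service row can be
+-- joined back to its service record and external IP, e.g.
+--
+--   SELECT svc.id, svc.sled_id, svc.ip, nsvc.external_ip_id
+--   FROM omicron.public.service AS svc
+--   JOIN omicron.public.nexus_service AS nsvc
+--     ON nsvc.service_id = svc.id
+--   WHERE svc.kind = 'nexus';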
+CREATE TABLE omicron.public.external_ip ( + id UUID PRIMARY KEY +); + + +/* + * + * TEST DATA + * + */ + +-- Add a rack +INSERT INTO omicron.public.rack (id, time_created, time_modified, initialized, tuf_base_url) VALUES + ( + '11111111-aaaa-407e-aa8d-602ed78f38be', + TIMESTAMPTZ '2016-03-26', + TIMESTAMPTZ '2016-03-26', + TRUE, + NULL + ); + +-- Add some sleds (aaaa / bbbb are gimlets, cccc is scrimlet) +INSERT INTO omicron.public.sled (id, time_created, time_modified, time_deleted, rcgen, rack_id, is_scrimlet, ip, port, last_used_address) VALUES + ( + '22222222-aaaa-407e-aa8d-602ed78f38be', + TIMESTAMPTZ '2016-03-26', + TIMESTAMPTZ '2016-03-26', + NULL, + 0, + '11111111-aaaa-407e-aa8d-602ed78f38be', + false, + '127.0.0.1', + 0, + '127.0.0.1' + ), + ( + '22222222-bbbb-407e-aa8d-602ed78f38be', + TIMESTAMPTZ '2016-03-26', + TIMESTAMPTZ '2016-03-26', + NULL, + 0, + '11111111-aaaa-407e-aa8d-602ed78f38be', + false, + '127.0.0.1', + 0, + '127.0.100.1' + ), + ( + '22222222-cccc-407e-aa8d-602ed78f38be', + TIMESTAMPTZ '2016-03-26', + TIMESTAMPTZ '2016-03-26', + NULL, + 0, + '11111111-aaaa-407e-aa8d-602ed78f38be', + true, + '127.0.0.1', + 0, + '127.0.200.1' + ); + +INSERT INTO omicron.public.service (id, time_created, time_modified, sled_id, ip, kind) VALUES + ( + '33333333-aaaa-407e-aa8d-602ed78f38be', + TIMESTAMPTZ '2016-03-26', + TIMESTAMPTZ '2016-03-26', + '22222222-aaaa-407e-aa8d-602ed78f38be', + '127.0.0.1', + 'nexus' + ); + +/* + * CTE: Allocate a particular service within a rack. + * + * Inputs: Rack ID, service type, desired count + */ + +WITH + -- Find all allocation targets. + -- This includes sleds which may already be running the service, + -- and sleds which could run the service in the future. + sled_allocation_pool AS ( + SELECT + omicron.public.sled.id + FROM + omicron.public.sled + WHERE + omicron.public.sled.time_deleted IS NULL AND + -- XXX: Constraints can be user-supplied? + omicron.public.sled.rack_id = '11111111-aaaa-407e-aa8d-602ed78f38be' + ), + + -- Get all services which already have been allocated from this pool. + previously_allocated_services AS ( + SELECT + omicron.public.service.id, + omicron.public.service.time_created, + omicron.public.service.time_modified, + omicron.public.service.sled_id, + omicron.public.service.ip, + omicron.public.service.kind + FROM + omicron.public.service + WHERE + -- XXX: 'nexus' is the name of this particular service + omicron.public.service.kind = 'nexus' AND + omicron.public.service.sled_id IN (SELECT id FROM sled_allocation_pool) + ), + + -- Calculate how many services we already have + old_service_count AS ( + SELECT COUNT(1) FROM previously_allocated_services + ), + -- Calculate the number of new services we need + new_service_count AS ( + -- XXX: 3 is the user-supplied redundancy + SELECT GREATEST(3, (SELECT * FROM old_service_count)) + - (SELECT * FROM old_service_count) + ), + + -- Get allocation candidates from the pool, as long as they don't already + -- have the service. 
+ candidate_sleds AS ( + SELECT + sled_allocation_pool.id + FROM + sled_allocation_pool + WHERE + sled_allocation_pool.id NOT IN (SELECT sled_id FROM previously_allocated_services) + LIMIT (SELECT * FROM new_service_count) + ), + + -- Allocate an internal IP address for the service + new_internal_ips AS ( + UPDATE omicron.public.sled + SET + last_used_address = last_used_address + 1 + WHERE + omicron.public.sled.id in (SELECT id from candidate_sleds) + RETURNING + omicron.public.sled.id as sled_id, + omicron.public.sled.last_used_address as ip + ), + + -- TODO: External IPs??? + + -- TODO: This fails; data-modifying statements must be at a top level. +-- new_external_ips AS ( +-- WITH +-- new_ips AS ( +-- INSERT INTO omicron.public.external_ip (id) VALUES ( +-- gen_random_uuid() +-- ) +-- RETURNING * +-- ) +-- SELECT * FROM (SELECT * FROM new_ips) +-- ), + + -- Construct the services we want to insert + candidate_services AS ( + SELECT + gen_random_uuid() as id, + now() as time_created, + now() as time_modified, + candidate_sleds.id as sled_id, + new_internal_ips.ip as ip, + CAST('nexus' AS omicron.public.service_kind) as kind + FROM + candidate_sleds + LEFT JOIN + new_internal_ips + ON + candidate_sleds.id = new_internal_ips.sled_id + + ), + + inserted_services AS ( + INSERT INTO omicron.public.service + ( + SELECT + candidate_services.id, + candidate_services.time_created, + candidate_services.time_modified, + candidate_services.sled_id, + candidate_services.ip, + candidate_services.kind + FROM candidate_services + ) + RETURNING * + ) +SELECT * FROM + ( + SELECT + -- XXX: Do we care about the new/not new distinction? + FALSE as new, + * + FROM previously_allocated_services + UNION + SELECT + TRUE as new, + * + FROM inserted_services + ); + +set disallow_full_table_scans = off; + +-- SELECT * FROM omicron.public.Sled; From 68600482f07d0ae2a6094becbd868d335c5fd23f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 15 Sep 2022 10:47:24 -0400 Subject: [PATCH 82/88] Not quite working, but compiling with CTEs --- nexus/src/db/datastore/rack.rs | 3 - nexus/src/db/queries/mod.rs | 1 + nexus/src/db/queries/service_provision.rs | 991 ++++++++++++++++++++++ 3 files changed, 992 insertions(+), 3 deletions(-) create mode 100644 nexus/src/db/queries/service_provision.rs diff --git a/nexus/src/db/datastore/rack.rs b/nexus/src/db/datastore/rack.rs index ca237aecb55..069258d83d2 100644 --- a/nexus/src/db/datastore/rack.rs +++ b/nexus/src/db/datastore/rack.rs @@ -99,7 +99,6 @@ impl DataStore { DatasetInsert { err: AsyncInsertError, zpool_id: Uuid, - dataset_id: Uuid, }, RackUpdate(PoolError), } @@ -178,7 +177,6 @@ impl DataStore { TxnError::CustomError(RackInitError::DatasetInsert { err, zpool_id, - dataset_id: dataset.id(), }) })?; } @@ -206,7 +204,6 @@ impl DataStore { TxnError::CustomError(RackInitError::DatasetInsert { err, zpool_id, - dataset_id: _, }) => match err { AsyncInsertError::CollectionNotFound => { Error::ObjectNotFound { diff --git a/nexus/src/db/queries/mod.rs b/nexus/src/db/queries/mod.rs index 0d2a7f86505..484540a632e 100644 --- a/nexus/src/db/queries/mod.rs +++ b/nexus/src/db/queries/mod.rs @@ -10,5 +10,6 @@ pub mod ip_pool; #[macro_use] mod next_item; pub mod network_interface; +pub mod service_provision; pub mod vpc; pub mod vpc_subnet; diff --git a/nexus/src/db/queries/service_provision.rs b/nexus/src/db/queries/service_provision.rs new file mode 100644 index 00000000000..036536fa607 --- /dev/null +++ b/nexus/src/db/queries/service_provision.rs @@ -0,0 +1,991 @@ +// This Source 
Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Implementation of queries for provisioning services. + +use crate::db::model::Name; +use crate::db::model::Service; +use crate::db::model::ServiceKind; +use crate::db::model::Sled; +use crate::db::pool::DbConnection; +use crate::db::schema; +use chrono::DateTime; +use chrono::Utc; +use diesel::pg::Pg; +use diesel::query_builder::AstPass; +use diesel::query_builder::Query; +use diesel::query_builder::QueryFragment; +use diesel::query_builder::QueryId; +use diesel::sql_types; +use diesel::Column; +use diesel::ExpressionMethods; +use diesel::QueryDsl; +use diesel::QueryResult; +use diesel::RunQueryDsl; +use diesel::SelectableHelper; +use uuid::Uuid; + +type FromClause = + diesel::internal::table_macro::StaticQueryFragmentInstance; +type ServiceFromClause = FromClause; +const SERVICE_FROM_CLAUSE: ServiceFromClause = ServiceFromClause::new(); + +//trait Queryable: Query + QueryFragment {} +//impl> Queryable for T {} + +trait Queryable: Query + QueryFragment {} + +impl Queryable for T +where + T: Query + QueryFragment +{} + +/* +trait QueryableClone { + fn clone_box(&self) -> Box>; +} + +impl QueryableClone for T +where + T: 'static + Queryable + Clone, +{ + fn clone_box(&self) -> Box> { + Box::new(self.clone()) + } +} + +impl Clone for Box> { + fn clone(&self) -> Box> { + self.clone_box() + } +} +*/ + +/// Represents a sub-query within a CTE. +/// +/// For an expression like: +/// +/// ```sql +/// WITH +/// foo as ..., +/// bar as ..., +/// SELECT * FROM bar; +/// ``` +/// +/// This trait represents one of the sub-query arms, such as "foo as ..." or +/// "bar as ...". +trait SubQuery { + // TODO: This query should have an associated SQL type! + + // TODO: Could be associated constant, maybe? + fn name(&self) -> &'static str; + fn query(&self) -> &dyn QueryFragment; +} + +/// A thin wrapper around a [`SubQuery`]. +/// +/// Used to avoid orphan rules while creating blanket implementations. +struct CteSubquery(Box); + +impl QueryId for CteSubquery { + type QueryId = (); + const HAS_STATIC_QUERY_ID: bool = false; +} + +impl QueryFragment for CteSubquery { + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + out.unsafe_to_cache_prepared(); + + out.push_sql(self.0.name()); + out.push_sql(" AS ("); + self.0.query().walk_ast(out.reborrow())?; + out.push_sql(")"); + Ok(()) + } +} + +struct CteBuilder { + subqueries: Vec, +} + +impl CteBuilder { + fn new() -> Self { + Self { + subqueries: vec![], + } + } + + fn add_subquery(mut self, subquery: Q) -> Self { + self.subqueries.push( + CteSubquery(Box::new(subquery)) + ); + self + } + + fn build(mut self, statement: Box>) -> Cte { + Cte { + subqueries: self.subqueries, + statement + } + } +} + +struct Cte { + subqueries: Vec, + statement: Box>, +} + +impl QueryFragment for Cte { + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + out.unsafe_to_cache_prepared(); + + out.push_sql("WITH "); + for (pos, query) in self.subqueries.iter().enumerate() { + query.walk_ast(out.reborrow())?; + if pos == self.subqueries.len() - 1 { + out.push_sql(" "); + } else { + out.push_sql(", "); + } + } + self.statement.walk_ast(out.reborrow())?; + Ok(()) + } +} + +// ----------------------------- // + +// TODO: I want this to be as lightweight to make as possible! 
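+
+// As a rough sketch of how the pieces above are meant to compose (using the
+// subqueries defined below), a builder invocation like:
+//
+//     let cte = CteBuilder::new()
+//         .add_subquery(sled_allocation_pool)
+//         .add_subquery(previously_allocated_services)
+//         .build(final_select);
+//
+// is intended to render SQL shaped like:
+//
+//     WITH
+//         sled_allocation_pool AS (...),
+//         previously_allocated_services AS (...)
+//     <final_select>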
+struct SledAllocationPoolSubquery { + query: Box>, +} + +impl SledAllocationPoolSubquery { + fn new() -> Self { + use crate::db::schema::sled::dsl; + Self { + query: Box::new( + dsl::sled + .filter(dsl::time_deleted.is_null()) + // TODO: Filter by rack? + .select(dsl::id) + ) + } + } +} + +impl SubQuery for SledAllocationPoolSubquery { + fn name(&self) -> &'static str { + "sled_allocation_pool" + } + + fn query(&self) -> &dyn QueryFragment { + &self.query + } +} + +struct PreviouslyAllocatedServices { + query: Box>, +} + +impl PreviouslyAllocatedServices { + fn new(allocation_pool: &SledAllocationPoolSubquery) -> Self { + use crate::db::schema::service::dsl; + Self { + query: Box::new( + dsl::service + .filter(dsl::kind.eq(ServiceKind::Nexus)) +// .filter(dsl::sled_id.eq_any(allocation_pool)) + ) + } + } +} + +impl SubQuery for PreviouslyAllocatedServices { + fn name(&self) -> &'static str { + "previously_allocated_services" + } + + fn query(&self) -> &dyn QueryFragment { + &self.query + } +} + +/// Provision services of a particular type within a rack. +/// +/// TODO: Document +pub struct ServiceProvision { + now: DateTime, + + cte: Cte, +} + +impl ServiceProvision { + pub fn new() -> Self { + let now = Utc::now(); + let sled_allocation_pool = SledAllocationPoolSubquery::new(); + let previously_allocated_services = PreviouslyAllocatedServices::new(&sled_allocation_pool); + + // TODO: Reference prior subquery? + use crate::db::schema::sled::dsl; + let final_select = Box::new(dsl::sled.filter(dsl::time_deleted.is_null())); + + let cte = CteBuilder::new() + .add_subquery(sled_allocation_pool) + .add_subquery(previously_allocated_services) + .build(final_select); + + Self { + now, + cte, + } + } +} + + +impl QueryId for ServiceProvision { + type QueryId = (); + const HAS_STATIC_QUERY_ID: bool = false; +} + +impl QueryFragment for ServiceProvision { + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + out.unsafe_to_cache_prepared(); + + self.cte.walk_ast(out.reborrow())?; + Ok(()) + } +} + +impl Query for ServiceProvision { + type SqlType = <>::SelectExpression as diesel::Expression>::SqlType; +} + +impl RunQueryDsl for ServiceProvision {} + +#[cfg(test)] +mod tests { + use crate::context::OpContext; + use crate::db::datastore::DataStore; + use crate::db::identity::Resource; + use crate::db::model::IpKind; + use crate::db::model::IpPool; + use crate::db::model::IpPoolRange; + use crate::db::model::Name; + use crate::external_api::shared::IpRange; + use async_bb8_diesel::AsyncRunQueryDsl; + use dropshot::test_util::LogContext; + use nexus_test_utils::db::test_setup_database; + use nexus_test_utils::RACK_UUID; + use omicron_common::api::external::Error; + use omicron_common::api::external::IdentityMetadataCreateParams; + use omicron_test_utils::dev; + use omicron_test_utils::dev::db::CockroachInstance; + use std::net::IpAddr; + use std::net::Ipv4Addr; + use std::sync::Arc; + use uuid::Uuid; + + struct TestContext { + logctx: LogContext, + opctx: OpContext, + db: CockroachInstance, + db_datastore: Arc, + } + + impl TestContext { + async fn new(test_name: &str) -> Self { + let logctx = dev::test_setup_log(test_name); + let log = logctx.log.new(o!()); + let db = test_setup_database(&log).await; + crate::db::datastore::datastore_test(&logctx, &db).await; + let cfg = crate::db::Config { url: db.pg_config().clone() }; + let pool = Arc::new(crate::db::Pool::new(&cfg)); + let db_datastore = + Arc::new(crate::db::DataStore::new(Arc::clone(&pool))); + let 
opctx = + OpContext::for_tests(log.new(o!()), db_datastore.clone()); + Self { logctx, opctx, db, db_datastore } + } + + async fn create_ip_pool_internal( + &self, + name: &str, + range: IpRange, + project_id: Option, + rack_id: Option, + ) { + let pool = IpPool::new( + &IdentityMetadataCreateParams { + name: String::from(name).parse().unwrap(), + description: format!("ip pool {}", name), + }, + project_id, + rack_id, + ); + + diesel::insert_into(crate::db::schema::ip_pool::dsl::ip_pool) + .values(pool.clone()) + .execute_async( + self.db_datastore + .pool_authorized(&self.opctx) + .await + .unwrap(), + ) + .await + .expect("Failed to create IP Pool"); + + let pool_range = IpPoolRange::new(&range, pool.id(), project_id); + diesel::insert_into( + crate::db::schema::ip_pool_range::dsl::ip_pool_range, + ) + .values(pool_range) + .execute_async( + self.db_datastore.pool_authorized(&self.opctx).await.unwrap(), + ) + .await + .expect("Failed to create IP Pool range"); + } + + async fn create_rack_ip_pool( + &self, + name: &str, + range: IpRange, + rack_id: Uuid, + ) { + self.create_ip_pool_internal( + name, + range, + /* project_id= */ None, + Some(rack_id), + ) + .await; + } + + async fn create_ip_pool( + &self, + name: &str, + range: IpRange, + project_id: Option, + ) { + self.create_ip_pool_internal( + name, range, project_id, /* rack_id= */ None, + ) + .await; + } + + async fn success(mut self) { + self.db.cleanup().await.unwrap(); + self.logctx.cleanup_successful(); + } + } + + #[tokio::test] + async fn test_next_external_ip_allocation_and_exhaustion() { + let context = + TestContext::new("test_next_external_ip_allocation_and_exhaustion") + .await; + let range = IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 1), + )) + .unwrap(); + context.create_ip_pool("p0", range, None).await; + let project_id = Uuid::new_v4(); + for first_port in + (0..super::MAX_PORT).step_by(super::NUM_SOURCE_NAT_PORTS) + { + let id = Uuid::new_v4(); + let instance_id = Uuid::new_v4(); + let ip = context + .db_datastore + .allocate_instance_snat_ip( + &context.opctx, + id, + project_id, + instance_id, + ) + .await + .expect("Failed to allocate instance external IP address"); + assert_eq!(ip.ip.ip(), range.first_address()); + assert_eq!(ip.first_port.0, first_port as u16); + assert_eq!( + ip.last_port.0, + (first_port + (super::NUM_SOURCE_NAT_PORTS - 1) as i32) as u16 + ); + } + + // The next allocation should fail, due to IP exhaustion + let instance_id = Uuid::new_v4(); + let err = context + .db_datastore + .allocate_instance_snat_ip( + &context.opctx, + Uuid::new_v4(), + project_id, + instance_id, + ) + .await + .expect_err( + "An error should be received when the IP pools are exhausted", + ); + assert_eq!( + err, + Error::InvalidRequest { + message: String::from("No external IP addresses available"), + } + ); + context.success().await; + } + + #[tokio::test] + async fn test_next_external_ip_out_of_order_allocation_ok() { + let context = TestContext::new( + "test_next_external_ip_out_of_order_allocation_ok", + ) + .await; + // Need a larger range, since we're currently limited to the whole port + // range for each external IP. + let range = IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 3), + )) + .unwrap(); + context.create_ip_pool("p0", range, None).await; + + // TODO-completess: Implementing Iterator for IpRange would be nice. 
+ let addresses = [ + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 2), + Ipv4Addr::new(10, 0, 0, 3), + ]; + let ports = (0..super::MAX_PORT).step_by(super::NUM_SOURCE_NAT_PORTS); + let mut external_ips = itertools::iproduct!(addresses, ports); + + // Allocate two addresses + let mut ips = Vec::with_capacity(2); + let project_id = Uuid::new_v4(); + for (expected_ip, expected_first_port) in external_ips.clone().take(2) { + let instance_id = Uuid::new_v4(); + let ip = context + .db_datastore + .allocate_instance_snat_ip( + &context.opctx, + Uuid::new_v4(), + project_id, + instance_id, + ) + .await + .expect("Failed to allocate instance external IP address"); + assert_eq!(ip.ip.ip(), expected_ip); + assert_eq!(ip.first_port.0, expected_first_port as u16); + let expected_last_port = (expected_first_port + + (super::NUM_SOURCE_NAT_PORTS - 1) as i32) + as u16; + assert_eq!(ip.last_port.0, expected_last_port); + ips.push(ip); + } + + // Release the first + context + .db_datastore + .deallocate_external_ip(&context.opctx, ips[0].id) + .await + .expect("Failed to release the first external IP address"); + + // Allocate a new one, ensure it's the same as the first one we + // released. + let instance_id = Uuid::new_v4(); + let ip = context + .db_datastore + .allocate_instance_snat_ip( + &context.opctx, + Uuid::new_v4(), + project_id, + instance_id, + ) + .await + .expect("Failed to allocate instance external IP address"); + println!("{:?}\n{:?}", ip, ips[0]); + assert_eq!( + ip.ip, ips[0].ip, + "Expected to reallocate external IPs sequentially" + ); + assert_eq!( + ip.first_port, ips[0].first_port, + "Expected to reallocate external IPs sequentially" + ); + assert_eq!( + ip.last_port, ips[0].last_port, + "Expected to reallocate external IPs sequentially" + ); + + // Allocate one more, ensure it's the next chunk after the second one + // from the original loop. 
+ let instance_id = Uuid::new_v4(); + let ip = context + .db_datastore + .allocate_instance_snat_ip( + &context.opctx, + Uuid::new_v4(), + project_id, + instance_id, + ) + .await + .expect("Failed to allocate instance external IP address"); + let (expected_ip, expected_first_port) = external_ips.nth(2).unwrap(); + assert_eq!(ip.ip.ip(), std::net::IpAddr::from(expected_ip)); + assert_eq!(ip.first_port.0, expected_first_port as u16); + let expected_last_port = (expected_first_port + + (super::NUM_SOURCE_NAT_PORTS - 1) as i32) + as u16; + assert_eq!(ip.last_port.0, expected_last_port); + + context.success().await; + } + + #[tokio::test] + async fn test_next_external_ip_with_ephemeral_takes_whole_port_range() { + let context = TestContext::new( + "test_next_external_ip_with_ephemeral_takes_whole_port_range", + ) + .await; + let range = IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 3), + )) + .unwrap(); + context.create_ip_pool("p0", range, None).await; + + let instance_id = Uuid::new_v4(); + let project_id = Uuid::new_v4(); + let id = Uuid::new_v4(); + let pool_name = None; + + let ip = context + .db_datastore + .allocate_instance_ephemeral_ip( + &context.opctx, + id, + project_id, + instance_id, + pool_name, + ) + .await + .expect("Failed to allocate instance ephemeral IP address"); + assert_eq!(ip.kind, IpKind::Ephemeral); + assert_eq!(ip.ip.ip(), range.first_address()); + assert_eq!(ip.first_port.0, 0); + assert_eq!(ip.last_port.0, u16::MAX); + + context.success().await; + } + + #[tokio::test] + async fn test_next_external_ip_is_restricted_to_projects() { + let context = + TestContext::new("test_next_external_ip_is_restricted_to_projects") + .await; + + // Create one pool restricted to a project, and one not. + let project_id = Uuid::new_v4(); + let first_range = IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 3), + )) + .unwrap(); + context.create_ip_pool("p0", first_range, Some(project_id)).await; + + let second_range = IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 4), + Ipv4Addr::new(10, 0, 0, 6), + )) + .unwrap(); + context.create_ip_pool("p1", second_range, None).await; + + // Allocating an address on an instance in a _different_ project should + // get an address from the second pool. + let instance_id = Uuid::new_v4(); + let instance_project_id = Uuid::new_v4(); + let id = Uuid::new_v4(); + let pool_name = None; + + let ip = context + .db_datastore + .allocate_instance_ephemeral_ip( + &context.opctx, + id, + instance_project_id, + instance_id, + pool_name, + ) + .await + .expect("Failed to allocate instance ephemeral IP address"); + assert_eq!(ip.kind, IpKind::Ephemeral); + assert_eq!(ip.ip.ip(), second_range.first_address()); + assert_eq!(ip.first_port.0, 0); + assert_eq!(ip.last_port.0, u16::MAX); + assert_eq!(ip.project_id.unwrap(), instance_project_id); + + // Allocating an address on an instance in the same project should get + // an address from the first pool. 
+ let instance_id = Uuid::new_v4(); + let id = Uuid::new_v4(); + let pool_name = None; + + let ip = context + .db_datastore + .allocate_instance_ephemeral_ip( + &context.opctx, + id, + project_id, + instance_id, + pool_name, + ) + .await + .expect("Failed to allocate instance ephemeral IP address"); + assert_eq!(ip.kind, IpKind::Ephemeral); + assert_eq!(ip.ip.ip(), first_range.first_address()); + assert_eq!(ip.first_port.0, 0); + assert_eq!(ip.last_port.0, u16::MAX); + assert_eq!(ip.project_id.unwrap(), project_id); + + context.success().await; + } + + #[tokio::test] + async fn test_next_external_ip_for_service() { + let context = + TestContext::new("test_next_external_ip_for_service").await; + + // Create an IP pool without an associated project. + let rack_id = Uuid::parse_str(RACK_UUID).unwrap(); + let ip_range = IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 2), + )) + .unwrap(); + context.create_rack_ip_pool("p0", ip_range, rack_id).await; + + // Allocate an IP address as we would for an external, rack-associated + // service. + let id1 = Uuid::new_v4(); + let ip1 = context + .db_datastore + .allocate_service_ip(&context.opctx, id1, rack_id) + .await + .expect("Failed to allocate service IP address"); + assert_eq!(ip1.kind, IpKind::Service); + assert_eq!(ip1.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))); + assert_eq!(ip1.first_port.0, 0); + assert_eq!(ip1.last_port.0, u16::MAX); + assert!(ip1.instance_id.is_none()); + assert!(ip1.project_id.is_none()); + + // Allocate the next (last) IP address + let id2 = Uuid::new_v4(); + let ip2 = context + .db_datastore + .allocate_service_ip(&context.opctx, id2, rack_id) + .await + .expect("Failed to allocate service IP address"); + assert_eq!(ip2.kind, IpKind::Service); + assert_eq!(ip2.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 2))); + assert_eq!(ip2.first_port.0, 0); + assert_eq!(ip2.last_port.0, u16::MAX); + assert!(ip2.instance_id.is_none()); + assert!(ip2.project_id.is_none()); + + // Once we're out of IP addresses, test that we see the right error. + let id3 = Uuid::new_v4(); + let err = context + .db_datastore + .allocate_service_ip(&context.opctx, id3, rack_id) + .await + .expect_err("Should have failed to allocate after pool exhausted"); + assert_eq!( + err, + Error::InvalidRequest { + message: String::from("No external IP addresses available"), + } + ); + + context.success().await; + } + + #[tokio::test] + async fn test_insert_external_ip_for_service_is_idempoent() { + let context = TestContext::new( + "test_insert_external_ip_for_service_is_idempotent", + ) + .await; + + // Create an IP pool without an associated project. + let rack_id = Uuid::parse_str(RACK_UUID).unwrap(); + let ip_range = IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 2), + )) + .unwrap(); + context.create_rack_ip_pool("p0", ip_range, rack_id).await; + + // Allocate an IP address as we would for an external, rack-associated + // service. 
+ let id = Uuid::new_v4(); + let ip = context + .db_datastore + .allocate_service_ip(&context.opctx, id, rack_id) + .await + .expect("Failed to allocate service IP address"); + assert_eq!(ip.kind, IpKind::Service); + assert_eq!(ip.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))); + assert_eq!(ip.first_port.0, 0); + assert_eq!(ip.last_port.0, u16::MAX); + assert!(ip.instance_id.is_none()); + assert!(ip.project_id.is_none()); + + let ip_again = context + .db_datastore + .allocate_service_ip(&context.opctx, id, rack_id) + .await + .expect("Failed to allocate service IP address"); + + assert_eq!(ip.id, ip_again.id); + assert_eq!(ip.ip.ip(), ip_again.ip.ip()); + + context.success().await; + } + + // This test is identical to "test_insert_external_ip_is_idempotent", + // but tries to make an idempotent allocation after all addresses in the + // pool have been allocated. + #[tokio::test] + async fn test_insert_external_ip_for_service_is_idempotent_even_when_full() + { + let context = TestContext::new( + "test_insert_external_ip_is_idempotent_even_when_full", + ) + .await; + + // Create an IP pool without an associated project. + let rack_id = Uuid::parse_str(RACK_UUID).unwrap(); + let ip_range = IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 1), + )) + .unwrap(); + context.create_rack_ip_pool("p0", ip_range, rack_id).await; + + // Allocate an IP address as we would for an external, rack-associated + // service. + let id = Uuid::new_v4(); + let ip = context + .db_datastore + .allocate_service_ip(&context.opctx, id, rack_id) + .await + .expect("Failed to allocate service IP address"); + assert_eq!(ip.kind, IpKind::Service); + assert_eq!(ip.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))); + assert_eq!(ip.first_port.0, 0); + assert_eq!(ip.last_port.0, u16::MAX); + assert!(ip.instance_id.is_none()); + assert!(ip.project_id.is_none()); + + let ip_again = context + .db_datastore + .allocate_service_ip(&context.opctx, id, rack_id) + .await + .expect("Failed to allocate service IP address"); + + assert_eq!(ip.id, ip_again.id); + assert_eq!(ip.ip.ip(), ip_again.ip.ip()); + + context.success().await; + } + + #[tokio::test] + async fn test_insert_external_ip_is_idempotent() { + let context = + TestContext::new("test_insert_external_ip_is_idempotent").await; + + // Create an IP pool + let range = IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 3), + )) + .unwrap(); + context.create_ip_pool("p0", range, None).await; + + // Create one SNAT IP address. + let instance_id = Uuid::new_v4(); + let id = Uuid::new_v4(); + let project_id = Uuid::new_v4(); + let ip = context + .db_datastore + .allocate_instance_snat_ip( + &context.opctx, + id, + project_id, + instance_id, + ) + .await + .expect("Failed to allocate instance SNAT IP address"); + assert_eq!(ip.kind, IpKind::SNat); + assert_eq!(ip.ip.ip(), range.first_address()); + assert_eq!(ip.first_port.0, 0); + assert_eq!( + usize::from(ip.last_port.0), + super::NUM_SOURCE_NAT_PORTS - 1 + ); + assert_eq!(ip.project_id.unwrap(), project_id); + + // Create a new IP, with the _same_ ID, and ensure we get back the same + // value. + let new_ip = context + .db_datastore + .allocate_instance_snat_ip( + &context.opctx, + id, + project_id, + instance_id, + ) + .await + .expect("Failed to allocate instance SNAT IP address"); + + // Check identity, not equality. The timestamps will be updated. 
+ assert_eq!(ip.id, new_ip.id); + assert_eq!(ip.name, new_ip.name); + assert_eq!(ip.description, new_ip.description); + assert!(ip.time_created <= new_ip.time_created); + assert!(ip.time_modified <= new_ip.time_modified); + assert_eq!(ip.time_deleted, new_ip.time_deleted); + assert_eq!(ip.ip_pool_id, new_ip.ip_pool_id); + assert_eq!(ip.ip_pool_range_id, new_ip.ip_pool_range_id); + assert_eq!(ip.kind, new_ip.kind); + assert_eq!(ip.ip, new_ip.ip); + assert_eq!(ip.first_port, new_ip.first_port); + assert_eq!(ip.last_port, new_ip.last_port); + + context.success().await; + } + + #[tokio::test] + async fn test_next_external_ip_is_restricted_to_pools() { + let context = + TestContext::new("test_next_external_ip_is_restricted_to_pools") + .await; + + // Create two pools, neither project-restricted. + let first_range = IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 3), + )) + .unwrap(); + context.create_ip_pool("p0", first_range, None).await; + let second_range = IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 4), + Ipv4Addr::new(10, 0, 0, 6), + )) + .unwrap(); + context.create_ip_pool("p1", second_range, None).await; + + // Allocating an address on an instance in the second pool should be + // respected, even though there are IPs available in the first. + let instance_id = Uuid::new_v4(); + let project_id = Uuid::new_v4(); + let id = Uuid::new_v4(); + let pool_name = Some(Name("p1".parse().unwrap())); + + let ip = context + .db_datastore + .allocate_instance_ephemeral_ip( + &context.opctx, + id, + project_id, + instance_id, + pool_name, + ) + .await + .expect("Failed to allocate instance ephemeral IP address"); + assert_eq!(ip.kind, IpKind::Ephemeral); + assert_eq!(ip.ip.ip(), second_range.first_address()); + assert_eq!(ip.first_port.0, 0); + assert_eq!(ip.last_port.0, u16::MAX); + assert_eq!(ip.project_id.unwrap(), project_id); + + context.success().await; + } + + #[tokio::test] + async fn test_ensure_pool_exhaustion_does_not_use_other_pool() { + let context = TestContext::new( + "test_ensure_pool_exhaustion_does_not_use_other_pool", + ) + .await; + + // Create two pools, neither project-restricted. + let first_range = IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 3), + )) + .unwrap(); + context.create_ip_pool("p0", first_range, None).await; + let first_address = Ipv4Addr::new(10, 0, 0, 4); + let last_address = Ipv4Addr::new(10, 0, 0, 6); + let second_range = + IpRange::try_from((first_address, last_address)).unwrap(); + context.create_ip_pool("p1", second_range, None).await; + + // Allocate all available addresses in the second pool. + let instance_id = Uuid::new_v4(); + let project_id = Uuid::new_v4(); + let pool_name = Some(Name("p1".parse().unwrap())); + let first_octet = first_address.octets()[3]; + let last_octet = last_address.octets()[3]; + for octet in first_octet..=last_octet { + let ip = context + .db_datastore + .allocate_instance_ephemeral_ip( + &context.opctx, + Uuid::new_v4(), + project_id, + instance_id, + pool_name.clone(), + ) + .await + .expect("Failed to allocate instance ephemeral IP address"); + println!("{ip:#?}"); + if let IpAddr::V4(addr) = ip.ip.ip() { + assert_eq!(addr.octets()[3], octet); + } else { + panic!("Expected an IPv4 address"); + } + } + + // Allocating another address should _fail_, and not use the first pool. 
+ context + .db_datastore + .allocate_instance_ephemeral_ip( + &context.opctx, + Uuid::new_v4(), + project_id, + instance_id, + pool_name, + ) + .await + .expect_err("Should not use IP addresses from a different pool"); + + context.success().await; + } +} From 3ef71f8445be0007e292f9f8300cba23acb48be2 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 16 Sep 2022 16:51:35 -0400 Subject: [PATCH 83/88] Making some progress by exploiting the 'table' macro --- nexus/src/db/queries/mod.rs | 1 + nexus/src/db/queries/service_provision.rs | 1061 ++++++++------------- services.sql | 1 - 3 files changed, 375 insertions(+), 688 deletions(-) diff --git a/nexus/src/db/queries/mod.rs b/nexus/src/db/queries/mod.rs index 484540a632e..8d7f1253a96 100644 --- a/nexus/src/db/queries/mod.rs +++ b/nexus/src/db/queries/mod.rs @@ -10,6 +10,7 @@ pub mod ip_pool; #[macro_use] mod next_item; pub mod network_interface; +#[allow(dead_code)] pub mod service_provision; pub mod vpc; pub mod vpc_subnet; diff --git a/nexus/src/db/queries/service_provision.rs b/nexus/src/db/queries/service_provision.rs index 036536fa607..e2582b18526 100644 --- a/nexus/src/db/queries/service_provision.rs +++ b/nexus/src/db/queries/service_provision.rs @@ -4,7 +4,6 @@ //! Implementation of queries for provisioning services. -use crate::db::model::Name; use crate::db::model::Service; use crate::db::model::ServiceKind; use crate::db::model::Sled; @@ -14,49 +13,43 @@ use chrono::DateTime; use chrono::Utc; use diesel::pg::Pg; use diesel::query_builder::AstPass; +use diesel::query_builder::AsQuery; use diesel::query_builder::Query; use diesel::query_builder::QueryFragment; use diesel::query_builder::QueryId; use diesel::sql_types; -use diesel::Column; use diesel::ExpressionMethods; use diesel::QueryDsl; -use diesel::QueryResult; use diesel::RunQueryDsl; -use diesel::SelectableHelper; -use uuid::Uuid; type FromClause = diesel::internal::table_macro::StaticQueryFragmentInstance; type ServiceFromClause = FromClause; const SERVICE_FROM_CLAUSE: ServiceFromClause = ServiceFromClause::new(); -//trait Queryable: Query + QueryFragment {} -//impl> Queryable for T {} +trait CteQuery: Query + QueryFragment {} -trait Queryable: Query + QueryFragment {} - -impl Queryable for T +impl CteQuery for T where - T: Query + QueryFragment + T: Query + QueryFragment {} /* -trait QueryableClone { - fn clone_box(&self) -> Box>; +trait CteQueryClone { + fn clone_box(&self) -> Box>; } -impl QueryableClone for T +impl CteQueryClone for T where - T: 'static + Queryable + Clone, + T: 'static + CteQuery + Clone, { - fn clone_box(&self) -> Box> { + fn clone_box(&self) -> Box> { Box::new(self.clone()) } } -impl Clone for Box> { - fn clone(&self) -> Box> { +impl Clone for Box> { + fn clone(&self) -> Box> { self.clone_box() } } @@ -76,13 +69,18 @@ impl Clone for Box> { /// This trait represents one of the sub-query arms, such as "foo as ..." or /// "bar as ...". trait SubQuery { - // TODO: This query should have an associated SQL type! - - // TODO: Could be associated constant, maybe? fn name(&self) -> &'static str; fn query(&self) -> &dyn QueryFragment; } +trait AsTable +where + Self::Table: AsQuery, +{ + type Table; + fn as_table(&self) -> Self::Table; +} + /// A thin wrapper around a [`SubQuery`]. /// /// Used to avoid orphan rules while creating blanket implementations. 
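For readers skimming the diff: the relationship between `SubQuery`, `CteBuilder`, and the final statement is easier to see outside of Diesel. Below is a minimal, Diesel-free sketch of the same shape; `SubQuerySketch`, `RawArm`, and `CteSketch` are illustrative stand-ins invented for this note, and the string-returning `to_sql` method stands in for `QueryFragment::walk_ast`, which in the real code writes into an `AstPass` rather than building a `String`.

trait SubQuerySketch {
    // The alias this arm is given in the WITH list, e.g. "foo".
    fn name(&self) -> &'static str;
    // The body of the arm; a stand-in for `QueryFragment::walk_ast`.
    fn to_sql(&self) -> String;
}

struct RawArm {
    name: &'static str,
    body: &'static str,
}

impl SubQuerySketch for RawArm {
    fn name(&self) -> &'static str {
        self.name
    }
    fn to_sql(&self) -> String {
        self.body.to_string()
    }
}

struct CteSketch {
    arms: Vec<Box<dyn SubQuerySketch>>,
    statement: String,
}

impl CteSketch {
    // Stitch the arms into "WITH <name> AS (<body>), ... <final statement>".
    fn to_sql(&self) -> String {
        let arms = self
            .arms
            .iter()
            .map(|arm| format!("{} AS ({})", arm.name(), arm.to_sql()))
            .collect::<Vec<_>>()
            .join(", ");
        format!("WITH {} {}", arms, self.statement)
    }
}

fn main() {
    let cte = CteSketch {
        arms: vec![
            Box::new(RawArm { name: "foo", body: "SELECT 1" }),
            Box::new(RawArm { name: "bar", body: "SELECT * FROM foo" }),
        ],
        statement: "SELECT * FROM bar".to_string(),
    };
    // Prints: WITH foo AS (SELECT 1), bar AS (SELECT * FROM foo) SELECT * FROM bar
    println!("{}", cte.to_sql());
}

The real builder does the same stitching when rendering the `Cte`, but each arm stays a fully typed Diesel query so that later arms can reference earlier ones through the `table!`-generated aliases introduced below.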
@@ -126,7 +124,7 @@ impl CteBuilder { self } - fn build(mut self, statement: Box>) -> Cte { + fn build(self, statement: Box>) -> Cte { Cte { subqueries: self.subqueries, statement @@ -160,11 +158,20 @@ impl QueryFragment for Cte { } } +trait SameType {} +impl SameType for (T, T) {} +fn same_type() where (A, B): SameType {} + +// ----------------------------- // +// Above should be for a generic CTE builder +// Below should be for service provisioning // ----------------------------- // // TODO: I want this to be as lightweight to make as possible! struct SledAllocationPoolSubquery { - query: Box>, + // TODO: How do we bridge the gap of this CteQuery type to the + // table? + query: Box>, } impl SledAllocationPoolSubquery { @@ -175,12 +182,31 @@ impl SledAllocationPoolSubquery { dsl::sled .filter(dsl::time_deleted.is_null()) // TODO: Filter by rack? - .select(dsl::id) + .select((dsl::id,)) ) } } } +impl AsTable for SledAllocationPoolSubquery { + type Table = sled_allocation_pool::dsl::sled_allocation_pool; + fn as_table(&self) -> Self::Table { + use diesel::internal::table_macro::StaticQueryFragment; + + // TODO: This should either be auto-generated, or checked more + // uniformally. + same_type::<(sql_types::Uuid,), sled_allocation_pool::SqlType>(); + + // TODO: Converting this to a compile-time check would be nicer. + // + // TODO: Even better, don't have "name()" at all... force the alias + // to be the intermediate "table" name. + assert_eq!(self.name(), sled_allocation_pool::dsl::sled_allocation_pool::STATIC_COMPONENT.0); + + sled_allocation_pool::dsl::sled_allocation_pool + } +} + impl SubQuery for SledAllocationPoolSubquery { fn name(&self) -> &'static str { "sled_allocation_pool" @@ -191,23 +217,52 @@ impl SubQuery for SledAllocationPoolSubquery { } } +// TODO: We actually want a trimmed down version of this. +// It's generating too much; we don't want to be able to insert/delete/update +// this table; it's basically an alias. +// We *also* do not want the Primary Key. +// +// However, being able to select columns by name is a critical feature +// that we can't easily do without a similar-looking macro. +diesel::table! { + sled_allocation_pool { + id -> Uuid, + } +} + +// TODO: +// - How do we avoid re-typing UUID? +// - What can be made generic? 
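As an aside on the `same_type` check used in `as_table` above: it is a small compile-time assertion. `SameType` is only implemented for pairs whose two elements are the same type, so `same_type::<A, B>()` type-checks exactly when `A` and `B` are identical; here that ties the columns a subquery selects to the `SqlType` generated by the `diesel::table!` stanza. A self-contained sketch of the pattern follows; the `(u32,)` and `(u64,)` tuples are throwaway types chosen only for illustration.

// Marker trait implemented only for pairs whose elements are the same type.
trait SameType {}
impl<T> SameType for (T, T) {}

// Compiles only if `A` and `B` are identical; otherwise no impl satisfies the
// where-clause and the call is rejected at compile time.
fn same_type<A, B>()
where
    (A, B): SameType,
{
}

fn main() {
    // Fine: both type parameters are (u32,).
    same_type::<(u32,), (u32,)>();

    // This would not compile, because (u32,) and (u64,) are different types:
    // same_type::<(u32,), (u64,)>();
}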
+ struct PreviouslyAllocatedServices { - query: Box>, + query: Box>, } impl PreviouslyAllocatedServices { fn new(allocation_pool: &SledAllocationPoolSubquery) -> Self { - use crate::db::schema::service::dsl; + use crate::db::schema::service::dsl as service_dsl; + use sled_allocation_pool::dsl as alloc_pool_dsl; + + let select_from_pool = allocation_pool.as_table().select(alloc_pool_dsl::id).into_boxed(); Self { query: Box::new( - dsl::service - .filter(dsl::kind.eq(ServiceKind::Nexus)) -// .filter(dsl::sled_id.eq_any(allocation_pool)) + service_dsl::service + .filter(service_dsl::kind.eq(ServiceKind::Nexus)) + .filter(service_dsl::sled_id.eq_any(select_from_pool)) ) } } } +impl AsTable for PreviouslyAllocatedServices { + type Table = previously_allocated_services::dsl::previously_allocated_services; + fn as_table(&self) -> Self::Table { + use diesel::internal::table_macro::StaticQueryFragment; + assert_eq!(self.name(), previously_allocated_services::dsl::previously_allocated_services::STATIC_COMPONENT.0); + previously_allocated_services::dsl::previously_allocated_services + } +} + impl SubQuery for PreviouslyAllocatedServices { fn name(&self) -> &'static str { "previously_allocated_services" @@ -218,6 +273,215 @@ impl SubQuery for PreviouslyAllocatedServices { } } +diesel::table! { + previously_allocated_services { + id -> Uuid, + time_created -> Timestamptz, + time_modified -> Timestamptz, + + sled_id -> Uuid, + ip -> Inet, + kind -> crate::db::model::ServiceKindEnum, + } +} + +struct OldServiceCount { + query: Box>, +} + +impl OldServiceCount { + fn new(previously_allocated_services: &PreviouslyAllocatedServices) -> Self { + Self { + query: Box::new( + previously_allocated_services.as_table().count() + ) + } + } +} + +impl AsTable for OldServiceCount { + type Table = old_service_count::dsl::old_service_count; + fn as_table(&self) -> Self::Table { + use diesel::internal::table_macro::StaticQueryFragment; + assert_eq!(self.name(), old_service_count::dsl::old_service_count::STATIC_COMPONENT.0); + old_service_count::dsl::old_service_count + } +} + +impl SubQuery for OldServiceCount { + fn name(&self) -> &'static str { + "old_service_count" + } + + fn query(&self) -> &dyn QueryFragment { + &self.query + } +} + + +diesel::table! { + old_service_count (count) { + count -> Int8, + } +} + +struct NewServiceCount { + redundancy: i32, +} + +impl QueryId for NewServiceCount { + type QueryId = (); + const HAS_STATIC_QUERY_ID: bool = false; +} + +impl NewServiceCount { + fn new(redundancy: i32, _old_service_count: &OldServiceCount) -> Self { + Self { + redundancy, + } + } +} + +impl SubQuery for NewServiceCount { + fn name(&self) -> &'static str { + "new_service_count" + } + + fn query(&self) -> &dyn QueryFragment { + self + } +} + +// NOTE: This CTE arm is raw SQL because the "GREATEST" function is not +// supported by Diesel. 
+impl QueryFragment for NewServiceCount { + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + out.unsafe_to_cache_prepared(); + out.push_sql("SELECT GREATEST("); + out.push_bind_param::(&self.redundancy)?; + out.push_sql(", (SELECT * FROM old_service_count)) - (SELECT * FROM old_service_count)"); + Ok(()) + } +} + +impl Query for NewServiceCount { + type SqlType = sql_types::BigInt; +} + +struct CandidateSleds {} + +impl CandidateSleds { + fn new() -> Self { + Self {} + } +} + +impl QueryId for CandidateSleds { + type QueryId = (); + const HAS_STATIC_QUERY_ID: bool = false; +} + +impl AsTable for CandidateSleds { + type Table = candidate_sleds::dsl::candidate_sleds; + fn as_table(&self) -> Self::Table { + use diesel::internal::table_macro::StaticQueryFragment; + assert_eq!(self.name(), candidate_sleds::dsl::candidate_sleds::STATIC_COMPONENT.0); + candidate_sleds::dsl::candidate_sleds + } +} + +impl SubQuery for CandidateSleds { + fn name(&self) -> &'static str { + "candidate_sleds" + } + + fn query(&self) -> &dyn QueryFragment { + self + } +} + +// NOTE: This CTE arm is raw SQL because the "LIMIT" expression cannot +// include sub-queries in Diesel. +impl QueryFragment for CandidateSleds { + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + out.unsafe_to_cache_prepared(); + + out.push_sql( + "SELECT \ + \"sled_allocation_pool\".\"id\" \ + FROM \"sled_allocation_pool\" \ + WHERE \ + \"sled_allocation_pool\".\"id\" NOT IN \ + (SELECT \"sled_id\" FROM \"previously_allocated_services\") \ + LIMIT (SELECT * FROM \"new_service_count\")" + ); + Ok(()) + } +} + +impl Query for CandidateSleds { + type SqlType = sql_types::Uuid; +} + +diesel::table! { + candidate_sleds { + id -> Uuid, + } +} + +struct NewInternalIps { + query: Box>, +} + +impl NewInternalIps { + fn new(candidate_sleds: &CandidateSleds) -> Self { + use crate::db::schema::sled::dsl as sled_dsl; + use candidate_sleds::dsl as candidate_sleds_dsl; + + let select_from_candidate_sleds = candidate_sleds.as_table().select(candidate_sleds_dsl::id).into_boxed(); + Self { + + query: Box::new( + diesel::update(sled_dsl::sled.filter(sled_dsl::id.eq_any(select_from_candidate_sleds))) + .set(sled_dsl::last_used_address.eq(sled_dsl::last_used_address + 1)) + .returning((sled_dsl::id, sled_dsl::last_used_address)) + ) + } + } +} + +impl AsTable for NewInternalIps { + type Table = new_internal_ips::dsl::new_internal_ips; + fn as_table(&self) -> Self::Table { + use diesel::internal::table_macro::StaticQueryFragment; + assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); + new_internal_ips::dsl::new_internal_ips + } +} + +impl SubQuery for NewInternalIps { + fn name(&self) -> &'static str { + "new_internal_ips" + } + + fn query(&self) -> &dyn QueryFragment { + &self.query + } +} + +diesel::table! { + new_internal_ips { + id -> Uuid, + last_used_address -> Inet, + } +} + /// Provision services of a particular type within a rack. 
/// /// TODO: Document @@ -228,10 +492,14 @@ pub struct ServiceProvision { } impl ServiceProvision { - pub fn new() -> Self { + pub fn new(redundancy: i32) -> Self { let now = Utc::now(); let sled_allocation_pool = SledAllocationPoolSubquery::new(); let previously_allocated_services = PreviouslyAllocatedServices::new(&sled_allocation_pool); + let old_service_count = OldServiceCount::new(&previously_allocated_services); + let new_service_count = NewServiceCount::new(redundancy, &old_service_count); + let candidate_sleds = CandidateSleds::new(); + let new_internal_ips = NewInternalIps::new(&candidate_sleds); // TODO: Reference prior subquery? use crate::db::schema::sled::dsl; @@ -240,6 +508,10 @@ impl ServiceProvision { let cte = CteBuilder::new() .add_subquery(sled_allocation_pool) .add_subquery(previously_allocated_services) + .add_subquery(old_service_count) + .add_subquery(new_service_count) + .add_subquery(candidate_sleds) + .add_subquery(new_internal_ips) .build(final_select); Self { @@ -279,12 +551,9 @@ mod tests { use crate::context::OpContext; use crate::db::datastore::DataStore; use crate::db::identity::Resource; - use crate::db::model::IpKind; - use crate::db::model::IpPool; - use crate::db::model::IpPoolRange; use crate::db::model::Name; - use crate::external_api::shared::IpRange; use async_bb8_diesel::AsyncRunQueryDsl; + use diesel::pg::Pg; use dropshot::test_util::LogContext; use nexus_test_utils::db::test_setup_database; use nexus_test_utils::RACK_UUID; @@ -292,11 +561,11 @@ mod tests { use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_test_utils::dev; use omicron_test_utils::dev::db::CockroachInstance; - use std::net::IpAddr; - use std::net::Ipv4Addr; use std::sync::Arc; use uuid::Uuid; + use super::ServiceProvision; + struct TestContext { logctx: LogContext, opctx: OpContext, @@ -319,72 +588,6 @@ mod tests { Self { logctx, opctx, db, db_datastore } } - async fn create_ip_pool_internal( - &self, - name: &str, - range: IpRange, - project_id: Option, - rack_id: Option, - ) { - let pool = IpPool::new( - &IdentityMetadataCreateParams { - name: String::from(name).parse().unwrap(), - description: format!("ip pool {}", name), - }, - project_id, - rack_id, - ); - - diesel::insert_into(crate::db::schema::ip_pool::dsl::ip_pool) - .values(pool.clone()) - .execute_async( - self.db_datastore - .pool_authorized(&self.opctx) - .await - .unwrap(), - ) - .await - .expect("Failed to create IP Pool"); - - let pool_range = IpPoolRange::new(&range, pool.id(), project_id); - diesel::insert_into( - crate::db::schema::ip_pool_range::dsl::ip_pool_range, - ) - .values(pool_range) - .execute_async( - self.db_datastore.pool_authorized(&self.opctx).await.unwrap(), - ) - .await - .expect("Failed to create IP Pool range"); - } - - async fn create_rack_ip_pool( - &self, - name: &str, - range: IpRange, - rack_id: Uuid, - ) { - self.create_ip_pool_internal( - name, - range, - /* project_id= */ None, - Some(rack_id), - ) - .await; - } - - async fn create_ip_pool( - &self, - name: &str, - range: IpRange, - project_id: Option, - ) { - self.create_ip_pool_internal( - name, range, project_id, /* rack_id= */ None, - ) - .await; - } - async fn success(mut self) { self.db.cleanup().await.unwrap(); self.logctx.cleanup_successful(); @@ -392,600 +595,84 @@ mod tests { } #[tokio::test] - async fn test_next_external_ip_allocation_and_exhaustion() { - let context = - TestContext::new("test_next_external_ip_allocation_and_exhaustion") - .await; - let range = IpRange::try_from(( - Ipv4Addr::new(10, 
0, 0, 1), - Ipv4Addr::new(10, 0, 0, 1), - )) - .unwrap(); - context.create_ip_pool("p0", range, None).await; - let project_id = Uuid::new_v4(); - for first_port in - (0..super::MAX_PORT).step_by(super::NUM_SOURCE_NAT_PORTS) - { - let id = Uuid::new_v4(); - let instance_id = Uuid::new_v4(); - let ip = context - .db_datastore - .allocate_instance_snat_ip( - &context.opctx, - id, - project_id, - instance_id, - ) - .await - .expect("Failed to allocate instance external IP address"); - assert_eq!(ip.ip.ip(), range.first_address()); - assert_eq!(ip.first_port.0, first_port as u16); - assert_eq!( - ip.last_port.0, - (first_port + (super::NUM_SOURCE_NAT_PORTS - 1) as i32) as u16 - ); - } - - // The next allocation should fail, due to IP exhaustion - let instance_id = Uuid::new_v4(); - let err = context - .db_datastore - .allocate_instance_snat_ip( - &context.opctx, - Uuid::new_v4(), - project_id, - instance_id, - ) - .await - .expect_err( - "An error should be received when the IP pools are exhausted", - ); - assert_eq!( - err, - Error::InvalidRequest { - message: String::from("No external IP addresses available"), - } - ); - context.success().await; - } - - #[tokio::test] - async fn test_next_external_ip_out_of_order_allocation_ok() { + async fn test_foobar() { let context = TestContext::new( - "test_next_external_ip_out_of_order_allocation_ok", + "test_foobar", ) .await; - // Need a larger range, since we're currently limited to the whole port - // range for each external IP. - let range = IpRange::try_from(( - Ipv4Addr::new(10, 0, 0, 1), - Ipv4Addr::new(10, 0, 0, 3), - )) - .unwrap(); - context.create_ip_pool("p0", range, None).await; - - // TODO-completess: Implementing Iterator for IpRange would be nice. - let addresses = [ - Ipv4Addr::new(10, 0, 0, 1), - Ipv4Addr::new(10, 0, 0, 2), - Ipv4Addr::new(10, 0, 0, 3), - ]; - let ports = (0..super::MAX_PORT).step_by(super::NUM_SOURCE_NAT_PORTS); - let mut external_ips = itertools::iproduct!(addresses, ports); - - // Allocate two addresses - let mut ips = Vec::with_capacity(2); - let project_id = Uuid::new_v4(); - for (expected_ip, expected_first_port) in external_ips.clone().take(2) { - let instance_id = Uuid::new_v4(); - let ip = context - .db_datastore - .allocate_instance_snat_ip( - &context.opctx, - Uuid::new_v4(), - project_id, - instance_id, - ) - .await - .expect("Failed to allocate instance external IP address"); - assert_eq!(ip.ip.ip(), expected_ip); - assert_eq!(ip.first_port.0, expected_first_port as u16); - let expected_last_port = (expected_first_port - + (super::NUM_SOURCE_NAT_PORTS - 1) as i32) - as u16; - assert_eq!(ip.last_port.0, expected_last_port); - ips.push(ip); - } - - // Release the first - context - .db_datastore - .deallocate_external_ip(&context.opctx, ips[0].id) - .await - .expect("Failed to release the first external IP address"); - - // Allocate a new one, ensure it's the same as the first one we - // released. 
- let instance_id = Uuid::new_v4(); - let ip = context - .db_datastore - .allocate_instance_snat_ip( - &context.opctx, - Uuid::new_v4(), - project_id, - instance_id, - ) - .await - .expect("Failed to allocate instance external IP address"); - println!("{:?}\n{:?}", ip, ips[0]); - assert_eq!( - ip.ip, ips[0].ip, - "Expected to reallocate external IPs sequentially" - ); - assert_eq!( - ip.first_port, ips[0].first_port, - "Expected to reallocate external IPs sequentially" - ); - assert_eq!( - ip.last_port, ips[0].last_port, - "Expected to reallocate external IPs sequentially" - ); - // Allocate one more, ensure it's the next chunk after the second one - // from the original loop. - let instance_id = Uuid::new_v4(); - let ip = context - .db_datastore - .allocate_instance_snat_ip( - &context.opctx, - Uuid::new_v4(), - project_id, - instance_id, - ) - .await - .expect("Failed to allocate instance external IP address"); - let (expected_ip, expected_first_port) = external_ips.nth(2).unwrap(); - assert_eq!(ip.ip.ip(), std::net::IpAddr::from(expected_ip)); - assert_eq!(ip.first_port.0, expected_first_port as u16); - let expected_last_port = (expected_first_port - + (super::NUM_SOURCE_NAT_PORTS - 1) as i32) - as u16; - assert_eq!(ip.last_port.0, expected_last_port); + let query = ServiceProvision::new(3); - context.success().await; - } - - #[tokio::test] - async fn test_next_external_ip_with_ephemeral_takes_whole_port_range() { - let context = TestContext::new( - "test_next_external_ip_with_ephemeral_takes_whole_port_range", - ) - .await; - let range = IpRange::try_from(( - Ipv4Addr::new(10, 0, 0, 1), - Ipv4Addr::new(10, 0, 0, 3), - )) - .unwrap(); - context.create_ip_pool("p0", range, None).await; - - let instance_id = Uuid::new_v4(); - let project_id = Uuid::new_v4(); - let id = Uuid::new_v4(); - let pool_name = None; - - let ip = context - .db_datastore - .allocate_instance_ephemeral_ip( - &context.opctx, - id, - project_id, - instance_id, - pool_name, - ) - .await - .expect("Failed to allocate instance ephemeral IP address"); - assert_eq!(ip.kind, IpKind::Ephemeral); - assert_eq!(ip.ip.ip(), range.first_address()); - assert_eq!(ip.first_port.0, 0); - assert_eq!(ip.last_port.0, u16::MAX); + let stringified = diesel::debug_query::(&query).to_string(); - context.success().await; - } - - #[tokio::test] - async fn test_next_external_ip_is_restricted_to_projects() { - let context = - TestContext::new("test_next_external_ip_is_restricted_to_projects") - .await; - - // Create one pool restricted to a project, and one not. - let project_id = Uuid::new_v4(); - let first_range = IpRange::try_from(( - Ipv4Addr::new(10, 0, 0, 1), - Ipv4Addr::new(10, 0, 0, 3), - )) - .unwrap(); - context.create_ip_pool("p0", first_range, Some(project_id)).await; - - let second_range = IpRange::try_from(( - Ipv4Addr::new(10, 0, 0, 4), - Ipv4Addr::new(10, 0, 0, 6), - )) - .unwrap(); - context.create_ip_pool("p1", second_range, None).await; - - // Allocating an address on an instance in a _different_ project should - // get an address from the second pool. 
- let instance_id = Uuid::new_v4(); - let instance_project_id = Uuid::new_v4(); - let id = Uuid::new_v4(); - let pool_name = None; - - let ip = context - .db_datastore - .allocate_instance_ephemeral_ip( - &context.opctx, - id, - instance_project_id, - instance_id, - pool_name, - ) - .await - .expect("Failed to allocate instance ephemeral IP address"); - assert_eq!(ip.kind, IpKind::Ephemeral); - assert_eq!(ip.ip.ip(), second_range.first_address()); - assert_eq!(ip.first_port.0, 0); - assert_eq!(ip.last_port.0, u16::MAX); - assert_eq!(ip.project_id.unwrap(), instance_project_id); - - // Allocating an address on an instance in the same project should get - // an address from the first pool. - let instance_id = Uuid::new_v4(); - let id = Uuid::new_v4(); - let pool_name = None; - - let ip = context - .db_datastore - .allocate_instance_ephemeral_ip( - &context.opctx, - id, - project_id, - instance_id, - pool_name, - ) - .await - .expect("Failed to allocate instance ephemeral IP address"); - assert_eq!(ip.kind, IpKind::Ephemeral); - assert_eq!(ip.ip.ip(), first_range.first_address()); - assert_eq!(ip.first_port.0, 0); - assert_eq!(ip.last_port.0, u16::MAX); - assert_eq!(ip.project_id.unwrap(), project_id); - - context.success().await; - } - - #[tokio::test] - async fn test_next_external_ip_for_service() { - let context = - TestContext::new("test_next_external_ip_for_service").await; - - // Create an IP pool without an associated project. - let rack_id = Uuid::parse_str(RACK_UUID).unwrap(); - let ip_range = IpRange::try_from(( - Ipv4Addr::new(10, 0, 0, 1), - Ipv4Addr::new(10, 0, 0, 2), - )) - .unwrap(); - context.create_rack_ip_pool("p0", ip_range, rack_id).await; - - // Allocate an IP address as we would for an external, rack-associated - // service. - let id1 = Uuid::new_v4(); - let ip1 = context - .db_datastore - .allocate_service_ip(&context.opctx, id1, rack_id) - .await - .expect("Failed to allocate service IP address"); - assert_eq!(ip1.kind, IpKind::Service); - assert_eq!(ip1.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))); - assert_eq!(ip1.first_port.0, 0); - assert_eq!(ip1.last_port.0, u16::MAX); - assert!(ip1.instance_id.is_none()); - assert!(ip1.project_id.is_none()); - - // Allocate the next (last) IP address - let id2 = Uuid::new_v4(); - let ip2 = context - .db_datastore - .allocate_service_ip(&context.opctx, id2, rack_id) - .await - .expect("Failed to allocate service IP address"); - assert_eq!(ip2.kind, IpKind::Service); - assert_eq!(ip2.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 2))); - assert_eq!(ip2.first_port.0, 0); - assert_eq!(ip2.last_port.0, u16::MAX); - assert!(ip2.instance_id.is_none()); - assert!(ip2.project_id.is_none()); - - // Once we're out of IP addresses, test that we see the right error. 
- let id3 = Uuid::new_v4(); - let err = context - .db_datastore - .allocate_service_ip(&context.opctx, id3, rack_id) - .await - .expect_err("Should have failed to allocate after pool exhausted"); assert_eq!( - err, - Error::InvalidRequest { - message: String::from("No external IP addresses available"), - } + stringified, + "WITH \ + sled_allocation_pool AS (\ + SELECT \ + \"sled\".\"id\" \ + FROM \"sled\" \ + WHERE (\ + \"sled\".\"time_deleted\" IS NULL\ + )\ + ), \ + previously_allocated_services AS (\ + SELECT \ + \"service\".\"id\", \ + \"service\".\"time_created\", \ + \"service\".\"time_modified\", \ + \"service\".\"sled_id\", \ + \"service\".\"ip\", \ + \"service\".\"kind\" \ + FROM \"service\" \ + WHERE (\ + (\"service\".\"kind\" = $1) AND \ + (\"service\".\"sled_id\" = \ + ANY(SELECT \"sled_allocation_pool\".\"id\" FROM \"sled_allocation_pool\")\ + )\ + )\ + ), \ + old_service_count AS (\ + SELECT COUNT(*) FROM \"previously_allocated_services\"\ + ), \ + new_service_count AS (\ + SELECT GREATEST($2, (SELECT * FROM old_service_count)) - (SELECT * FROM old_service_count)\ + ), \ + candidate_sleds AS (\ + SELECT \ + \"sled_allocation_pool\".\"id\" \ + FROM \"sled_allocation_pool\" \ + WHERE \ + \"sled_allocation_pool\".\"id\" NOT IN \ + (SELECT \"sled_id\" FROM \"previously_allocated_services\") \ + LIMIT (SELECT * FROM \"new_service_count\")\ + ), \ + new_internal_ips AS (\ + UPDATE \ + \"sled\" \ + SET \ + \"last_used_address\" = (\"sled\".\"last_used_address\" + $3) \ + WHERE \ + (\"sled\".\"id\" = ANY(SELECT \"candidate_sleds\".\"id\" FROM \"candidate_sleds\")) \ + RETURNING \"sled\".\"id\", \"sled\".\"last_used_address\"\ + ) \ + SELECT \ + \"sled\".\"id\", \ + \"sled\".\"time_created\", \ + \"sled\".\"time_modified\", \ + \"sled\".\"time_deleted\", \ + \"sled\".\"rcgen\", \ + \"sled\".\"rack_id\", \ + \"sled\".\"is_scrimlet\", \ + \"sled\".\"ip\", \ + \"sled\".\"port\", \ + \"sled\".\"last_used_address\" \ + FROM \"sled\" \ + WHERE (\ + \"sled\".\"time_deleted\" IS NULL\ + ) -- binds: [Nexus, 3, 1]", ); context.success().await; } - - #[tokio::test] - async fn test_insert_external_ip_for_service_is_idempoent() { - let context = TestContext::new( - "test_insert_external_ip_for_service_is_idempotent", - ) - .await; - - // Create an IP pool without an associated project. - let rack_id = Uuid::parse_str(RACK_UUID).unwrap(); - let ip_range = IpRange::try_from(( - Ipv4Addr::new(10, 0, 0, 1), - Ipv4Addr::new(10, 0, 0, 2), - )) - .unwrap(); - context.create_rack_ip_pool("p0", ip_range, rack_id).await; - - // Allocate an IP address as we would for an external, rack-associated - // service. - let id = Uuid::new_v4(); - let ip = context - .db_datastore - .allocate_service_ip(&context.opctx, id, rack_id) - .await - .expect("Failed to allocate service IP address"); - assert_eq!(ip.kind, IpKind::Service); - assert_eq!(ip.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))); - assert_eq!(ip.first_port.0, 0); - assert_eq!(ip.last_port.0, u16::MAX); - assert!(ip.instance_id.is_none()); - assert!(ip.project_id.is_none()); - - let ip_again = context - .db_datastore - .allocate_service_ip(&context.opctx, id, rack_id) - .await - .expect("Failed to allocate service IP address"); - - assert_eq!(ip.id, ip_again.id); - assert_eq!(ip.ip.ip(), ip_again.ip.ip()); - - context.success().await; - } - - // This test is identical to "test_insert_external_ip_is_idempotent", - // but tries to make an idempotent allocation after all addresses in the - // pool have been allocated. 
- #[tokio::test] - async fn test_insert_external_ip_for_service_is_idempotent_even_when_full() - { - let context = TestContext::new( - "test_insert_external_ip_is_idempotent_even_when_full", - ) - .await; - - // Create an IP pool without an associated project. - let rack_id = Uuid::parse_str(RACK_UUID).unwrap(); - let ip_range = IpRange::try_from(( - Ipv4Addr::new(10, 0, 0, 1), - Ipv4Addr::new(10, 0, 0, 1), - )) - .unwrap(); - context.create_rack_ip_pool("p0", ip_range, rack_id).await; - - // Allocate an IP address as we would for an external, rack-associated - // service. - let id = Uuid::new_v4(); - let ip = context - .db_datastore - .allocate_service_ip(&context.opctx, id, rack_id) - .await - .expect("Failed to allocate service IP address"); - assert_eq!(ip.kind, IpKind::Service); - assert_eq!(ip.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))); - assert_eq!(ip.first_port.0, 0); - assert_eq!(ip.last_port.0, u16::MAX); - assert!(ip.instance_id.is_none()); - assert!(ip.project_id.is_none()); - - let ip_again = context - .db_datastore - .allocate_service_ip(&context.opctx, id, rack_id) - .await - .expect("Failed to allocate service IP address"); - - assert_eq!(ip.id, ip_again.id); - assert_eq!(ip.ip.ip(), ip_again.ip.ip()); - - context.success().await; - } - - #[tokio::test] - async fn test_insert_external_ip_is_idempotent() { - let context = - TestContext::new("test_insert_external_ip_is_idempotent").await; - - // Create an IP pool - let range = IpRange::try_from(( - Ipv4Addr::new(10, 0, 0, 1), - Ipv4Addr::new(10, 0, 0, 3), - )) - .unwrap(); - context.create_ip_pool("p0", range, None).await; - - // Create one SNAT IP address. - let instance_id = Uuid::new_v4(); - let id = Uuid::new_v4(); - let project_id = Uuid::new_v4(); - let ip = context - .db_datastore - .allocate_instance_snat_ip( - &context.opctx, - id, - project_id, - instance_id, - ) - .await - .expect("Failed to allocate instance SNAT IP address"); - assert_eq!(ip.kind, IpKind::SNat); - assert_eq!(ip.ip.ip(), range.first_address()); - assert_eq!(ip.first_port.0, 0); - assert_eq!( - usize::from(ip.last_port.0), - super::NUM_SOURCE_NAT_PORTS - 1 - ); - assert_eq!(ip.project_id.unwrap(), project_id); - - // Create a new IP, with the _same_ ID, and ensure we get back the same - // value. - let new_ip = context - .db_datastore - .allocate_instance_snat_ip( - &context.opctx, - id, - project_id, - instance_id, - ) - .await - .expect("Failed to allocate instance SNAT IP address"); - - // Check identity, not equality. The timestamps will be updated. - assert_eq!(ip.id, new_ip.id); - assert_eq!(ip.name, new_ip.name); - assert_eq!(ip.description, new_ip.description); - assert!(ip.time_created <= new_ip.time_created); - assert!(ip.time_modified <= new_ip.time_modified); - assert_eq!(ip.time_deleted, new_ip.time_deleted); - assert_eq!(ip.ip_pool_id, new_ip.ip_pool_id); - assert_eq!(ip.ip_pool_range_id, new_ip.ip_pool_range_id); - assert_eq!(ip.kind, new_ip.kind); - assert_eq!(ip.ip, new_ip.ip); - assert_eq!(ip.first_port, new_ip.first_port); - assert_eq!(ip.last_port, new_ip.last_port); - - context.success().await; - } - - #[tokio::test] - async fn test_next_external_ip_is_restricted_to_pools() { - let context = - TestContext::new("test_next_external_ip_is_restricted_to_pools") - .await; - - // Create two pools, neither project-restricted. 
- let first_range = IpRange::try_from(( - Ipv4Addr::new(10, 0, 0, 1), - Ipv4Addr::new(10, 0, 0, 3), - )) - .unwrap(); - context.create_ip_pool("p0", first_range, None).await; - let second_range = IpRange::try_from(( - Ipv4Addr::new(10, 0, 0, 4), - Ipv4Addr::new(10, 0, 0, 6), - )) - .unwrap(); - context.create_ip_pool("p1", second_range, None).await; - - // Allocating an address on an instance in the second pool should be - // respected, even though there are IPs available in the first. - let instance_id = Uuid::new_v4(); - let project_id = Uuid::new_v4(); - let id = Uuid::new_v4(); - let pool_name = Some(Name("p1".parse().unwrap())); - - let ip = context - .db_datastore - .allocate_instance_ephemeral_ip( - &context.opctx, - id, - project_id, - instance_id, - pool_name, - ) - .await - .expect("Failed to allocate instance ephemeral IP address"); - assert_eq!(ip.kind, IpKind::Ephemeral); - assert_eq!(ip.ip.ip(), second_range.first_address()); - assert_eq!(ip.first_port.0, 0); - assert_eq!(ip.last_port.0, u16::MAX); - assert_eq!(ip.project_id.unwrap(), project_id); - - context.success().await; - } - - #[tokio::test] - async fn test_ensure_pool_exhaustion_does_not_use_other_pool() { - let context = TestContext::new( - "test_ensure_pool_exhaustion_does_not_use_other_pool", - ) - .await; - - // Create two pools, neither project-restricted. - let first_range = IpRange::try_from(( - Ipv4Addr::new(10, 0, 0, 1), - Ipv4Addr::new(10, 0, 0, 3), - )) - .unwrap(); - context.create_ip_pool("p0", first_range, None).await; - let first_address = Ipv4Addr::new(10, 0, 0, 4); - let last_address = Ipv4Addr::new(10, 0, 0, 6); - let second_range = - IpRange::try_from((first_address, last_address)).unwrap(); - context.create_ip_pool("p1", second_range, None).await; - - // Allocate all available addresses in the second pool. - let instance_id = Uuid::new_v4(); - let project_id = Uuid::new_v4(); - let pool_name = Some(Name("p1".parse().unwrap())); - let first_octet = first_address.octets()[3]; - let last_octet = last_address.octets()[3]; - for octet in first_octet..=last_octet { - let ip = context - .db_datastore - .allocate_instance_ephemeral_ip( - &context.opctx, - Uuid::new_v4(), - project_id, - instance_id, - pool_name.clone(), - ) - .await - .expect("Failed to allocate instance ephemeral IP address"); - println!("{ip:#?}"); - if let IpAddr::V4(addr) = ip.ip.ip() { - assert_eq!(addr.octets()[3], octet); - } else { - panic!("Expected an IPv4 address"); - } - } - - // Allocating another address should _fail_, and not use the first pool. 
- context - .db_datastore - .allocate_instance_ephemeral_ip( - &context.opctx, - Uuid::new_v4(), - project_id, - instance_id, - pool_name, - ) - .await - .expect_err("Should not use IP addresses from a different pool"); - - context.success().await; - } } diff --git a/services.sql b/services.sql index a22f18e3611..c8d8480a475 100644 --- a/services.sql +++ b/services.sql @@ -396,7 +396,6 @@ WITH new_internal_ips ON candidate_sleds.id = new_internal_ips.sled_id - ), inserted_services AS ( From 6406ec60b6c94e62dc969f07ac97c35545636d20 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 19 Sep 2022 16:22:06 -0400 Subject: [PATCH 84/88] significantly more of the CTE implemented --- nexus/src/db/queries/service_provision.rs | 283 +++++++++++++++++----- 1 file changed, 223 insertions(+), 60 deletions(-) diff --git a/nexus/src/db/queries/service_provision.rs b/nexus/src/db/queries/service_provision.rs index e2582b18526..352bddfea36 100644 --- a/nexus/src/db/queries/service_provision.rs +++ b/nexus/src/db/queries/service_provision.rs @@ -6,11 +6,16 @@ use crate::db::model::Service; use crate::db::model::ServiceKind; -use crate::db::model::Sled; use crate::db::pool::DbConnection; use crate::db::schema; use chrono::DateTime; use chrono::Utc; +use diesel::Column; +use diesel::ExpressionMethods; +use diesel::IntoSql; +use diesel::Insertable; +use diesel::JoinOnDsl; +use diesel::NullableExpressionMethods; use diesel::pg::Pg; use diesel::query_builder::AstPass; use diesel::query_builder::AsQuery; @@ -18,42 +23,13 @@ use diesel::query_builder::Query; use diesel::query_builder::QueryFragment; use diesel::query_builder::QueryId; use diesel::sql_types; -use diesel::ExpressionMethods; use diesel::QueryDsl; use diesel::RunQueryDsl; -type FromClause = - diesel::internal::table_macro::StaticQueryFragmentInstance; -type ServiceFromClause = FromClause; -const SERVICE_FROM_CLAUSE: ServiceFromClause = ServiceFromClause::new(); - trait CteQuery: Query + QueryFragment {} impl CteQuery for T -where - T: Query + QueryFragment -{} - -/* -trait CteQueryClone { - fn clone_box(&self) -> Box>; -} - -impl CteQueryClone for T -where - T: 'static + CteQuery + Clone, -{ - fn clone_box(&self) -> Box> { - Box::new(self.clone()) - } -} - -impl Clone for Box> { - fn clone(&self) -> Box> { - self.clone_box() - } -} -*/ +where T: Query + QueryFragment {} /// Represents a sub-query within a CTE. /// @@ -158,23 +134,17 @@ impl QueryFragment for Cte { } } -trait SameType {} -impl SameType for (T, T) {} -fn same_type() where (A, B): SameType {} - // ----------------------------- // // Above should be for a generic CTE builder // Below should be for service provisioning // ----------------------------- // // TODO: I want this to be as lightweight to make as possible! -struct SledAllocationPoolSubquery { - // TODO: How do we bridge the gap of this CteQuery type to the - // table? - query: Box>, +struct SledAllocationPool { + query: Box>, } -impl SledAllocationPoolSubquery { +impl SledAllocationPool { fn new() -> Self { use crate::db::schema::sled::dsl; Self { @@ -188,26 +158,22 @@ impl SledAllocationPoolSubquery { } } -impl AsTable for SledAllocationPoolSubquery { +impl AsTable for SledAllocationPool { type Table = sled_allocation_pool::dsl::sled_allocation_pool; fn as_table(&self) -> Self::Table { use diesel::internal::table_macro::StaticQueryFragment; - // TODO: This should either be auto-generated, or checked more - // uniformally. 
- same_type::<(sql_types::Uuid,), sled_allocation_pool::SqlType>(); - // TODO: Converting this to a compile-time check would be nicer. // // TODO: Even better, don't have "name()" at all... force the alias // to be the intermediate "table" name. - assert_eq!(self.name(), sled_allocation_pool::dsl::sled_allocation_pool::STATIC_COMPONENT.0); + assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); sled_allocation_pool::dsl::sled_allocation_pool } } -impl SubQuery for SledAllocationPoolSubquery { +impl SubQuery for SledAllocationPool { fn name(&self) -> &'static str { "sled_allocation_pool" } @@ -235,11 +201,11 @@ diesel::table! { // - What can be made generic? struct PreviouslyAllocatedServices { - query: Box>, + query: Box>, } impl PreviouslyAllocatedServices { - fn new(allocation_pool: &SledAllocationPoolSubquery) -> Self { + fn new(allocation_pool: &SledAllocationPool) -> Self { use crate::db::schema::service::dsl as service_dsl; use sled_allocation_pool::dsl as alloc_pool_dsl; @@ -258,7 +224,7 @@ impl AsTable for PreviouslyAllocatedServices { type Table = previously_allocated_services::dsl::previously_allocated_services; fn as_table(&self) -> Self::Table { use diesel::internal::table_macro::StaticQueryFragment; - assert_eq!(self.name(), previously_allocated_services::dsl::previously_allocated_services::STATIC_COMPONENT.0); + assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); previously_allocated_services::dsl::previously_allocated_services } } @@ -303,7 +269,7 @@ impl AsTable for OldServiceCount { type Table = old_service_count::dsl::old_service_count; fn as_table(&self) -> Self::Table { use diesel::internal::table_macro::StaticQueryFragment; - assert_eq!(self.name(), old_service_count::dsl::old_service_count::STATIC_COMPONENT.0); + assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); old_service_count::dsl::old_service_count } } @@ -318,13 +284,13 @@ impl SubQuery for OldServiceCount { } } - diesel::table! { old_service_count (count) { count -> Int8, } } +/* struct NewServiceCount { redundancy: i32, } @@ -370,11 +336,55 @@ impl QueryFragment for NewServiceCount { impl Query for NewServiceCount { type SqlType = sql_types::BigInt; } +*/ + +struct NewServiceCount { + query: Box>, +} + +diesel::sql_function!(fn greatest(a: sql_types::BigInt, b: sql_types::BigInt) -> sql_types::BigInt); + +impl NewServiceCount { + fn new(redundancy: i32, old_service_count: &OldServiceCount) -> Self { + let old_count = old_service_count.as_table() + .select(old_service_count::dsl::count) + .single_value() + .assume_not_null(); + Self { + query: Box::new( + diesel::select( + greatest( + (redundancy as i64).into_sql::(), + old_count, + ) - old_count + ) + ) + } + } +} + +impl SubQuery for NewServiceCount { + fn name(&self) -> &'static str { + "new_service_count" + } + + fn query(&self) -> &dyn QueryFragment { + &self.query + } +} struct CandidateSleds {} impl CandidateSleds { - fn new() -> Self { + // TODO: This actually does depend on sled_allocation_pool, + // previously_allocated_services, and new_service_count. + // + // Should we make that explicit? 
+ fn new( + _sled_allocation_pool: &SledAllocationPool, + _previously_allocated_services: &PreviouslyAllocatedServices, + _new_service_count: &NewServiceCount, + ) -> Self { Self {} } } @@ -388,7 +398,7 @@ impl AsTable for CandidateSleds { type Table = candidate_sleds::dsl::candidate_sleds; fn as_table(&self) -> Self::Table { use diesel::internal::table_macro::StaticQueryFragment; - assert_eq!(self.name(), candidate_sleds::dsl::candidate_sleds::STATIC_COMPONENT.0); + assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); candidate_sleds::dsl::candidate_sleds } } @@ -482,6 +492,132 @@ diesel::table! { } } +diesel::allow_tables_to_appear_in_same_query!( + candidate_sleds, + new_internal_ips, +); + +struct CandidateServices { + query: Box>, +} + +diesel::sql_function!(fn gen_random_uuid() -> Uuid); +diesel::sql_function!(fn now() -> Timestamptz); + +impl CandidateServices { + fn new(candidate_sleds: &CandidateSleds, new_internal_ips: &NewInternalIps) -> Self { + use candidate_sleds::dsl as candidate_sleds_dsl; + use new_internal_ips::dsl as new_internal_ips_dsl; + + Self { + query: Box::new( + candidate_sleds.as_table().inner_join( + new_internal_ips.as_table().on( + candidate_sleds_dsl::id.eq(new_internal_ips_dsl::id) + ) + ).select( + ( + // TODO: I think I still want these to be aliased? + gen_random_uuid(), + now(), + now(), +// diesel::dsl::sql("gen_random_uuid() AS id"), +// diesel::dsl::sql("now() as time_created"), +// diesel::dsl::sql("now() as time_modified"), + diesel::dsl::sql(&format!("{} as sled_id", candidate_sleds_dsl::id::NAME)), + diesel::dsl::sql(&format!("{} as ip", new_internal_ips_dsl::last_used_address::NAME)), + ServiceKind::Nexus.into_sql::(), + ), + ) + ) + } + } +} + +impl AsTable for CandidateServices { + type Table = candidate_services::dsl::candidate_services; + fn as_table(&self) -> Self::Table { + use diesel::internal::table_macro::StaticQueryFragment; + assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); + candidate_services::dsl::candidate_services + } +} + +impl SubQuery for CandidateServices { + fn name(&self) -> &'static str { + "candidate_services" + } + + fn query(&self) -> &dyn QueryFragment { + &self.query + } +} + +diesel::table! { + candidate_services { + id -> Uuid, + time_created -> Timestamptz, + time_modified -> Timestamptz, + + sled_id -> Uuid, + ip -> Inet, + kind -> crate::db::model::ServiceKindEnum, + } +} + +struct InsertServices { + query: Box>, +} + +impl InsertServices { + fn new(candidate: &CandidateServices) -> Self { + use crate::db::schema::service; + + Self { + query: Box::new( + candidate.as_table().select( + candidate_services::all_columns, + ).insert_into( + service::table + ).returning( + service::all_columns + ) + ) + } + } +} + +impl AsTable for InsertServices { + type Table = inserted_services::dsl::inserted_services; + fn as_table(&self) -> Self::Table { + use diesel::internal::table_macro::StaticQueryFragment; + assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); + inserted_services::dsl::inserted_services + } +} + +impl SubQuery for InsertServices { + fn name(&self) -> &'static str { + "inserted_services" + } + + fn query(&self) -> &dyn QueryFragment { + &self.query + } +} + +diesel::table! { + inserted_services { + id -> Uuid, + time_created -> Timestamptz, + time_modified -> Timestamptz, + + sled_id -> Uuid, + ip -> Inet, + kind -> crate::db::model::ServiceKindEnum, + } +} + /// Provision services of a particular type within a rack. 
/// /// TODO: Document @@ -494,16 +630,24 @@ pub struct ServiceProvision { impl ServiceProvision { pub fn new(redundancy: i32) -> Self { let now = Utc::now(); - let sled_allocation_pool = SledAllocationPoolSubquery::new(); + let sled_allocation_pool = SledAllocationPool::new(); let previously_allocated_services = PreviouslyAllocatedServices::new(&sled_allocation_pool); let old_service_count = OldServiceCount::new(&previously_allocated_services); let new_service_count = NewServiceCount::new(redundancy, &old_service_count); - let candidate_sleds = CandidateSleds::new(); + let candidate_sleds = CandidateSleds::new( + &sled_allocation_pool, + &previously_allocated_services, + &new_service_count + ); let new_internal_ips = NewInternalIps::new(&candidate_sleds); + let candidate_services = CandidateServices::new(&candidate_sleds, &new_internal_ips); + let inserted_services = InsertServices::new(&candidate_services); - // TODO: Reference prior subquery? - use crate::db::schema::sled::dsl; - let final_select = Box::new(dsl::sled.filter(dsl::time_deleted.is_null())); + let final_select = Box::new( + inserted_services.as_table().select( + inserted_services::all_columns + ) + ); let cte = CteBuilder::new() .add_subquery(sled_allocation_pool) @@ -512,6 +656,8 @@ impl ServiceProvision { .add_subquery(new_service_count) .add_subquery(candidate_sleds) .add_subquery(new_internal_ips) + .add_subquery(candidate_services) + .add_subquery(inserted_services) .build(final_select); Self { @@ -655,7 +801,24 @@ mod tests { WHERE \ (\"sled\".\"id\" = ANY(SELECT \"candidate_sleds\".\"id\" FROM \"candidate_sleds\")) \ RETURNING \"sled\".\"id\", \"sled\".\"last_used_address\"\ + ), \ + candidate_services AS (\ + SELECT \ + gen_random_uuid() AS id, \ + now() as time_created, \ + now() as time_modified, \ + \"candidate_sleds\".\"id\", \ + \"new_internal_ips\".\"last_used_address\", \ + $4 \ + FROM (\ + \"candidate_sleds\" \ + INNER JOIN \ + \"new_internal_ips\" \ + ON (\ + \"candidate_sleds\".\"id\" = \"new_internal_ips\".\"id\"\ + ))\ ) \ + SELECT \"inserted_services\".\"id\", \"inserted_services\".\"time_created\", \"inserted_services\".\"time_modified\", \"inserted_services\".\"sled_id\", \"inserted_services\".\"ip\", \"inserted_services\".\"kind\" FROM \"service\" AS \"inserted_services\" SELECT \ \"sled\".\"id\", \ \"sled\".\"time_created\", \ @@ -670,7 +833,7 @@ mod tests { FROM \"sled\" \ WHERE (\ \"sled\".\"time_deleted\" IS NULL\ - ) -- binds: [Nexus, 3, 1]", + ) -- binds: [Nexus, 3, 1, Nexus]", ); context.success().await; From 8b3fcc5a0147a34146c1138023728108070b95c7 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 20 Sep 2022 00:12:21 -0400 Subject: [PATCH 85/88] Aliases seem to be working --- nexus/src/db/queries/service_provision.rs | 386 +++++++++++++--------- 1 file changed, 224 insertions(+), 162 deletions(-) diff --git a/nexus/src/db/queries/service_provision.rs b/nexus/src/db/queries/service_provision.rs index 352bddfea36..833ec1d0d88 100644 --- a/nexus/src/db/queries/service_provision.rs +++ b/nexus/src/db/queries/service_provision.rs @@ -10,7 +10,8 @@ use crate::db::pool::DbConnection; use crate::db::schema; use chrono::DateTime; use chrono::Utc; -use diesel::Column; +use diesel::CombineDsl; +use diesel::Expression; use diesel::ExpressionMethods; use diesel::IntoSql; use diesel::Insertable; @@ -18,19 +19,38 @@ use diesel::JoinOnDsl; use diesel::NullableExpressionMethods; use diesel::pg::Pg; use diesel::query_builder::AstPass; -use diesel::query_builder::AsQuery; +// use 
diesel::query_builder::AsQuery; use diesel::query_builder::Query; use diesel::query_builder::QueryFragment; use diesel::query_builder::QueryId; use diesel::sql_types; use diesel::QueryDsl; use diesel::RunQueryDsl; +use diesel::SelectableExpression; trait CteQuery: Query + QueryFragment {} impl CteQuery for T where T: Query + QueryFragment {} +// TODO: What macro am I going to build to make this generation easier? +// +// - Must create a `diesel::table` which wraps the output type. +// - This could have a helper to match the shape of an existing table. +// - Should automatically implement our version of "HasTable" +// - Should automatically implement SubQuery +// +// INPUT: +// - macro_rules generation for `subquery` +// - Implied: Associated table name +// +// - On struct: Associated table name +// +// OUTPUT: +// - Output `table!` macro +// - Impl `HasTable` for associated object +// - Impl `SubQuery` + /// Represents a sub-query within a CTE. /// /// For an expression like: @@ -49,12 +69,16 @@ trait SubQuery { fn query(&self) -> &dyn QueryFragment; } -trait AsTable +// TODO: Do you actually want to grab the *table*? +// +// You want something that allows selection, calling 'count', filtering, +// joining, etc. +trait HasTable where - Self::Table: AsQuery, +// Self::Table: AsQuery, { type Table; - fn as_table(&self) -> Self::Table; + fn table(&self) -> Self::Table; } /// A thin wrapper around a [`SubQuery`]. @@ -100,6 +124,9 @@ impl CteBuilder { self } + // TODO: It would be nice if this could be typed? + // It's not necessarily a SubQuery, but it's probably a "Query" object + // with a particular SQL type. fn build(self, statement: Box>) -> Cte { Cte { subqueries: self.subqueries, @@ -134,6 +161,86 @@ impl QueryFragment for Cte { } } +/// Allows an [`diesel::Expression`] to be referenced by a new name. +/// +/// This generates an " AS " SQL fragment. +/// +/// +/// For example: +/// +/// ```ignore +/// diesel::sql_function!(fn gen_random_uuid() -> Uuid); +/// +/// let query = sleds.select( +/// ( +/// ExpressionAlias::(gen_random_uuid()), +/// ExpressionAlias::(gen_random_uuid()), +/// ), +/// ); +/// ``` +/// +/// Produces the following SQL: +/// +/// ```sql +/// SELECT +/// gen_random_uuid() as id, +/// gen_random_uuid() as sled_id, +/// FROM sleds +/// ``` +// TODO: This is currently used within SELECT statements, though it could +// also be used in INSERT / UPDATE / DELETE statements, to force a subquery +// to have a particular name. This would likely involve an invasive change +// within Diesel itself. 
+#[derive(diesel::expression::ValidGrouping, diesel::query_builder::QueryId)] +struct ExpressionAlias { + expr: E, + name: &'static str, +} + +impl ExpressionAlias +where + E: Expression +{ + fn new(expr: E) -> Self { + Self { + expr, + name: C::NAME, + } + } +} + +impl Expression for ExpressionAlias +where + E: Expression +{ + type SqlType = E::SqlType; +} + +impl diesel::AppearsOnTable for ExpressionAlias +where + E: diesel::AppearsOnTable +{} + +impl SelectableExpression for ExpressionAlias +where + E: SelectableExpression +{} + +impl QueryFragment for ExpressionAlias +where + E: QueryFragment +{ + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + self.expr.walk_ast(out.reborrow())?; + out.push_sql(" AS "); + out.push_sql(&self.name); + Ok(()) + } +} + // ----------------------------- // // Above should be for a generic CTE builder // Below should be for service provisioning @@ -158,14 +265,13 @@ impl SledAllocationPool { } } -impl AsTable for SledAllocationPool { +impl HasTable for SledAllocationPool { type Table = sled_allocation_pool::dsl::sled_allocation_pool; - fn as_table(&self) -> Self::Table { + fn table(&self) -> Self::Table { use diesel::internal::table_macro::StaticQueryFragment; - // TODO: Converting this to a compile-time check would be nicer. // - // TODO: Even better, don't have "name()" at all... force the alias + // TODO: Even better, don't have "name()" at all... force the ExpressionAlias // to be the intermediate "table" name. assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); @@ -185,7 +291,7 @@ impl SubQuery for SledAllocationPool { // TODO: We actually want a trimmed down version of this. // It's generating too much; we don't want to be able to insert/delete/update -// this table; it's basically an alias. +// this table; it's basically an ExpressionAlias. // We *also* do not want the Primary Key. // // However, being able to select columns by name is a critical feature @@ -209,7 +315,7 @@ impl PreviouslyAllocatedServices { use crate::db::schema::service::dsl as service_dsl; use sled_allocation_pool::dsl as alloc_pool_dsl; - let select_from_pool = allocation_pool.as_table().select(alloc_pool_dsl::id).into_boxed(); + let select_from_pool = allocation_pool.table().select(alloc_pool_dsl::id).into_boxed(); Self { query: Box::new( service_dsl::service @@ -220,9 +326,9 @@ impl PreviouslyAllocatedServices { } } -impl AsTable for PreviouslyAllocatedServices { +impl HasTable for PreviouslyAllocatedServices { type Table = previously_allocated_services::dsl::previously_allocated_services; - fn as_table(&self) -> Self::Table { + fn table(&self) -> Self::Table { use diesel::internal::table_macro::StaticQueryFragment; assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); previously_allocated_services::dsl::previously_allocated_services @@ -259,15 +365,15 @@ impl OldServiceCount { fn new(previously_allocated_services: &PreviouslyAllocatedServices) -> Self { Self { query: Box::new( - previously_allocated_services.as_table().count() + previously_allocated_services.table().count() ) } } } -impl AsTable for OldServiceCount { +impl HasTable for OldServiceCount { type Table = old_service_count::dsl::old_service_count; - fn as_table(&self) -> Self::Table { + fn table(&self) -> Self::Table { use diesel::internal::table_macro::StaticQueryFragment; assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); old_service_count::dsl::old_service_count @@ -290,54 +396,6 @@ diesel::table! 
{ } } -/* -struct NewServiceCount { - redundancy: i32, -} - -impl QueryId for NewServiceCount { - type QueryId = (); - const HAS_STATIC_QUERY_ID: bool = false; -} - -impl NewServiceCount { - fn new(redundancy: i32, _old_service_count: &OldServiceCount) -> Self { - Self { - redundancy, - } - } -} - -impl SubQuery for NewServiceCount { - fn name(&self) -> &'static str { - "new_service_count" - } - - fn query(&self) -> &dyn QueryFragment { - self - } -} - -// NOTE: This CTE arm is raw SQL because the "GREATEST" function is not -// supported by Diesel. -impl QueryFragment for NewServiceCount { - fn walk_ast<'a>( - &'a self, - mut out: AstPass<'_, 'a, Pg>, - ) -> diesel::QueryResult<()> { - out.unsafe_to_cache_prepared(); - out.push_sql("SELECT GREATEST("); - out.push_bind_param::(&self.redundancy)?; - out.push_sql(", (SELECT * FROM old_service_count)) - (SELECT * FROM old_service_count)"); - Ok(()) - } -} - -impl Query for NewServiceCount { - type SqlType = sql_types::BigInt; -} -*/ - struct NewServiceCount { query: Box>, } @@ -346,7 +404,7 @@ diesel::sql_function!(fn greatest(a: sql_types::BigInt, b: sql_types::BigInt) -> impl NewServiceCount { fn new(redundancy: i32, old_service_count: &OldServiceCount) -> Self { - let old_count = old_service_count.as_table() + let old_count = old_service_count.table() .select(old_service_count::dsl::count) .single_value() .assume_not_null(); @@ -373,30 +431,49 @@ impl SubQuery for NewServiceCount { } } -struct CandidateSleds {} +struct CandidateSleds { + query: Box> +} impl CandidateSleds { - // TODO: This actually does depend on sled_allocation_pool, - // previously_allocated_services, and new_service_count. - // - // Should we make that explicit? fn new( - _sled_allocation_pool: &SledAllocationPool, - _previously_allocated_services: &PreviouslyAllocatedServices, + sled_allocation_pool: &SledAllocationPool, + previously_allocated_services: &PreviouslyAllocatedServices, _new_service_count: &NewServiceCount, ) -> Self { - Self {} - } -} -impl QueryId for CandidateSleds { - type QueryId = (); - const HAS_STATIC_QUERY_ID: bool = false; + let select_from_previously_allocated = previously_allocated_services.table() + .select(previously_allocated_services::dsl::sled_id) + .into_boxed(); + + let mut select_stmt = sled_allocation_pool + .table() + .filter(sled_allocation_pool::dsl::id.ne_all(select_from_previously_allocated)) + .select(sled_allocation_pool::dsl::id) + .into_boxed(); + + // TODO: I'd really prefer to just pass the 'new_service_count' as the + // `.limit(...)` here, but the API cannot currently operate on an + // expression. + // + // See: https://github.com/diesel-rs/diesel/discussions/3328 for further + // discussion. + select_stmt.limit_offset = diesel::query_builder::BoxedLimitOffsetClause { + limit: Some(Box::new(diesel::dsl::sql::(" LIMIT SELECT * FROM new_service_count"))), + offset: select_stmt.limit_offset.offset, + }; + + Self { + query: Box::new( + select_stmt + ) + } + } } -impl AsTable for CandidateSleds { +impl HasTable for CandidateSleds { type Table = candidate_sleds::dsl::candidate_sleds; - fn as_table(&self) -> Self::Table { + fn table(&self) -> Self::Table { use diesel::internal::table_macro::StaticQueryFragment; assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); candidate_sleds::dsl::candidate_sleds @@ -409,36 +486,10 @@ impl SubQuery for CandidateSleds { } fn query(&self) -> &dyn QueryFragment { - self - } -} - -// NOTE: This CTE arm is raw SQL because the "LIMIT" expression cannot -// include sub-queries in Diesel. 
-impl QueryFragment for CandidateSleds { - fn walk_ast<'a>( - &'a self, - mut out: AstPass<'_, 'a, Pg>, - ) -> diesel::QueryResult<()> { - out.unsafe_to_cache_prepared(); - - out.push_sql( - "SELECT \ - \"sled_allocation_pool\".\"id\" \ - FROM \"sled_allocation_pool\" \ - WHERE \ - \"sled_allocation_pool\".\"id\" NOT IN \ - (SELECT \"sled_id\" FROM \"previously_allocated_services\") \ - LIMIT (SELECT * FROM \"new_service_count\")" - ); - Ok(()) + &self.query } } -impl Query for CandidateSleds { - type SqlType = sql_types::Uuid; -} - diesel::table! { candidate_sleds { id -> Uuid, @@ -446,7 +497,7 @@ diesel::table! { } struct NewInternalIps { - query: Box>, + query: Box>, } impl NewInternalIps { @@ -454,7 +505,7 @@ impl NewInternalIps { use crate::db::schema::sled::dsl as sled_dsl; use candidate_sleds::dsl as candidate_sleds_dsl; - let select_from_candidate_sleds = candidate_sleds.as_table().select(candidate_sleds_dsl::id).into_boxed(); + let select_from_candidate_sleds = candidate_sleds.table().select(candidate_sleds_dsl::id).into_boxed(); Self { query: Box::new( @@ -466,9 +517,9 @@ impl NewInternalIps { } } -impl AsTable for NewInternalIps { +impl HasTable for NewInternalIps { type Table = new_internal_ips::dsl::new_internal_ips; - fn as_table(&self) -> Self::Table { + fn table(&self) -> Self::Table { use diesel::internal::table_macro::StaticQueryFragment; assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); new_internal_ips::dsl::new_internal_ips @@ -508,25 +559,22 @@ impl CandidateServices { fn new(candidate_sleds: &CandidateSleds, new_internal_ips: &NewInternalIps) -> Self { use candidate_sleds::dsl as candidate_sleds_dsl; use new_internal_ips::dsl as new_internal_ips_dsl; + use schema::service::dsl as service_dsl; Self { query: Box::new( - candidate_sleds.as_table().inner_join( - new_internal_ips.as_table().on( + candidate_sleds.table().inner_join( + new_internal_ips.table().on( candidate_sleds_dsl::id.eq(new_internal_ips_dsl::id) ) ).select( ( - // TODO: I think I still want these to be aliased? 
- gen_random_uuid(), - now(), - now(), -// diesel::dsl::sql("gen_random_uuid() AS id"), -// diesel::dsl::sql("now() as time_created"), -// diesel::dsl::sql("now() as time_modified"), - diesel::dsl::sql(&format!("{} as sled_id", candidate_sleds_dsl::id::NAME)), - diesel::dsl::sql(&format!("{} as ip", new_internal_ips_dsl::last_used_address::NAME)), - ServiceKind::Nexus.into_sql::(), + ExpressionAlias::new::(gen_random_uuid()), + ExpressionAlias::new::(now()), + ExpressionAlias::new::(now()), + ExpressionAlias::new::(candidate_sleds_dsl::id), + ExpressionAlias::new::(new_internal_ips_dsl::last_used_address), + ExpressionAlias::new::(ServiceKind::Nexus.into_sql::()), ), ) ) @@ -534,9 +582,9 @@ impl CandidateServices { } } -impl AsTable for CandidateServices { +impl HasTable for CandidateServices { type Table = candidate_services::dsl::candidate_services; - fn as_table(&self) -> Self::Table { + fn table(&self) -> Self::Table { use diesel::internal::table_macro::StaticQueryFragment; assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); candidate_services::dsl::candidate_services @@ -575,7 +623,7 @@ impl InsertServices { Self { query: Box::new( - candidate.as_table().select( + candidate.table().select( candidate_services::all_columns, ).insert_into( service::table @@ -587,9 +635,9 @@ impl InsertServices { } } -impl AsTable for InsertServices { +impl HasTable for InsertServices { type Table = inserted_services::dsl::inserted_services; - fn as_table(&self) -> Self::Table { + fn table(&self) -> Self::Table { use diesel::internal::table_macro::StaticQueryFragment; assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); inserted_services::dsl::inserted_services @@ -644,9 +692,14 @@ impl ServiceProvision { let inserted_services = InsertServices::new(&candidate_services); let final_select = Box::new( - inserted_services.as_table().select( - inserted_services::all_columns - ) + previously_allocated_services + .table() + .select(previously_allocated_services::all_columns) + .union( + inserted_services.table().select( + inserted_services::all_columns + ) + ) ); let cte = CteBuilder::new() @@ -668,6 +721,14 @@ impl ServiceProvision { } +// TODO: +// We could probably make this generic over the Cte "build" method, enforce the +// type there, and auto-impl: +// - QueryId +// - QueryFragment +// - Query +// +// If we know what the SqlType is supposed to be. 
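
To make the TODO above a little more concrete, here is a dependency-free sketch of the "carry the SQL type on the wrapper" idea: if the builder returned something like `TypedCte<Rows>`, the boilerplate trait impls could be written once for the wrapper instead of once per provisioning query. `TypedCte`, `Render`, and `ServiceRow` are invented names; this is not how Diesel's `Query`/`QueryFragment`/`QueryId` are actually wired up.

```rust
use std::marker::PhantomData;

/// Stand-in for the traits that currently have to be hand-implemented
/// for each provisioning query (QueryId / QueryFragment / Query).
trait Render {
    fn render(&self) -> String;
}

/// A CTE whose final statement is known to produce rows of type `Rows`.
struct TypedCte<Rows> {
    sql: String,
    _rows: PhantomData<Rows>,
}

impl<Rows> TypedCte<Rows> {
    fn new(sql: impl Into<String>) -> Self {
        Self { sql: sql.into(), _rows: PhantomData }
    }
}

/// One blanket impl covers every CTE, regardless of its row type.
impl<Rows> Render for TypedCte<Rows> {
    fn render(&self) -> String {
        self.sql.clone()
    }
}

/// The row type returned by the service-provisioning CTE.
struct ServiceRow;

fn main() {
    let cte: TypedCte<ServiceRow> = TypedCte::new("WITH ... SELECT ...");
    println!("{}", cte.render());
}
```
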
impl QueryId for ServiceProvision { type QueryId = (); const HAS_STATIC_QUERY_ID: bool = false; @@ -782,57 +843,58 @@ mod tests { SELECT COUNT(*) FROM \"previously_allocated_services\"\ ), \ new_service_count AS (\ - SELECT GREATEST($2, (SELECT * FROM old_service_count)) - (SELECT * FROM old_service_count)\ + SELECT (\ + greatest(\ + $2, \ + (SELECT \"old_service_count\".\"count\" FROM \"old_service_count\" LIMIT $3)\ + ) - (SELECT \"old_service_count\".\"count\" FROM \"old_service_count\" LIMIT $4)\ + )\ ), \ candidate_sleds AS (\ SELECT \ \"sled_allocation_pool\".\"id\" \ FROM \"sled_allocation_pool\" \ - WHERE \ - \"sled_allocation_pool\".\"id\" NOT IN \ - (SELECT \"sled_id\" FROM \"previously_allocated_services\") \ - LIMIT (SELECT * FROM \"new_service_count\")\ + WHERE (\ + \"sled_allocation_pool\".\"id\" != ALL(\ + SELECT \ + \"previously_allocated_services\".\"sled_id\" \ + FROM \"previously_allocated_services\"\ + )\ + ) \ + LIMIT SELECT * FROM new_service_count\ ), \ new_internal_ips AS (\ UPDATE \ \"sled\" \ SET \ - \"last_used_address\" = (\"sled\".\"last_used_address\" + $3) \ + \"last_used_address\" = (\"sled\".\"last_used_address\" + $5) \ WHERE \ (\"sled\".\"id\" = ANY(SELECT \"candidate_sleds\".\"id\" FROM \"candidate_sleds\")) \ - RETURNING \"sled\".\"id\", \"sled\".\"last_used_address\"\ + RETURNING \ + \"sled\".\"id\", \ + \"sled\".\"last_used_address\"\ ), \ - candidate_services AS (\ + candidate_services AS (\ SELECT \ gen_random_uuid() AS id, \ - now() as time_created, \ - now() as time_modified, \ - \"candidate_sleds\".\"id\", \ - \"new_internal_ips\".\"last_used_address\", \ - $4 \ + now() AS time_created, \ + now() AS time_modified, \ + \"candidate_sleds\".\"id\" AS sled_id, \ + \"new_internal_ips\".\"last_used_address\" AS ip, \ + $6 AS kind \ FROM (\ \"candidate_sleds\" \ INNER JOIN \ \"new_internal_ips\" \ ON (\ - \"candidate_sleds\".\"id\" = \"new_internal_ips\".\"id\"\ - ))\ - ) \ - SELECT \"inserted_services\".\"id\", \"inserted_services\".\"time_created\", \"inserted_services\".\"time_modified\", \"inserted_services\".\"sled_id\", \"inserted_services\".\"ip\", \"inserted_services\".\"kind\" FROM \"service\" AS \"inserted_services\" - SELECT \ - \"sled\".\"id\", \ - \"sled\".\"time_created\", \ - \"sled\".\"time_modified\", \ - \"sled\".\"time_deleted\", \ - \"sled\".\"rcgen\", \ - \"sled\".\"rack_id\", \ - \"sled\".\"is_scrimlet\", \ - \"sled\".\"ip\", \ - \"sled\".\"port\", \ - \"sled\".\"last_used_address\" \ - FROM \"sled\" \ - WHERE (\ - \"sled\".\"time_deleted\" IS NULL\ + \"candidate_sleds\".\"id\" = \"new_internal_ips\".\"id\" + )) + ), + inserted_services AS (\ + INSERT INTO \ + \"service\" \ + (\"id\", \"time_created\", \"time_modified\", \"sled_id\", \"ip\", \"kind\") SELECT \"candidate_services\".\"id\", \"candidate_services\".\"time_created\", \"candidate_services\".\"time_modified\", \"candidate_services\".\"sled_id\", \"candidate_services\".\"ip\", \"candidate_services\".\"kind\" FROM \"candidate_services\" RETURNING \"service\".\"id\", \"service\".\"time_created\", \"service\".\"time_modified\", \"service\".\"sled_id\", \"service\".\"ip\", \"service\".\"kind\") + SELECT \"inserted_services\".\"id\", \"inserted_services\".\"time_created\", \"inserted_services\".\"time_modified\", \"inserted_services\".\"sled_id\", \"inserted_services\".\"ip\", \"inserted_services\".\"kind\" FROM \"inserted_services\" ) -- binds: [Nexus, 3, 1, Nexus]", ); From 92250b1c612f1788622719c64a711b104837077a Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 20 Sep 
2022 16:03:20 -0400 Subject: [PATCH 86/88] splitting up CTE stuff --- nexus/db-macros/src/lib.rs | 15 +- nexus/db-macros/src/subquery.rs | 86 ++++ nexus/src/db/alias.rs | 84 ++++ nexus/src/db/datastore/rack.rs | 11 +- nexus/src/db/mod.rs | 2 + nexus/src/db/queries/service_provision.rs | 576 +++++----------------- nexus/src/db/subquery.rs | 148 ++++++ services.sql | 17 +- 8 files changed, 461 insertions(+), 478 deletions(-) create mode 100644 nexus/db-macros/src/subquery.rs create mode 100644 nexus/src/db/alias.rs create mode 100644 nexus/src/db/subquery.rs diff --git a/nexus/db-macros/src/lib.rs b/nexus/db-macros/src/lib.rs index 839b0479eac..693ccb9f422 100644 --- a/nexus/db-macros/src/lib.rs +++ b/nexus/db-macros/src/lib.rs @@ -19,6 +19,7 @@ use syn::spanned::Spanned; use syn::{Data, DataStruct, DeriveInput, Error, Fields, Ident}; mod lookup; +mod subquery; /// Defines a structure and helper functions for looking up resources /// @@ -110,6 +111,18 @@ fn get_field_with_name<'a>( } } +/// Implements the `Subquery` trait. +/// +/// TODO: more docs. +#[proc_macro_derive(Subquery, attributes(subquery))] +pub fn subquery_target( + input: proc_macro::TokenStream, +) -> proc_macro::TokenStream { + subquery::derive_impl(input.into()) + .unwrap_or_else(|e| e.to_compile_error()) + .into() +} + // Describes which derive macro is being used; allows sharing common code. enum IdentityVariant { Asset, @@ -153,7 +166,7 @@ pub fn asset_target(input: proc_macro::TokenStream) -> proc_macro::TokenStream { } #[derive(Debug)] -struct NameValue { +pub(crate) struct NameValue { name: syn::Path, _eq_token: syn::token::Eq, value: syn::Path, diff --git a/nexus/db-macros/src/subquery.rs b/nexus/db-macros/src/subquery.rs new file mode 100644 index 00000000000..094af7f0048 --- /dev/null +++ b/nexus/db-macros/src/subquery.rs @@ -0,0 +1,86 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Procedure macro for deriving subquery-related information. + +use super::NameValue; + +use proc_macro2::TokenStream; +use quote::quote; +use syn::spanned::Spanned; +use syn::{DeriveInput, Error}; + +/// Looks for a Meta-style attribute with a particular identifier. +/// +/// As an example, for an attribute like `#[subquery(foo = bar)]`, we can find this +/// attribute by calling `get_subquery_attr(&item.attrs, "foo")`. +fn get_subquery_attr( + attrs: &[syn::Attribute], + name: &str, +) -> Option { + attrs + .iter() + .filter(|attr| attr.path.is_ident("subquery")) + .filter_map(|attr| attr.parse_args::().ok()) + .find(|nv| nv.name.is_ident(name)) +} + +// Implementation of `#[derive(Subquery)]` +pub(crate) fn derive_impl(tokens: TokenStream) -> syn::Result { + let item = syn::parse2::(tokens)?; + let name = &item.ident; + + let subquery_nv = get_subquery_attr(&item.attrs, "name").ok_or_else(|| { + Error::new( + item.span(), + format!( + "Resource needs 'name' attribute.\n\ + Try adding #[subquery(name = your_subquery_module)] to {}.", + name + ), + ) + })?; + + // TODO: ensure that a field named "query" exists within this struct. + // Don't bother parsing type; we use it when impl'ing Subquery though. + + let as_query_source_impl = + build_query_source_impl(name, &subquery_nv.value); + let subquery_impl = build_subquery_impl(name, &subquery_nv.value); + + Ok(quote! 
{ + #as_query_source_impl + #subquery_impl + }) +} + +fn build_query_source_impl( + name: &syn::Ident, + subquery_module: &syn::Path, +) -> TokenStream { + quote! { + impl crate::db::subquery::AsQuerySource for #name { + type QuerySource = #subquery_module::table; + fn query_source(&self) -> Self::QuerySource { + #subquery_module::table + } + } + } +} + +fn build_subquery_impl( + name: &syn::Ident, + subquery_module: &syn::Path, +) -> TokenStream { + quote! { + impl crate::db::subquery::SubQuery for #name { + fn name(&self) -> &'static str { + stringify!(#subquery_module) + } + fn query(&self) -> &dyn ::diesel::query_builder::QueryFragment<::diesel::pg::Pg> { + &self.query + } + } + } +} diff --git a/nexus/src/db/alias.rs b/nexus/src/db/alias.rs new file mode 100644 index 00000000000..cbd96fa8301 --- /dev/null +++ b/nexus/src/db/alias.rs @@ -0,0 +1,84 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Tools for creating aliases in diesel. + +use diesel::pg::Pg; +use diesel::query_builder::AstPass; +use diesel::query_builder::QueryFragment; +use diesel::Expression; +use diesel::SelectableExpression; + +/// Allows an [`diesel::Expression`] to be referenced by a new name. +/// +/// This generates an " AS " SQL fragment. +/// +/// +/// For example: +/// +/// ```ignore +/// diesel::sql_function!(fn gen_random_uuid() -> Uuid); +/// +/// let query = sleds.select( +/// ( +/// ExpressionAlias::(gen_random_uuid()), +/// ExpressionAlias::(gen_random_uuid()), +/// ), +/// ); +/// ``` +/// +/// Produces the following SQL: +/// +/// ```sql +/// SELECT +/// gen_random_uuid() as id, +/// gen_random_uuid() as sled_id, +/// FROM sleds +/// ``` +#[derive(diesel::expression::ValidGrouping, diesel::query_builder::QueryId)] +pub struct ExpressionAlias { + expr: E, + name: &'static str, +} + +impl ExpressionAlias +where + E: Expression, +{ + pub fn new(expr: E) -> Self { + Self { expr, name: C::NAME } + } +} + +impl Expression for ExpressionAlias +where + E: Expression, +{ + type SqlType = E::SqlType; +} + +impl diesel::AppearsOnTable for ExpressionAlias where + E: diesel::AppearsOnTable +{ +} + +impl SelectableExpression for ExpressionAlias where + E: SelectableExpression +{ +} + +impl QueryFragment for ExpressionAlias +where + E: QueryFragment, +{ + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + self.expr.walk_ast(out.reborrow())?; + out.push_sql(" AS "); + out.push_sql(&self.name); + Ok(()) + } +} diff --git a/nexus/src/db/datastore/rack.rs b/nexus/src/db/datastore/rack.rs index 069258d83d2..c612c852df5 100644 --- a/nexus/src/db/datastore/rack.rs +++ b/nexus/src/db/datastore/rack.rs @@ -91,15 +91,8 @@ impl DataStore { #[derive(Debug)] enum RackInitError { - ServiceInsert { - err: AsyncInsertError, - sled_id: Uuid, - svc_id: Uuid, - }, - DatasetInsert { - err: AsyncInsertError, - zpool_id: Uuid, - }, + ServiceInsert { err: AsyncInsertError, sled_id: Uuid, svc_id: Uuid }, + DatasetInsert { err: AsyncInsertError, zpool_id: Uuid }, RackUpdate(PoolError), } type TxnError = TransactionError; diff --git a/nexus/src/db/mod.rs b/nexus/src/db/mod.rs index e2a4e7f9f73..edc6fee1021 100644 --- a/nexus/src/db/mod.rs +++ b/nexus/src/db/mod.rs @@ -4,6 +4,7 @@ //! 
Facilities for working with the Omicron database +pub(crate) mod alias; // This is not intended to be public, but this is necessary to use it from // doctests pub mod collection_attach; @@ -25,6 +26,7 @@ mod pool; pub(crate) mod queries; mod saga_recovery; mod sec_store; +pub(crate) mod subquery; mod update_and_check; #[cfg(test)] diff --git a/nexus/src/db/queries/service_provision.rs b/nexus/src/db/queries/service_provision.rs index 833ec1d0d88..bd1b7e81c0f 100644 --- a/nexus/src/db/queries/service_provision.rs +++ b/nexus/src/db/queries/service_provision.rs @@ -4,251 +4,35 @@ //! Implementation of queries for provisioning services. +use crate::db::alias::ExpressionAlias; use crate::db::model::Service; use crate::db::model::ServiceKind; use crate::db::pool::DbConnection; use crate::db::schema; +use crate::db::subquery::{AsQuerySource, Cte, CteBuilder, CteQuery, SubQuery}; +use crate::subquery; use chrono::DateTime; use chrono::Utc; -use diesel::CombineDsl; -use diesel::Expression; -use diesel::ExpressionMethods; -use diesel::IntoSql; -use diesel::Insertable; -use diesel::JoinOnDsl; -use diesel::NullableExpressionMethods; +use db_macros::Subquery; use diesel::pg::Pg; use diesel::query_builder::AstPass; -// use diesel::query_builder::AsQuery; use diesel::query_builder::Query; use diesel::query_builder::QueryFragment; use diesel::query_builder::QueryId; use diesel::sql_types; +use diesel::CombineDsl; +use diesel::ExpressionMethods; +use diesel::Insertable; +use diesel::IntoSql; +use diesel::JoinOnDsl; +use diesel::NullableExpressionMethods; use diesel::QueryDsl; use diesel::RunQueryDsl; -use diesel::SelectableExpression; - -trait CteQuery: Query + QueryFragment {} - -impl CteQuery for T -where T: Query + QueryFragment {} - -// TODO: What macro am I going to build to make this generation easier? -// -// - Must create a `diesel::table` which wraps the output type. -// - This could have a helper to match the shape of an existing table. -// - Should automatically implement our version of "HasTable" -// - Should automatically implement SubQuery -// -// INPUT: -// - macro_rules generation for `subquery` -// - Implied: Associated table name -// -// - On struct: Associated table name -// -// OUTPUT: -// - Output `table!` macro -// - Impl `HasTable` for associated object -// - Impl `SubQuery` - -/// Represents a sub-query within a CTE. -/// -/// For an expression like: -/// -/// ```sql -/// WITH -/// foo as ..., -/// bar as ..., -/// SELECT * FROM bar; -/// ``` -/// -/// This trait represents one of the sub-query arms, such as "foo as ..." or -/// "bar as ...". -trait SubQuery { - fn name(&self) -> &'static str; - fn query(&self) -> &dyn QueryFragment; -} - -// TODO: Do you actually want to grab the *table*? -// -// You want something that allows selection, calling 'count', filtering, -// joining, etc. -trait HasTable -where -// Self::Table: AsQuery, -{ - type Table; - fn table(&self) -> Self::Table; -} - -/// A thin wrapper around a [`SubQuery`]. -/// -/// Used to avoid orphan rules while creating blanket implementations. 
-struct CteSubquery(Box); - -impl QueryId for CteSubquery { - type QueryId = (); - const HAS_STATIC_QUERY_ID: bool = false; -} - -impl QueryFragment for CteSubquery { - fn walk_ast<'a>( - &'a self, - mut out: AstPass<'_, 'a, Pg>, - ) -> diesel::QueryResult<()> { - out.unsafe_to_cache_prepared(); - - out.push_sql(self.0.name()); - out.push_sql(" AS ("); - self.0.query().walk_ast(out.reborrow())?; - out.push_sql(")"); - Ok(()) - } -} - -struct CteBuilder { - subqueries: Vec, -} - -impl CteBuilder { - fn new() -> Self { - Self { - subqueries: vec![], - } - } - - fn add_subquery(mut self, subquery: Q) -> Self { - self.subqueries.push( - CteSubquery(Box::new(subquery)) - ); - self - } - - // TODO: It would be nice if this could be typed? - // It's not necessarily a SubQuery, but it's probably a "Query" object - // with a particular SQL type. - fn build(self, statement: Box>) -> Cte { - Cte { - subqueries: self.subqueries, - statement - } - } -} - -struct Cte { - subqueries: Vec, - statement: Box>, -} - -impl QueryFragment for Cte { - fn walk_ast<'a>( - &'a self, - mut out: AstPass<'_, 'a, Pg>, - ) -> diesel::QueryResult<()> { - out.unsafe_to_cache_prepared(); - - out.push_sql("WITH "); - for (pos, query) in self.subqueries.iter().enumerate() { - query.walk_ast(out.reborrow())?; - if pos == self.subqueries.len() - 1 { - out.push_sql(" "); - } else { - out.push_sql(", "); - } - } - self.statement.walk_ast(out.reborrow())?; - Ok(()) - } -} - -/// Allows an [`diesel::Expression`] to be referenced by a new name. -/// -/// This generates an " AS " SQL fragment. -/// -/// -/// For example: -/// -/// ```ignore -/// diesel::sql_function!(fn gen_random_uuid() -> Uuid); -/// -/// let query = sleds.select( -/// ( -/// ExpressionAlias::(gen_random_uuid()), -/// ExpressionAlias::(gen_random_uuid()), -/// ), -/// ); -/// ``` -/// -/// Produces the following SQL: -/// -/// ```sql -/// SELECT -/// gen_random_uuid() as id, -/// gen_random_uuid() as sled_id, -/// FROM sleds -/// ``` -// TODO: This is currently used within SELECT statements, though it could -// also be used in INSERT / UPDATE / DELETE statements, to force a subquery -// to have a particular name. This would likely involve an invasive change -// within Diesel itself. -#[derive(diesel::expression::ValidGrouping, diesel::query_builder::QueryId)] -struct ExpressionAlias { - expr: E, - name: &'static str, -} - -impl ExpressionAlias -where - E: Expression -{ - fn new(expr: E) -> Self { - Self { - expr, - name: C::NAME, - } - } -} - -impl Expression for ExpressionAlias -where - E: Expression -{ - type SqlType = E::SqlType; -} - -impl diesel::AppearsOnTable for ExpressionAlias -where - E: diesel::AppearsOnTable -{} - -impl SelectableExpression for ExpressionAlias -where - E: SelectableExpression -{} -impl QueryFragment for ExpressionAlias -where - E: QueryFragment -{ - fn walk_ast<'a>( - &'a self, - mut out: AstPass<'_, 'a, Pg>, - ) -> diesel::QueryResult<()> { - self.expr.walk_ast(out.reborrow())?; - out.push_sql(" AS "); - out.push_sql(&self.name); - Ok(()) - } -} - -// ----------------------------- // -// Above should be for a generic CTE builder -// Below should be for service provisioning -// ----------------------------- // - -// TODO: I want this to be as lightweight to make as possible! 
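
For readers skimming the diff: the builder code being removed here (and re-added under `db/subquery.rs` later in this patch) assembles the CTE text in a simple way — each arm contributes `<name> AS (<query>)`, arms are comma-separated, and the final statement is appended after the last arm. Below is a Diesel-free sketch of that shape, with a made-up `build_cte` helper and plain strings standing in for `AstPass`.

```rust
/// Join named subqueries into a `WITH` clause and append the final
/// statement, mirroring the comma/space handling of `Cte::walk_ast`.
fn build_cte(subqueries: &[(&str, &str)], final_statement: &str) -> String {
    let mut out = String::from("WITH ");
    for (pos, (name, query)) in subqueries.iter().enumerate() {
        out.push_str(name);
        out.push_str(" AS (");
        out.push_str(query);
        out.push(')');
        // The last arm is followed by a space, every other arm by ", ".
        out.push_str(if pos == subqueries.len() - 1 { " " } else { ", " });
    }
    out.push_str(final_statement);
    out
}

fn main() {
    let sql = build_cte(
        &[
            ("old_service_count", "SELECT COUNT(*) FROM service"),
            ("new_service_count", "SELECT greatest(3, (SELECT * FROM old_service_count)) - (SELECT * FROM old_service_count)"),
        ],
        "SELECT * FROM new_service_count",
    );
    assert!(sql.starts_with("WITH old_service_count AS ("));
    println!("{sql}");
}
```
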
+#[derive(Subquery)] +#[subquery(name = sled_allocation_pool)] struct SledAllocationPool { - query: Box>, + query: Box>, } impl SledAllocationPool { @@ -259,55 +43,22 @@ impl SledAllocationPool { dsl::sled .filter(dsl::time_deleted.is_null()) // TODO: Filter by rack? - .select((dsl::id,)) - ) + .select((dsl::id,)), + ), } } } -impl HasTable for SledAllocationPool { - type Table = sled_allocation_pool::dsl::sled_allocation_pool; - fn table(&self) -> Self::Table { - use diesel::internal::table_macro::StaticQueryFragment; - // TODO: Converting this to a compile-time check would be nicer. - // - // TODO: Even better, don't have "name()" at all... force the ExpressionAlias - // to be the intermediate "table" name. - assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); - - sled_allocation_pool::dsl::sled_allocation_pool - } -} - -impl SubQuery for SledAllocationPool { - fn name(&self) -> &'static str { - "sled_allocation_pool" - } - - fn query(&self) -> &dyn QueryFragment { - &self.query - } -} - -// TODO: We actually want a trimmed down version of this. -// It's generating too much; we don't want to be able to insert/delete/update -// this table; it's basically an ExpressionAlias. -// We *also* do not want the Primary Key. -// -// However, being able to select columns by name is a critical feature -// that we can't easily do without a similar-looking macro. -diesel::table! { +subquery! { sled_allocation_pool { id -> Uuid, } } -// TODO: -// - How do we avoid re-typing UUID? -// - What can be made generic? - +#[derive(Subquery)] +#[subquery(name = previously_allocated_services)] struct PreviouslyAllocatedServices { - query: Box>, + query: Box>, } impl PreviouslyAllocatedServices { @@ -315,37 +66,21 @@ impl PreviouslyAllocatedServices { use crate::db::schema::service::dsl as service_dsl; use sled_allocation_pool::dsl as alloc_pool_dsl; - let select_from_pool = allocation_pool.table().select(alloc_pool_dsl::id).into_boxed(); + let select_from_pool = allocation_pool + .query_source() + .select(alloc_pool_dsl::id) + .into_boxed(); Self { query: Box::new( service_dsl::service .filter(service_dsl::kind.eq(ServiceKind::Nexus)) - .filter(service_dsl::sled_id.eq_any(select_from_pool)) - ) + .filter(service_dsl::sled_id.eq_any(select_from_pool)), + ), } } } -impl HasTable for PreviouslyAllocatedServices { - type Table = previously_allocated_services::dsl::previously_allocated_services; - fn table(&self) -> Self::Table { - use diesel::internal::table_macro::StaticQueryFragment; - assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); - previously_allocated_services::dsl::previously_allocated_services - } -} - -impl SubQuery for PreviouslyAllocatedServices { - fn name(&self) -> &'static str { - "previously_allocated_services" - } - - fn query(&self) -> &dyn QueryFragment { - &self.query - } -} - -diesel::table! { +subquery! { previously_allocated_services { id -> Uuid, time_created -> Timestamptz, @@ -357,66 +92,50 @@ diesel::table! 
{ } } +#[derive(Subquery)] +#[subquery(name = old_service_count)] struct OldServiceCount { - query: Box>, + query: Box>, } impl OldServiceCount { - fn new(previously_allocated_services: &PreviouslyAllocatedServices) -> Self { + fn new( + previously_allocated_services: &PreviouslyAllocatedServices, + ) -> Self { Self { query: Box::new( - previously_allocated_services.table().count() - ) + previously_allocated_services.query_source().count(), + ), } } } -impl HasTable for OldServiceCount { - type Table = old_service_count::dsl::old_service_count; - fn table(&self) -> Self::Table { - use diesel::internal::table_macro::StaticQueryFragment; - assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); - old_service_count::dsl::old_service_count - } -} - -impl SubQuery for OldServiceCount { - fn name(&self) -> &'static str { - "old_service_count" - } - - fn query(&self) -> &dyn QueryFragment { - &self.query - } -} - -diesel::table! { +subquery! { old_service_count (count) { count -> Int8, } } struct NewServiceCount { - query: Box>, + query: Box>, } diesel::sql_function!(fn greatest(a: sql_types::BigInt, b: sql_types::BigInt) -> sql_types::BigInt); impl NewServiceCount { fn new(redundancy: i32, old_service_count: &OldServiceCount) -> Self { - let old_count = old_service_count.table() + let old_count = old_service_count + .query_source() .select(old_service_count::dsl::count) .single_value() .assume_not_null(); Self { - query: Box::new( - diesel::select( - greatest( - (redundancy as i64).into_sql::(), - old_count, - ) - old_count - ) - ) + query: Box::new(diesel::select( + greatest( + (redundancy as i64).into_sql::(), + old_count, + ) - old_count, + )), } } } @@ -431,8 +150,10 @@ impl SubQuery for NewServiceCount { } } +#[derive(Subquery)] +#[subquery(name = candidate_sleds)] struct CandidateSleds { - query: Box> + query: Box>, } impl CandidateSleds { @@ -441,14 +162,17 @@ impl CandidateSleds { previously_allocated_services: &PreviouslyAllocatedServices, _new_service_count: &NewServiceCount, ) -> Self { - - let select_from_previously_allocated = previously_allocated_services.table() + let select_from_previously_allocated = previously_allocated_services + .query_source() .select(previously_allocated_services::dsl::sled_id) .into_boxed(); let mut select_stmt = sled_allocation_pool - .table() - .filter(sled_allocation_pool::dsl::id.ne_all(select_from_previously_allocated)) + .query_source() + .filter( + sled_allocation_pool::dsl::id + .ne_all(select_from_previously_allocated), + ) .select(sled_allocation_pool::dsl::id) .into_boxed(); @@ -458,46 +182,28 @@ impl CandidateSleds { // // See: https://github.com/diesel-rs/diesel/discussions/3328 for further // discussion. 
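
A Diesel-free sketch of the escape hatch used just below: a typed builder only accepts a literal `i64` limit, so the dynamic case has to be spliced in as an opaque SQL fragment (here, a scalar subquery). `Limit` and `render_limit` are invented for the illustration and are not Diesel APIs; the actual patch overwrites the boxed query's `limit_offset` clause with `diesel::dsl::sql`.

```rust
/// Either a fixed limit, which is all a typed `.limit()` API takes,
/// or an arbitrary SQL fragment such as a scalar subquery.
enum Limit {
    Literal(i64),
    RawSql(&'static str),
}

fn render_limit(limit: &Limit) -> String {
    match limit {
        Limit::Literal(n) => format!(" LIMIT {n}"),
        Limit::RawSql(fragment) => format!(" LIMIT ({fragment})"),
    }
}

fn main() {
    assert_eq!(render_limit(&Limit::Literal(3)), " LIMIT 3");
    assert_eq!(
        render_limit(&Limit::RawSql("SELECT count FROM new_service_count")),
        " LIMIT (SELECT count FROM new_service_count)"
    );
}
```
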
- select_stmt.limit_offset = diesel::query_builder::BoxedLimitOffsetClause { - limit: Some(Box::new(diesel::dsl::sql::(" LIMIT SELECT * FROM new_service_count"))), - offset: select_stmt.limit_offset.offset, - }; + select_stmt.limit_offset = + diesel::query_builder::BoxedLimitOffsetClause { + limit: Some(Box::new(diesel::dsl::sql::( + " LIMIT SELECT * FROM new_service_count", + ))), + offset: select_stmt.limit_offset.offset, + }; - Self { - query: Box::new( - select_stmt - ) - } + Self { query: Box::new(select_stmt) } } } -impl HasTable for CandidateSleds { - type Table = candidate_sleds::dsl::candidate_sleds; - fn table(&self) -> Self::Table { - use diesel::internal::table_macro::StaticQueryFragment; - assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); - candidate_sleds::dsl::candidate_sleds - } -} - -impl SubQuery for CandidateSleds { - fn name(&self) -> &'static str { - "candidate_sleds" - } - - fn query(&self) -> &dyn QueryFragment { - &self.query - } -} - -diesel::table! { +subquery! { candidate_sleds { id -> Uuid, } } +#[derive(Subquery)] +#[subquery(name = new_internal_ips)] struct NewInternalIps { - query: Box>, + query: Box>, } impl NewInternalIps { @@ -505,38 +211,28 @@ impl NewInternalIps { use crate::db::schema::sled::dsl as sled_dsl; use candidate_sleds::dsl as candidate_sleds_dsl; - let select_from_candidate_sleds = candidate_sleds.table().select(candidate_sleds_dsl::id).into_boxed(); + let select_from_candidate_sleds = candidate_sleds + .query_source() + .select(candidate_sleds_dsl::id) + .into_boxed(); Self { - query: Box::new( - diesel::update(sled_dsl::sled.filter(sled_dsl::id.eq_any(select_from_candidate_sleds))) - .set(sled_dsl::last_used_address.eq(sled_dsl::last_used_address + 1)) - .returning((sled_dsl::id, sled_dsl::last_used_address)) - ) + diesel::update( + sled_dsl::sled.filter( + sled_dsl::id.eq_any(select_from_candidate_sleds), + ), + ) + .set( + sled_dsl::last_used_address + .eq(sled_dsl::last_used_address + 1), + ) + .returning((sled_dsl::id, sled_dsl::last_used_address)), + ), } } } -impl HasTable for NewInternalIps { - type Table = new_internal_ips::dsl::new_internal_ips; - fn table(&self) -> Self::Table { - use diesel::internal::table_macro::StaticQueryFragment; - assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); - new_internal_ips::dsl::new_internal_ips - } -} - -impl SubQuery for NewInternalIps { - fn name(&self) -> &'static str { - "new_internal_ips" - } - - fn query(&self) -> &dyn QueryFragment { - &self.query - } -} - -diesel::table! { +subquery! 
{ new_internal_ips { id -> Uuid, last_used_address -> Inet, @@ -548,23 +244,28 @@ diesel::allow_tables_to_appear_in_same_query!( new_internal_ips, ); +#[derive(Subquery)] +#[subquery(name = candidate_services)] struct CandidateServices { - query: Box>, + query: Box>, } diesel::sql_function!(fn gen_random_uuid() -> Uuid); diesel::sql_function!(fn now() -> Timestamptz); impl CandidateServices { - fn new(candidate_sleds: &CandidateSleds, new_internal_ips: &NewInternalIps) -> Self { + fn new( + candidate_sleds: &CandidateSleds, + new_internal_ips: &NewInternalIps, + ) -> Self { use candidate_sleds::dsl as candidate_sleds_dsl; use new_internal_ips::dsl as new_internal_ips_dsl; use schema::service::dsl as service_dsl; Self { query: Box::new( - candidate_sleds.table().inner_join( - new_internal_ips.table().on( + candidate_sleds.query_source().inner_join( + new_internal_ips.query_source().on( candidate_sleds_dsl::id.eq(new_internal_ips_dsl::id) ) ).select( @@ -582,26 +283,7 @@ impl CandidateServices { } } -impl HasTable for CandidateServices { - type Table = candidate_services::dsl::candidate_services; - fn table(&self) -> Self::Table { - use diesel::internal::table_macro::StaticQueryFragment; - assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); - candidate_services::dsl::candidate_services - } -} - -impl SubQuery for CandidateServices { - fn name(&self) -> &'static str { - "candidate_services" - } - - fn query(&self) -> &dyn QueryFragment { - &self.query - } -} - -diesel::table! { +subquery! { candidate_services { id -> Uuid, time_created -> Timestamptz, @@ -613,8 +295,10 @@ diesel::table! { } } +#[derive(Subquery)] +#[subquery(name = inserted_services)] struct InsertServices { - query: Box>, + query: Box>, } impl InsertServices { @@ -623,38 +307,17 @@ impl InsertServices { Self { query: Box::new( - candidate.table().select( - candidate_services::all_columns, - ).insert_into( - service::table - ).returning( - service::all_columns - ) - ) + candidate + .query_source() + .select(candidate_services::all_columns) + .insert_into(service::table) + .returning(service::all_columns), + ), } } } -impl HasTable for InsertServices { - type Table = inserted_services::dsl::inserted_services; - fn table(&self) -> Self::Table { - use diesel::internal::table_macro::StaticQueryFragment; - assert_eq!(self.name(), Self::Table::STATIC_COMPONENT.0); - inserted_services::dsl::inserted_services - } -} - -impl SubQuery for InsertServices { - fn name(&self) -> &'static str { - "inserted_services" - } - - fn query(&self) -> &dyn QueryFragment { - &self.query - } -} - -diesel::table! { +subquery! 
{ inserted_services { id -> Uuid, time_created -> Timestamptz, @@ -679,27 +342,31 @@ impl ServiceProvision { pub fn new(redundancy: i32) -> Self { let now = Utc::now(); let sled_allocation_pool = SledAllocationPool::new(); - let previously_allocated_services = PreviouslyAllocatedServices::new(&sled_allocation_pool); - let old_service_count = OldServiceCount::new(&previously_allocated_services); - let new_service_count = NewServiceCount::new(redundancy, &old_service_count); + let previously_allocated_services = + PreviouslyAllocatedServices::new(&sled_allocation_pool); + let old_service_count = + OldServiceCount::new(&previously_allocated_services); + let new_service_count = + NewServiceCount::new(redundancy, &old_service_count); let candidate_sleds = CandidateSleds::new( &sled_allocation_pool, &previously_allocated_services, - &new_service_count + &new_service_count, ); let new_internal_ips = NewInternalIps::new(&candidate_sleds); - let candidate_services = CandidateServices::new(&candidate_sleds, &new_internal_ips); + let candidate_services = + CandidateServices::new(&candidate_sleds, &new_internal_ips); let inserted_services = InsertServices::new(&candidate_services); let final_select = Box::new( previously_allocated_services - .table() + .query_source() .select(previously_allocated_services::all_columns) .union( - inserted_services.table().select( - inserted_services::all_columns - ) - ) + inserted_services + .query_source() + .select(inserted_services::all_columns), + ), ); let cte = CteBuilder::new() @@ -713,14 +380,10 @@ impl ServiceProvision { .add_subquery(inserted_services) .build(final_select); - Self { - now, - cte, - } + Self { now, cte } } } - // TODO: // We could probably make this generic over the Cte "build" method, enforce the // type there, and auto-impl: @@ -803,10 +466,7 @@ mod tests { #[tokio::test] async fn test_foobar() { - let context = TestContext::new( - "test_foobar", - ) - .await; + let context = TestContext::new("test_foobar").await; let query = ServiceProvision::new(3); diff --git a/nexus/src/db/subquery.rs b/nexus/src/db/subquery.rs new file mode 100644 index 00000000000..baa7404746d --- /dev/null +++ b/nexus/src/db/subquery.rs @@ -0,0 +1,148 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Subquery-related traits which may be derived for DB structures. + +use diesel::pg::Pg; +use diesel::query_builder::AstPass; +use diesel::query_builder::Query; +use diesel::query_builder::QueryFragment; +use diesel::query_builder::QueryId; + +/// Specifies that a subquery has a particular name, and associated columns. +/// +/// The syntax currently matches that of the [`diesel::table`] macro. +// TODO: We're currently piggy-backing on the table macro for convenience. +// We actually do not want to generate an entire table for each subquery - we'd +// like to have a query source (which we can use to generate SELECT statements, +// JOIN, etc), but we don't want this to be an INSERT/UPDATE/DELETE target. +// +// Similarly, we don't want to force callers to supply a "primary key". +#[macro_export] +macro_rules! subquery { + ($($tokens:tt)*) => { + ::diesel::table! { $($tokens)* } + } +} + +/// Represents a subquery within a CTE. 
+/// +/// For an expression like: +/// +/// ```sql +/// WITH +/// foo as ..., +/// bar as ..., +/// SELECT * FROM bar; +/// ``` +/// +/// This trait represents one of the sub-query arms, such as "foo as ..." or +/// "bar as ...". +// This trait intentionally is agnostic to the SQL type of the subquery, +// meaning that it can be used by the [`CteBuilder`] within a [`Vec`]. +pub trait SubQuery { + fn name(&self) -> &'static str; + fn query(&self) -> &dyn QueryFragment; +} + +/// Trait which implies that the associated query may be used +/// as a query source. +/// +/// For example, given the subquery: +/// +/// ```sql +/// user_ids as (SELECT id FROM user) +/// ``` +/// +/// It should be possible to "SELECT" from `user_ids`. This trait +/// surfaces that underlying query source. +// TODO: Take a much closer look at "AliasSource". It doesn't solve +// the problem of grabbing the query fragment for you, but it might +// help for referencing the "origin" object (table in upstream, but +// plausibly a subquery too). +pub trait AsQuerySource { + type QuerySource; + fn query_source(&self) -> Self::QuerySource; +} + +/// Describes the requirements to be subquery within a CTE: +/// - (Query) It must be a complete SQL query with a specific return type +/// - (QueryFragment) It must be capable of emitting a SQL string +// TODO: In the future, we may force this subquery to have named columns. +pub trait CteQuery: Query + QueryFragment {} + +impl CteQuery for T where T: Query + QueryFragment {} + +/// A thin wrapper around a [`SubQuery`]. +/// +/// Used to avoid orphan rules while creating blanket implementations. +pub struct CteSubquery(Box); + +impl QueryId for CteSubquery { + type QueryId = (); + const HAS_STATIC_QUERY_ID: bool = false; +} + +impl QueryFragment for CteSubquery { + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + out.unsafe_to_cache_prepared(); + + out.push_sql(self.0.name()); + out.push_sql(" AS ("); + self.0.query().walk_ast(out.reborrow())?; + out.push_sql(")"); + Ok(()) + } +} + +pub struct CteBuilder { + subqueries: Vec, +} + +impl CteBuilder { + pub fn new() -> Self { + Self { subqueries: vec![] } + } + + pub fn add_subquery(mut self, subquery: Q) -> Self { + self.subqueries.push(CteSubquery(Box::new(subquery))); + self + } + + // TODO: It would be nice if this could be typed? + // It's not necessarily a SubQuery, but it's probably a "Query" object + // with a particular SQL type. 
+ pub fn build(self, statement: Box>) -> Cte { + Cte { subqueries: self.subqueries, statement } + } +} + +pub struct Cte { + subqueries: Vec, + statement: Box>, +} + +impl QueryFragment for Cte { + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + out.unsafe_to_cache_prepared(); + + out.push_sql("WITH "); + for (pos, query) in self.subqueries.iter().enumerate() { + query.walk_ast(out.reborrow())?; + if pos == self.subqueries.len() - 1 { + out.push_sql(" "); + } else { + out.push_sql(", "); + } + } + self.statement.walk_ast(out.reborrow())?; + Ok(()) + } +} diff --git a/services.sql b/services.sql index c8d8480a475..3f616387f78 100644 --- a/services.sql +++ b/services.sql @@ -339,8 +339,8 @@ WITH -- Calculate the number of new services we need new_service_count AS ( -- XXX: 3 is the user-supplied redundancy - SELECT GREATEST(3, (SELECT * FROM old_service_count)) - - (SELECT * FROM old_service_count) + SELECT (greatest(3, (SELECT old_service_count.count FROM old_service_count)) + - (SELECT old_service_count.count FROM old_service_count)) ), -- Get allocation candidates from the pool, as long as they don't already @@ -389,10 +389,11 @@ WITH now() as time_modified, candidate_sleds.id as sled_id, new_internal_ips.ip as ip, + -- XXX service type CAST('nexus' AS omicron.public.service_kind) as kind FROM candidate_sleds - LEFT JOIN + INNER JOIN new_internal_ips ON candidate_sleds.id = new_internal_ips.sled_id @@ -401,13 +402,9 @@ WITH inserted_services AS ( INSERT INTO omicron.public.service ( - SELECT - candidate_services.id, - candidate_services.time_created, - candidate_services.time_modified, - candidate_services.sled_id, - candidate_services.ip, - candidate_services.kind + -- XXX: "SELECT *" isn't currently possible with Diesel... + -- ... but it *COULD* be, when the source is a CTE Query! 
+ SELECT * FROM candidate_services ) RETURNING * From 4f8c43cda0bc085d02a2faf17831bd9ae77265a9 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 21 Sep 2022 11:54:08 -0400 Subject: [PATCH 87/88] using macros for assistance --- Cargo.lock | 38 ++++++ nexus/Cargo.toml | 1 + nexus/db-macros/src/subquery.rs | 14 +- nexus/src/db/queries/service_provision.rs | 150 ++++++++++++++-------- nexus/src/db/subquery.rs | 13 +- 5 files changed, 156 insertions(+), 60 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6757f0223dd..07450bfe855 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -900,6 +900,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "ctor" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdffe87e1d521a10f9696f833fe502293ea446d7f256c06128293a4119bdf4cb" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "cty" version = "0.2.2" @@ -2988,6 +2998,7 @@ dependencies = [ "oximeter-producer", "parse-display", "pq-sys", + "pretty_assertions", "rand 0.8.5", "ref-cast", "regex", @@ -3307,6 +3318,15 @@ dependencies = [ "syn", ] +[[package]] +name = "output_vt100" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66" +dependencies = [ + "winapi", +] + [[package]] name = "oxide-client" version = "0.1.0" @@ -3872,6 +3892,18 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c6fa0831dd7cc608c38a5e323422a0077678fa5744aa2be4ad91c4ece8eec8d5" +[[package]] +name = "pretty_assertions" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a25e9bcb20aa780fd0bb16b72403a9064d6b3f22f026946029acb941a50af755" +dependencies = [ + "ctor", + "diff", + "output_vt100", + "yansi", +] + [[package]] name = "proc-macro-crate" version = "1.2.1" @@ -6493,6 +6525,12 @@ dependencies = [ "libc", ] +[[package]] +name = "yansi" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" + [[package]] name = "zerocopy" version = "0.3.0" diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 61fecaac7e3..96378b43198 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -122,6 +122,7 @@ nexus-test-utils-macros = { path = "test-utils-macros" } nexus-test-utils = { path = "test-utils" } omicron-test-utils = { path = "../test-utils" } openapiv3 = "1.0" +pretty_assertions = "1.3" regex = "1.6.0" subprocess = "0.2.9" term = "0.7" diff --git a/nexus/db-macros/src/subquery.rs b/nexus/db-macros/src/subquery.rs index 094af7f0048..fa086d48e4c 100644 --- a/nexus/db-macros/src/subquery.rs +++ b/nexus/db-macros/src/subquery.rs @@ -42,8 +42,10 @@ pub(crate) fn derive_impl(tokens: TokenStream) -> syn::Result { ) })?; - // TODO: ensure that a field named "query" exists within this struct. - // Don't bother parsing type; we use it when impl'ing Subquery though. + // TODO: We should ensure that a field named "query" exists within this + // struct. We currently rely on it existing. + // + // Don't bother parsing type, but we use it when impl'ing Subquery. let as_query_source_impl = build_query_source_impl(name, &subquery_nv.value); @@ -55,6 +57,9 @@ pub(crate) fn derive_impl(tokens: TokenStream) -> syn::Result { }) } +// TODO: Should we use diesel's "QuerySource" and "AsQuery" here? +// +// I think that could work for most "select" queries, but might break joins. 
fn build_query_source_impl( name: &syn::Ident, subquery_module: &syn::Path, @@ -74,9 +79,10 @@ fn build_subquery_impl( subquery_module: &syn::Path, ) -> TokenStream { quote! { - impl crate::db::subquery::SubQuery for #name { + impl crate::db::subquery::Subquery for #name { fn name(&self) -> &'static str { - stringify!(#subquery_module) + use ::diesel::internal::table_macro::StaticQueryFragment; + #subquery_module::table::STATIC_COMPONENT.0 } fn query(&self) -> &dyn ::diesel::query_builder::QueryFragment<::diesel::pg::Pg> { &self.query diff --git a/nexus/src/db/queries/service_provision.rs b/nexus/src/db/queries/service_provision.rs index bd1b7e81c0f..024279cdc34 100644 --- a/nexus/src/db/queries/service_provision.rs +++ b/nexus/src/db/queries/service_provision.rs @@ -9,7 +9,7 @@ use crate::db::model::Service; use crate::db::model::ServiceKind; use crate::db::pool::DbConnection; use crate::db::schema; -use crate::db::subquery::{AsQuerySource, Cte, CteBuilder, CteQuery, SubQuery}; +use crate::db::subquery::{AsQuerySource, Cte, CteBuilder, CteQuery}; use crate::subquery; use chrono::DateTime; use chrono::Utc; @@ -29,6 +29,7 @@ use diesel::NullableExpressionMethods; use diesel::QueryDsl; use diesel::RunQueryDsl; +/// A subquery to find all sleds that could run services. #[derive(Subquery)] #[subquery(name = sled_allocation_pool)] struct SledAllocationPool { @@ -36,13 +37,13 @@ struct SledAllocationPool { } impl SledAllocationPool { - fn new() -> Self { + fn new(rack_id: uuid::Uuid) -> Self { use crate::db::schema::sled::dsl; Self { query: Box::new( dsl::sled .filter(dsl::time_deleted.is_null()) - // TODO: Filter by rack? + .filter(dsl::rack_id.eq(rack_id)) .select((dsl::id,)), ), } @@ -55,6 +56,8 @@ subquery! { } } +/// A subquery to find all services of a particular type which have already been +/// allocated. #[derive(Subquery)] #[subquery(name = previously_allocated_services)] struct PreviouslyAllocatedServices { @@ -62,7 +65,7 @@ struct PreviouslyAllocatedServices { } impl PreviouslyAllocatedServices { - fn new(allocation_pool: &SledAllocationPool) -> Self { + fn new(allocation_pool: &SledAllocationPool, kind: ServiceKind) -> Self { use crate::db::schema::service::dsl as service_dsl; use sled_allocation_pool::dsl as alloc_pool_dsl; @@ -73,7 +76,7 @@ impl PreviouslyAllocatedServices { Self { query: Box::new( service_dsl::service - .filter(service_dsl::kind.eq(ServiceKind::Nexus)) + .filter(service_dsl::kind.eq(kind)) .filter(service_dsl::sled_id.eq_any(select_from_pool)), ), } @@ -92,6 +95,7 @@ subquery! { } } +/// A subquery to find the number of old services. #[derive(Subquery)] #[subquery(name = old_service_count)] struct OldServiceCount { @@ -116,6 +120,10 @@ subquery! { } } +/// A subquery to find the number of additional services which should be +/// provisioned. +#[derive(Subquery)] +#[subquery(name = new_service_count)] struct NewServiceCount { query: Box>, } @@ -131,25 +139,24 @@ impl NewServiceCount { .assume_not_null(); Self { query: Box::new(diesel::select( - greatest( - (redundancy as i64).into_sql::(), - old_count, - ) - old_count, + ExpressionAlias::new::( + greatest( + (redundancy as i64).into_sql::(), + old_count, + ) - old_count, + ) )), } } } -impl SubQuery for NewServiceCount { - fn name(&self) -> &'static str { - "new_service_count" - } - - fn query(&self) -> &dyn QueryFragment { - &self.query +subquery! { + new_service_count (count) { + count -> Int8, } } +/// A subquery to find new sleds to host the proposed services. 
#[derive(Subquery)] #[subquery(name = candidate_sleds)] struct CandidateSleds { @@ -200,6 +207,7 @@ subquery! { } } +/// A subquery to provision internal IPs for all the new services. #[derive(Subquery)] #[subquery(name = new_internal_ips)] struct NewInternalIps { @@ -244,6 +252,7 @@ diesel::allow_tables_to_appear_in_same_query!( new_internal_ips, ); +/// A subquery to create the new services which should be inserted. #[derive(Subquery)] #[subquery(name = candidate_services)] struct CandidateServices { @@ -257,6 +266,7 @@ impl CandidateServices { fn new( candidate_sleds: &CandidateSleds, new_internal_ips: &NewInternalIps, + kind: ServiceKind, ) -> Self { use candidate_sleds::dsl as candidate_sleds_dsl; use new_internal_ips::dsl as new_internal_ips_dsl; @@ -275,7 +285,7 @@ impl CandidateServices { ExpressionAlias::new::(now()), ExpressionAlias::new::(candidate_sleds_dsl::id), ExpressionAlias::new::(new_internal_ips_dsl::last_used_address), - ExpressionAlias::new::(ServiceKind::Nexus.into_sql::()), + ExpressionAlias::new::(kind.into_sql::()), ), ) ) @@ -295,6 +305,7 @@ subquery! { } } +/// A subquery to insert the new services. #[derive(Subquery)] #[subquery(name = inserted_services)] struct InsertServices { @@ -317,6 +328,9 @@ impl InsertServices { } } +// TODO: It's worth looking at Diesel's aliasing facilities to see +// what we can do for these cases where we're trying to generate +// a table identical to an existing one, but with a new name. subquery! { inserted_services { id -> Uuid, @@ -339,11 +353,11 @@ pub struct ServiceProvision { } impl ServiceProvision { - pub fn new(redundancy: i32) -> Self { + pub fn new(redundancy: i32, rack_id: uuid::Uuid, kind: ServiceKind) -> Self { let now = Utc::now(); - let sled_allocation_pool = SledAllocationPool::new(); + let sled_allocation_pool = SledAllocationPool::new(rack_id); let previously_allocated_services = - PreviouslyAllocatedServices::new(&sled_allocation_pool); + PreviouslyAllocatedServices::new(&sled_allocation_pool, kind); let old_service_count = OldServiceCount::new(&previously_allocated_services); let new_service_count = @@ -355,7 +369,7 @@ impl ServiceProvision { ); let new_internal_ips = NewInternalIps::new(&candidate_sleds); let candidate_services = - CandidateServices::new(&candidate_sleds, &new_internal_ips); + CandidateServices::new(&candidate_sleds, &new_internal_ips, kind); let inserted_services = InsertServices::new(&candidate_services); let final_select = Box::new( @@ -420,15 +434,10 @@ impl RunQueryDsl for ServiceProvision {} mod tests { use crate::context::OpContext; use crate::db::datastore::DataStore; - use crate::db::identity::Resource; - use crate::db::model::Name; - use async_bb8_diesel::AsyncRunQueryDsl; use diesel::pg::Pg; use dropshot::test_util::LogContext; use nexus_test_utils::db::test_setup_database; use nexus_test_utils::RACK_UUID; - use omicron_common::api::external::Error; - use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_test_utils::dev; use omicron_test_utils::dev::db::CockroachInstance; use std::sync::Arc; @@ -465,22 +474,27 @@ mod tests { } #[tokio::test] - async fn test_foobar() { - let context = TestContext::new("test_foobar").await; - - let query = ServiceProvision::new(3); - - let stringified = diesel::debug_query::(&query).to_string(); + async fn test_query_output() { + let context = TestContext::new("test_query_output").await; + + let redundancy = 3; + let query = ServiceProvision::new( + redundancy, + Uuid::parse_str(RACK_UUID).unwrap(), + 
crate::db::model::ServiceKind::Nexus + ); - assert_eq!( - stringified, + pretty_assertions::assert_eq!( + diesel::debug_query::(&query).to_string(), + format!( "WITH \ - sled_allocation_pool AS (\ + sled_allocation_pool AS (\ SELECT \ \"sled\".\"id\" \ FROM \"sled\" \ WHERE (\ - \"sled\".\"time_deleted\" IS NULL\ + (\"sled\".\"time_deleted\" IS NULL) AND \ + (\"sled\".\"rack_id\" = $1)\ )\ ), \ previously_allocated_services AS (\ @@ -493,7 +507,7 @@ mod tests { \"service\".\"kind\" \ FROM \"service\" \ WHERE (\ - (\"service\".\"kind\" = $1) AND \ + (\"service\".\"kind\" = $2) AND \ (\"service\".\"sled_id\" = \ ANY(SELECT \"sled_allocation_pool\".\"id\" FROM \"sled_allocation_pool\")\ )\ @@ -505,10 +519,11 @@ mod tests { new_service_count AS (\ SELECT (\ greatest(\ - $2, \ - (SELECT \"old_service_count\".\"count\" FROM \"old_service_count\" LIMIT $3)\ - ) - (SELECT \"old_service_count\".\"count\" FROM \"old_service_count\" LIMIT $4)\ - )\ + $3, \ + (SELECT \"old_service_count\".\"count\" FROM \"old_service_count\" LIMIT $4)\ + ) - (SELECT \"old_service_count\".\"count\" FROM \"old_service_count\" LIMIT $5)\ + ) \ + AS count\ ), \ candidate_sleds AS (\ SELECT \ @@ -527,7 +542,7 @@ mod tests { UPDATE \ \"sled\" \ SET \ - \"last_used_address\" = (\"sled\".\"last_used_address\" + $5) \ + \"last_used_address\" = (\"sled\".\"last_used_address\" + $6) \ WHERE \ (\"sled\".\"id\" = ANY(SELECT \"candidate_sleds\".\"id\" FROM \"candidate_sleds\")) \ RETURNING \ @@ -541,21 +556,54 @@ mod tests { now() AS time_modified, \ \"candidate_sleds\".\"id\" AS sled_id, \ \"new_internal_ips\".\"last_used_address\" AS ip, \ - $6 AS kind \ + $7 AS kind \ FROM (\ \"candidate_sleds\" \ INNER JOIN \ \"new_internal_ips\" \ ON (\ - \"candidate_sleds\".\"id\" = \"new_internal_ips\".\"id\" - )) - ), + \"candidate_sleds\".\"id\" = \"new_internal_ips\".\"id\"\ + ))\ + ), \ inserted_services AS (\ - INSERT INTO \ - \"service\" \ - (\"id\", \"time_created\", \"time_modified\", \"sled_id\", \"ip\", \"kind\") SELECT \"candidate_services\".\"id\", \"candidate_services\".\"time_created\", \"candidate_services\".\"time_modified\", \"candidate_services\".\"sled_id\", \"candidate_services\".\"ip\", \"candidate_services\".\"kind\" FROM \"candidate_services\" RETURNING \"service\".\"id\", \"service\".\"time_created\", \"service\".\"time_modified\", \"service\".\"sled_id\", \"service\".\"ip\", \"service\".\"kind\") - SELECT \"inserted_services\".\"id\", \"inserted_services\".\"time_created\", \"inserted_services\".\"time_modified\", \"inserted_services\".\"sled_id\", \"inserted_services\".\"ip\", \"inserted_services\".\"kind\" FROM \"inserted_services\" - ) -- binds: [Nexus, 3, 1, Nexus]", + INSERT INTO \"service\" \ + (\"id\", \"time_created\", \"time_modified\", \"sled_id\", \"ip\", \"kind\") \ + SELECT \ + \"candidate_services\".\"id\", \ + \"candidate_services\".\"time_created\", \ + \"candidate_services\".\"time_modified\", \ + \"candidate_services\".\"sled_id\", \ + \"candidate_services\".\"ip\", \ + \"candidate_services\".\"kind\" \ + FROM \"candidate_services\" \ + RETURNING \ + \"service\".\"id\", \ + \"service\".\"time_created\", \ + \"service\".\"time_modified\", \ + \"service\".\"sled_id\", \ + \"service\".\"ip\", \"service\".\"kind\"\ + ) \ + (\ + SELECT \ + \"previously_allocated_services\".\"id\", \ + \"previously_allocated_services\".\"time_created\", \ + \"previously_allocated_services\".\"time_modified\", \ + \"previously_allocated_services\".\"sled_id\", \ + \"previously_allocated_services\".\"ip\", \ + 
\"previously_allocated_services\".\"kind\" \ + FROM \"previously_allocated_services\"\ + ) UNION \ + (\ + SELECT \ + \"inserted_services\".\"id\", \ + \"inserted_services\".\"time_created\", \ + \"inserted_services\".\"time_modified\", \ + \"inserted_services\".\"sled_id\", \ + \"inserted_services\".\"ip\", \ + \"inserted_services\".\"kind\" \ + FROM \"inserted_services\"\ + ) -- binds: [{RACK_UUID}, Nexus, {redundancy}, 1, 1, 1, Nexus]", + ), ); context.success().await; diff --git a/nexus/src/db/subquery.rs b/nexus/src/db/subquery.rs index baa7404746d..f28ee45fe2a 100644 --- a/nexus/src/db/subquery.rs +++ b/nexus/src/db/subquery.rs @@ -19,6 +19,9 @@ use diesel::query_builder::QueryId; // JOIN, etc), but we don't want this to be an INSERT/UPDATE/DELETE target. // // Similarly, we don't want to force callers to supply a "primary key". +// +// TODO: It might be worth looking at "diesel_dynamic_schema" for inspiration. +// Although we shouldn't use that exactly, we may recreate a variant of it? #[macro_export] macro_rules! subquery { ($($tokens:tt)*) => { @@ -41,7 +44,7 @@ macro_rules! subquery { /// "bar as ...". // This trait intentionally is agnostic to the SQL type of the subquery, // meaning that it can be used by the [`CteBuilder`] within a [`Vec`]. -pub trait SubQuery { +pub trait Subquery { fn name(&self) -> &'static str; fn query(&self) -> &dyn QueryFragment; } @@ -74,10 +77,10 @@ pub trait CteQuery: Query + QueryFragment {} impl CteQuery for T where T: Query + QueryFragment {} -/// A thin wrapper around a [`SubQuery`]. +/// A thin wrapper around a [`SubQUery`]. /// /// Used to avoid orphan rules while creating blanket implementations. -pub struct CteSubquery(Box); +pub struct CteSubquery(Box); impl QueryId for CteSubquery { type QueryId = (); @@ -108,13 +111,13 @@ impl CteBuilder { Self { subqueries: vec![] } } - pub fn add_subquery(mut self, subquery: Q) -> Self { + pub fn add_subquery(mut self, subquery: Q) -> Self { self.subqueries.push(CteSubquery(Box::new(subquery))); self } // TODO: It would be nice if this could be typed? - // It's not necessarily a SubQuery, but it's probably a "Query" object + // It's not necessarily a Subquery, but it's probably a "Query" object // with a particular SQL type. pub fn build(self, statement: Box>) -> Cte { Cte { subqueries: self.subqueries, statement } From 62616f4748f14b08ed3b38b32d3616d2aa7841db Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 21 Sep 2022 14:46:22 -0400 Subject: [PATCH 88/88] Escaping impld --- nexus/db-macros/src/subquery.rs | 14 +++++--- nexus/src/db/queries/service_provision.rs | 40 ++++++++++++----------- nexus/src/db/subquery.rs | 38 ++++++++++++++++++--- services.sql | 1 - 4 files changed, 64 insertions(+), 29 deletions(-) diff --git a/nexus/db-macros/src/subquery.rs b/nexus/db-macros/src/subquery.rs index fa086d48e4c..9c7633b76e8 100644 --- a/nexus/db-macros/src/subquery.rs +++ b/nexus/db-macros/src/subquery.rs @@ -79,11 +79,17 @@ fn build_subquery_impl( subquery_module: &syn::Path, ) -> TokenStream { quote! 
{ - impl crate::db::subquery::Subquery for #name { - fn name(&self) -> &'static str { - use ::diesel::internal::table_macro::StaticQueryFragment; - #subquery_module::table::STATIC_COMPONENT.0 + impl ::diesel::query_builder::QueryFragment<::diesel::pg::Pg> for #name { + fn walk_ast<'a>( + &'a self, + mut out: ::diesel::query_builder::AstPass<'_, 'a, ::diesel::pg::Pg> + ) -> ::diesel::QueryResult<()> { + #subquery_module::table.walk_ast(out)?; + Ok(()) } + } + + impl crate::db::subquery::Subquery for #name { fn query(&self) -> &dyn ::diesel::query_builder::QueryFragment<::diesel::pg::Pg> { &self.query } diff --git a/nexus/src/db/queries/service_provision.rs b/nexus/src/db/queries/service_provision.rs index 024279cdc34..62edfcf00c0 100644 --- a/nexus/src/db/queries/service_provision.rs +++ b/nexus/src/db/queries/service_provision.rs @@ -61,7 +61,7 @@ subquery! { #[derive(Subquery)] #[subquery(name = previously_allocated_services)] struct PreviouslyAllocatedServices { - query: Box>, + query: Box>, } impl PreviouslyAllocatedServices { @@ -99,7 +99,7 @@ subquery! { #[derive(Subquery)] #[subquery(name = old_service_count)] struct OldServiceCount { - query: Box>, + query: Box>, } impl OldServiceCount { @@ -108,7 +108,8 @@ impl OldServiceCount { ) -> Self { Self { query: Box::new( - previously_allocated_services.query_source().count(), + previously_allocated_services.query_source() + .select((diesel::dsl::count_star(),)), ), } } @@ -125,7 +126,7 @@ subquery! { #[derive(Subquery)] #[subquery(name = new_service_count)] struct NewServiceCount { - query: Box>, + query: Box>, } diesel::sql_function!(fn greatest(a: sql_types::BigInt, b: sql_types::BigInt) -> sql_types::BigInt); @@ -139,12 +140,12 @@ impl NewServiceCount { .assume_not_null(); Self { query: Box::new(diesel::select( - ExpressionAlias::new::( + ExpressionAlias::new::(( greatest( (redundancy as i64).into_sql::(), old_count, ) - old_count, - ) + ),) )), } } @@ -160,7 +161,7 @@ subquery! 
{ #[derive(Subquery)] #[subquery(name = candidate_sleds)] struct CandidateSleds { - query: Box>, + query: Box>, } impl CandidateSleds { @@ -180,7 +181,7 @@ impl CandidateSleds { sled_allocation_pool::dsl::id .ne_all(select_from_previously_allocated), ) - .select(sled_allocation_pool::dsl::id) + .select((sled_allocation_pool::dsl::id,)) .into_boxed(); // TODO: I'd really prefer to just pass the 'new_service_count' as the @@ -256,7 +257,7 @@ diesel::allow_tables_to_appear_in_same_query!( #[derive(Subquery)] #[subquery(name = candidate_services)] struct CandidateServices { - query: Box>, + query: Box>, } diesel::sql_function!(fn gen_random_uuid() -> Uuid); @@ -272,6 +273,7 @@ impl CandidateServices { use new_internal_ips::dsl as new_internal_ips_dsl; use schema::service::dsl as service_dsl; + let kind = kind.into_sql::(); Self { query: Box::new( candidate_sleds.query_source().inner_join( @@ -285,7 +287,7 @@ impl CandidateServices { ExpressionAlias::new::(now()), ExpressionAlias::new::(candidate_sleds_dsl::id), ExpressionAlias::new::(new_internal_ips_dsl::last_used_address), - ExpressionAlias::new::(kind.into_sql::()), + ExpressionAlias::new::(kind), ), ) ) @@ -484,11 +486,11 @@ mod tests { crate::db::model::ServiceKind::Nexus ); - pretty_assertions::assert_eq!( + pretty_assertions::assert_eq!( diesel::debug_query::(&query).to_string(), format!( "WITH \ - sled_allocation_pool AS (\ + \"sled_allocation_pool\" AS (\ SELECT \ \"sled\".\"id\" \ FROM \"sled\" \ @@ -497,7 +499,7 @@ mod tests { (\"sled\".\"rack_id\" = $1)\ )\ ), \ - previously_allocated_services AS (\ + \"previously_allocated_services\" AS (\ SELECT \ \"service\".\"id\", \ \"service\".\"time_created\", \ @@ -513,10 +515,10 @@ mod tests { )\ )\ ), \ - old_service_count AS (\ + \"old_service_count\" AS (\ SELECT COUNT(*) FROM \"previously_allocated_services\"\ ), \ - new_service_count AS (\ + \"new_service_count\" AS (\ SELECT (\ greatest(\ $3, \ @@ -525,7 +527,7 @@ mod tests { ) \ AS count\ ), \ - candidate_sleds AS (\ + \"candidate_sleds\" AS (\ SELECT \ \"sled_allocation_pool\".\"id\" \ FROM \"sled_allocation_pool\" \ @@ -538,7 +540,7 @@ mod tests { ) \ LIMIT SELECT * FROM new_service_count\ ), \ - new_internal_ips AS (\ + \"new_internal_ips\" AS (\ UPDATE \ \"sled\" \ SET \ @@ -549,7 +551,7 @@ mod tests { \"sled\".\"id\", \ \"sled\".\"last_used_address\"\ ), \ - candidate_services AS (\ + \"candidate_services\" AS (\ SELECT \ gen_random_uuid() AS id, \ now() AS time_created, \ @@ -565,7 +567,7 @@ mod tests { \"candidate_sleds\".\"id\" = \"new_internal_ips\".\"id\"\ ))\ ), \ - inserted_services AS (\ + \"inserted_services\" AS (\ INSERT INTO \"service\" \ (\"id\", \"time_created\", \"time_modified\", \"sled_id\", \"ip\", \"kind\") \ SELECT \ diff --git a/nexus/src/db/subquery.rs b/nexus/src/db/subquery.rs index f28ee45fe2a..300b8eac1bc 100644 --- a/nexus/src/db/subquery.rs +++ b/nexus/src/db/subquery.rs @@ -29,7 +29,32 @@ macro_rules! subquery { } } -/// Represents a subquery within a CTE. +// TODO: I'd like to make a version of the macro that says: +// +// ``` +// subquery_alias!(existing_table as alias_name); +// ``` +// +// And which generates an AliasSource - very similar to the `alias!` macro +// in diesel, but which lets callers control the "AS" position. +// +// The existing alias macro implements QueryFragment as: +// +// " as " +// +// but we actually want this relationship flipped, kinda. +// +// We want: +// +// " as ..." +// #[macro_export] +// macro_rules! 
subquery_alias { +// ($table_name:ident as $alias_name:ident) => { +// ::diesel::alias!($table_name as $alias_name) +// } +// } + +/// Represents a named subquery within a CTE. /// /// For an expression like: /// @@ -44,8 +69,11 @@ macro_rules! subquery { /// "bar as ...". // This trait intentionally is agnostic to the SQL type of the subquery, // meaning that it can be used by the [`CteBuilder`] within a [`Vec`]. -pub trait Subquery { - fn name(&self) -> &'static str; +pub trait Subquery: QueryFragment { + /// Returns the underlying query fragment. + /// + /// For " as ", this refers to the "QUERY" portion + /// of SQL. fn query(&self) -> &dyn QueryFragment; } @@ -77,7 +105,7 @@ pub trait CteQuery: Query + QueryFragment {} impl CteQuery for T where T: Query + QueryFragment {} -/// A thin wrapper around a [`SubQUery`]. +/// A thin wrapper around a [`Subquery`]. /// /// Used to avoid orphan rules while creating blanket implementations. pub struct CteSubquery(Box); @@ -94,7 +122,7 @@ impl QueryFragment for CteSubquery { ) -> diesel::QueryResult<()> { out.unsafe_to_cache_prepared(); - out.push_sql(self.0.name()); + self.0.walk_ast(out.reborrow())?; out.push_sql(" AS ("); self.0.query().walk_ast(out.reborrow())?; out.push_sql(")"); diff --git a/services.sql b/services.sql index 3f616387f78..eeccaf53d1d 100644 --- a/services.sql +++ b/services.sql @@ -409,7 +409,6 @@ WITH ) RETURNING * ) -SELECT * FROM ( SELECT -- XXX: Do we care about the new/not new distinction?
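+    -- (Per the CTE generated by service_provision.rs, the final result is
+    -- previously_allocated_services UNION inserted_services, so callers cannot
+    -- currently distinguish newly inserted services from pre-existing ones.)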