From 4d22521349d58fcec4f317908ea9ee8a7d69d183 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Tue, 9 May 2023 12:29:36 -0700 Subject: [PATCH 01/11] pass initial TLS certificate into RSS --- Cargo.lock | 1 + openapi/bootstrap-agent.json | 33 ++++++ sled-agent/Cargo.toml | 1 + sled-agent/src/bootstrap/params.rs | 4 + sled-agent/src/config.rs | 2 + sled-agent/src/rack_setup/config.rs | 105 +++++++++++++++++- sled-agent/src/rack_setup/service.rs | 8 +- sled-agent/src/services.rs | 6 +- .../gimlet-standalone/config-rss.toml | 4 + smf/sled-agent/non-gimlet/config-rss.toml | 4 + 10 files changed, 154 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dda9a46a081..94dc62f5175 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4787,6 +4787,7 @@ dependencies = [ "propolis-client", "propolis-server", "rand 0.8.5", + "rcgen", "reqwest", "schemars", "semver 1.0.17", diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index cf556564ed6..cd8845023a8 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -156,6 +156,31 @@ } ] }, + "Certificate": { + "type": "object", + "properties": { + "cert": { + "type": "array", + "items": { + "type": "integer", + "format": "uint8", + "minimum": 0 + } + }, + "key": { + "type": "array", + "items": { + "type": "integer", + "format": "uint8", + "minimum": 0 + } + } + }, + "required": [ + "cert", + "key" + ] + }, "Component": { "type": "object", "properties": { @@ -274,6 +299,13 @@ "type": "string" } }, + "external_certificates": { + "description": "initial TLS certificates for the external API", + "type": "array", + "items": { + "$ref": "#/components/schemas/Certificate" + } + }, "external_dns_zone_name": { "description": "DNS name for the DNS zone delegated to the rack for external DNS", "type": "string" @@ -314,6 +346,7 @@ "required": [ "bootstrap_discovery", "dns_servers", + "external_certificates", "external_dns_zone_name", "internal_services_ip_pool_ranges", "ntp_servers", diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 15180aeeeb5..4268110442c 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -44,6 +44,7 @@ progenitor.workspace = true propolis-client = { workspace = true, features = [ "generated-migration" ] } propolis-server.workspace = true # Only used by the simulated sled agent rand = { workspace = true, features = ["getrandom"] } +rcgen.workspace = true reqwest = { workspace = true, features = ["rustls-tls", "stream"] } schemars = { workspace = true, features = [ "chrono", "uuid1" ] } semver.workspace = true diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index 56507af589d..1afca8dfe70 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -55,10 +55,14 @@ pub struct RackInitializeRequest { /// DNS name for the DNS zone delegated to the rack for external DNS pub external_dns_zone_name: String, + /// initial TLS certificates for the external API + pub external_certificates: Vec, + /// Configuration of the Recovery Silo (the initial Silo) pub recovery_silo: RecoverySiloConfig, } +pub type Certificate = nexus_client::types::Certificate; pub type RecoverySiloConfig = nexus_client::types::RecoverySiloConfig; /// Configuration information for launching a Sled Agent. diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index e98f9f9a71e..faf5f06e73b 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -79,6 +79,8 @@ pub enum ConfigError { #[source] err: toml::de::Error, }, + #[error("Failed to generate self-signed certificate: {0}")] + GenerateCertificate(#[source] anyhow::Error), #[error("Could not determine if host is a Gimlet: {0}")] SystemDetection(#[source] anyhow::Error), #[error("Could not enumerate physical links")] diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 8a1873afbb8..fd72a88a1c8 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -5,20 +5,81 @@ //! Interfaces for working with RSS config. use crate::config::ConfigError; +use anyhow::Context; use camino::Utf8Path; use omicron_common::address::{ get_64_subnet, Ipv6Subnet, AZ_PREFIX, RACK_PREFIX, SLED_PREFIX, }; +use serde::Deserialize; +use serde::Serialize; +use crate::bootstrap::params::Certificate; pub use crate::bootstrap::params::RackInitializeRequest as SetupServiceConfig; +// XXX-dap TODO-doc +#[derive(Deserialize, Serialize)] +struct FileBasedConfig { + #[serde(flatten)] + literal: SetupServiceConfig, + + #[serde(default)] + extra_cert: ExtraCert, +} + +#[derive(Default, Deserialize, Serialize)] +enum ExtraCert { + #[default] + None, + GenerateSelfSigned, +} + +impl TryFrom for SetupServiceConfig { + type Error = ConfigError; + + fn try_from(raw_config: FileBasedConfig) -> Result { + let extra_cert = match raw_config.extra_cert { + ExtraCert::None => None, + ExtraCert::GenerateSelfSigned => { + let domain = format!( + "*.sys.{}", + raw_config.literal.external_dns_zone_name + ); + let cert = + rcgen::generate_simple_self_signed(vec![domain.clone()]) + .with_context(|| { + format!( + "generating certificate for domain {:?}", + domain + ) + }) + .map_err(ConfigError::GenerateCertificate)?; + let key_bytes = cert.serialize_private_key_pem().into_bytes(); + let cert_bytes = cert + .serialize_pem() + .context("serializing generated certificate") + .map_err(ConfigError::GenerateCertificate)? + .into_bytes(); + Some(Certificate { key: key_bytes, cert: cert_bytes }) + } + }; + + let mut rv = raw_config.literal; + if let Some(cert) = extra_cert { + rv.external_certificates.push(cert) + } + + Ok(rv) + } +} + impl SetupServiceConfig { pub fn from_file>(path: P) -> Result { let path = path.as_ref(); let contents = std::fs::read_to_string(&path) .map_err(|err| ConfigError::Io { path: path.into(), err })?; - toml::from_str(&contents) - .map_err(|err| ConfigError::Parse { path: path.into(), err }) + let raw_config: FileBasedConfig = toml::from_str(&contents) + .map_err(|err| ConfigError::Parse { path: path.into(), err })?; + SetupServiceConfig::try_from(raw_config) } pub fn az_subnet(&self) -> Ipv6Subnet { @@ -44,9 +105,8 @@ mod test { use omicron_common::address::IpRange; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; - #[test] - fn test_subnets() { - let cfg = SetupServiceConfig { + fn test_config() -> SetupServiceConfig { + SetupServiceConfig { rack_subnet: "fd00:1122:3344:0100::".parse().unwrap(), bootstrap_discovery: BootstrapAddressDiscovery::OnlyOurs, rack_secret_threshold: 0, @@ -56,6 +116,7 @@ mod test { internal_services_ip_pool_ranges: vec![IpRange::from(IpAddr::V4( Ipv4Addr::new(129, 168, 1, 20), ))], + external_certificates: vec![], recovery_silo: RecoverySiloConfig { silo_name: "test-silo".parse().unwrap(), user_name: "dummy".parse().unwrap(), @@ -67,7 +128,12 @@ mod test { .parse() .unwrap(), }, - }; + } + } + + #[test] + fn test_subnets() { + let cfg = test_config(); assert_eq!( Ipv6Subnet::::new( @@ -110,4 +176,31 @@ mod test { cfg.sled_subnet(255) ); } + + #[test] + fn text_extra_certs() { + let cfg = test_config(); + assert!(cfg.external_certificates.is_empty()); + + // First, test a configuration that requests nothing in particular. + let file_cfg = FileBasedConfig { + literal: cfg.clone(), + extra_cert: ExtraCert::None, + }; + let read_cfg = SetupServiceConfig::try_from(file_cfg).unwrap(); + assert!(read_cfg.external_certificates.is_empty()); + + // Now test a configuration that requests a generated certificate. + let file_cfg = FileBasedConfig { + literal: cfg.clone(), + extra_cert: ExtraCert::GenerateSelfSigned, + }; + let read_cfg = SetupServiceConfig::try_from(file_cfg).unwrap(); + assert_eq!(read_cfg.external_certificates.len(), 1); + let cert = read_cfg.external_certificates.iter().next().unwrap(); + let key_pem = std::str::from_utf8(&cert.key) + .expect("generated PEM was not UTF-8"); + let _ = rcgen::KeyPair::from_pem(&key_pem) + .expect("generated PEM did not parse as KeyPair"); + } } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 1ed3d6176c1..d8b4edbf34e 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -729,13 +729,7 @@ impl ServiceInner { services, datasets, internal_services_ip_pool_ranges, - // TODO(https://github.com/oxidecomputer/omicron/issues/1959): Plumb - // these paths through RSS's API. - // - // These certificates CAN be updated through Nexus' HTTP API, but - // should be bootstrapped during the rack setup process to avoid - // the need for unencrypted communication. - certs: vec![], + certs: config.external_certificates.clone(), internal_dns_zone_config: d2n_params(&service_plan.dns_config), external_dns_zone_name: config.external_dns_zone_name.clone(), recovery_silo: config.recovery_silo.clone(), diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index d3a864b3906..ddfdf2e2f92 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -1316,7 +1316,11 @@ impl ServiceManager { IpAddr::V6(*internal_ip), NEXUS_INTERNAL_PORT, ), - request_body_max_bytes: 1048576, + // This has to be large enough to support, among + // other things, the initial list of TLS + // certificates provided by the customer during rack + // setup. + request_body_max_bytes: 10 * 1024 * 1024, ..Default::default() }, subnet: Ipv6Subnet::::new( diff --git a/smf/sled-agent/gimlet-standalone/config-rss.toml b/smf/sled-agent/gimlet-standalone/config-rss.toml index facea8837b8..04a4fd9d00e 100644 --- a/smf/sled-agent/gimlet-standalone/config-rss.toml +++ b/smf/sled-agent/gimlet-standalone/config-rss.toml @@ -20,6 +20,10 @@ dns_servers = [ "1.1.1.1", "9.9.9.9" ] # Delegated external DNS zone name external_dns_zone_name = "oxide.test" +# Initial TLS certificates for the external API +external_certificates = [] +extra_cert = "generate_self_signed" + # The IP ranges configured as part of the services IP Pool. # e.g., Nexus will be configured to use an address from this # pool as its external IP. diff --git a/smf/sled-agent/non-gimlet/config-rss.toml b/smf/sled-agent/non-gimlet/config-rss.toml index b81663b4371..15ec6a2f7d5 100644 --- a/smf/sled-agent/non-gimlet/config-rss.toml +++ b/smf/sled-agent/non-gimlet/config-rss.toml @@ -19,6 +19,10 @@ dns_servers = [ "1.1.1.1", "9.9.9.9" ] # Delegated external DNS zone name external_dns_zone_name = "oxide.test" +extra_cert = "generate_self_signed" + +# Initial TLS certificates for the external API +external_certificates = [] # The IP ranges configured as part of the services IP Pool. # e.g., Nexus will be configured to use an address from this From d15bb4f426c25bb184451a400c93bcc4509ba99f Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Tue, 9 May 2023 16:03:04 -0700 Subject: [PATCH 02/11] attempt: incorporate into end-to-end tests --- .github/buildomat/jobs/deploy.sh | 32 ++++++++++++++++++++ end-to-end-tests/src/helpers/ctx.rs | 45 ++++++++++++++++++++++++----- 2 files changed, 70 insertions(+), 7 deletions(-) diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index ce200422bcc..90ccf09d45c 100644 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -132,6 +132,37 @@ done ptime -m pfexec ./tools/create_virtual_hardware.sh +# +# Generate a self-signed certificate to use as the initial TLS certificate for +# the recovery Silo. Its DNS name is determined by the silo name and the +# delegated external DNS name, both of which are in the RSS config file. In a +# real system, the certificate would come from the customer during initial rack +# setup on the technician port. +# +tar xf out/omicron-sled-agent.tar pkg/config-rss.toml +SILO_NAME="$(sed -n 's/silo_name = "\(.*\)"/\1/p' pkg/config-rss.toml)" +EXTERNAL_DNS_DOMAIN="$(sed -n 's/external_dns_zone_name = "\(.*\)"/\1/p' pkg/config-rss.toml)" +rm -f pkg/config-rss.toml + +TLS_NAME="$SILO_NAME.sys.$EXTERNAL_DOMAIN" +openssl req -newkey rsa:4096 \ + -x509 \ + -sha256 \ + -days 3 \ + -nodes \ + -out "pkg/initial-tls-cert.pem" \ + -keyout "pkg/initial-tls-key.pem" \ + -subj "/CN=$TLS_NAME" +tar rvf out/omicron-sled-agent.tar \ + pkg/initial-tls-cert.pem \ + pkg/initial-tls-key.pem +rm -f pkg/initial-tls-cert.pem pkg/initial-tls-key.pem +rmdir pkg +# The actual end-to-end tests need the certificate. This is where that file +# will end up once installed. +E2E_TLS_CERT="/opt/oxide/sled-agent/pkg/initial-tls-cert.pem" + + # # Image-related tests use images served by catacomb. The lab network is # IPv4-only; the propolis zones are IPv6-only. These steps set up tcpproxy @@ -218,6 +249,7 @@ export RUST_BACKTRACE=1 ./tests/bootstrap rm ./tests/bootstrap +export E2E_TLS_CERT for test_bin in tests/*; do ./"$test_bin" done diff --git a/end-to-end-tests/src/helpers/ctx.rs b/end-to-end-tests/src/helpers/ctx.rs index b9ddec8f773..621a952c3c5 100644 --- a/end-to-end-tests/src/helpers/ctx.rs +++ b/end-to-end-tests/src/helpers/ctx.rs @@ -23,6 +23,9 @@ const RSS_CONFIG_STR: &str = include_str!(concat!( "/../smf/sled-agent/non-gimlet/config-rss.toml" )); +// Environment variable containing the path to a cert that we should trust. +const E2E_TLS_CERT_ENV: &str = "E2E_TLS_CERT"; + #[derive(Clone)] pub struct Context { pub client: Client, @@ -161,7 +164,12 @@ pub async fn nexus_addr() -> Result { } async fn get_base_url() -> Result { - Ok(format!("http://{}", nexus_addr().await?)) + let proto = if std::env::var(E2E_TLS_CERT_ENV).is_ok() { + "http" + } else { + "https" + }; + Ok(format!("{}://{}", proto, nexus_addr().await?)) } async fn build_authenticated_client() -> Result { @@ -184,11 +192,29 @@ async fn build_authenticated_client() -> Result { // Do not have reqwest follow redirects. That's because our login response // includes both a redirect and the session cookie header. If reqwest // follows the redirect, we won't have a chance to get the cookie. - let reqwest_login_client = reqwest::ClientBuilder::new() + let mut builder = reqwest::ClientBuilder::new() .connect_timeout(Duration::from_secs(15)) .redirect(reqwest::redirect::Policy::none()) - .timeout(Duration::from_secs(60)) - .build()?; + .timeout(Duration::from_secs(60)); + + // If we were provided with a path to a certificate in the environment, add + // it as a trusted one. + let extra_root_cert = std::env::var(E2E_TLS_CERT_ENV) + .ok() + .map(|path| { + let cert_bytes = std::fs::read(&path).with_context(|| { + format!("reading certificate from {:?}", &path) + })?; + reqwest::tls::Certificate::from_pem(&cert_bytes).with_context( + || format!("parsing certificate from {:?}", &path), + ) + }) + .transpose()?; + if let Some(cert) = &extra_root_cert { + builder = builder.add_root_certificate(cert.clone()); + } + + let reqwest_login_client = builder.build()?; let login_url = format!("{}/login/{}/local", base_url, silo_name); // By the time we get here, we generally would have successfully resolved @@ -241,10 +267,15 @@ async fn build_authenticated_client() -> Result { HeaderValue::from_str(session_token).unwrap(), ); - let reqwest_client = reqwest::ClientBuilder::new() + let mut builder = reqwest::ClientBuilder::new() .default_headers(headers) .connect_timeout(Duration::from_secs(15)) - .timeout(Duration::from_secs(60)) - .build()?; + .timeout(Duration::from_secs(60)); + + if let Some(cert) = extra_root_cert { + builder = builder.add_root_certificate(cert); + } + + let reqwest_client = builder.build()?; Ok(Client::new_with_client(&base_url, reqwest_client)) } From 2a57a2bfae600779d9b6979dc364d650273cf7a9 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Tue, 9 May 2023 16:12:01 -0700 Subject: [PATCH 03/11] rustfmt --- end-to-end-tests/src/helpers/ctx.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/end-to-end-tests/src/helpers/ctx.rs b/end-to-end-tests/src/helpers/ctx.rs index 621a952c3c5..3982c6b89a4 100644 --- a/end-to-end-tests/src/helpers/ctx.rs +++ b/end-to-end-tests/src/helpers/ctx.rs @@ -164,11 +164,8 @@ pub async fn nexus_addr() -> Result { } async fn get_base_url() -> Result { - let proto = if std::env::var(E2E_TLS_CERT_ENV).is_ok() { - "http" - } else { - "https" - }; + let proto = + if std::env::var(E2E_TLS_CERT_ENV).is_ok() { "http" } else { "https" }; Ok(format!("{}://{}", proto, nexus_addr().await?)) } From 3a8f533364d1e5d6b4dcc30f5cf3dbf0a2f68d45 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Tue, 9 May 2023 16:49:34 -0700 Subject: [PATCH 04/11] support TLS cert injection through files rather than generating one --- sled-agent/Cargo.toml | 2 +- sled-agent/src/config.rs | 4 +- sled-agent/src/rack_setup/config.rs | 193 ++++++++++++++++------------ 3 files changed, 111 insertions(+), 88 deletions(-) diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 4268110442c..753fc981003 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -44,7 +44,6 @@ progenitor.workspace = true propolis-client = { workspace = true, features = [ "generated-migration" ] } propolis-server.workspace = true # Only used by the simulated sled agent rand = { workspace = true, features = ["getrandom"] } -rcgen.workspace = true reqwest = { workspace = true, features = ["rustls-tls", "stream"] } schemars = { workspace = true, features = [ "chrono", "uuid1" ] } semver.workspace = true @@ -81,6 +80,7 @@ omicron-test-utils.workspace = true openapi-lint.workspace = true openapiv3.workspace = true pretty_assertions.workspace = true +rcgen.workspace = true serial_test.workspace = true subprocess.workspace = true slog-async.workspace = true diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index faf5f06e73b..e9a4d9848b6 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -79,8 +79,8 @@ pub enum ConfigError { #[source] err: toml::de::Error, }, - #[error("Failed to generate self-signed certificate: {0}")] - GenerateCertificate(#[source] anyhow::Error), + #[error("Loading certificate: {0}")] + Certificate(#[source] anyhow::Error), #[error("Could not determine if host is a Gimlet: {0}")] SystemDetection(#[source] anyhow::Error), #[error("Could not enumerate physical links")] diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index fd72a88a1c8..905fc8ae1e5 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -5,81 +5,69 @@ //! Interfaces for working with RSS config. use crate::config::ConfigError; -use anyhow::Context; use camino::Utf8Path; use omicron_common::address::{ get_64_subnet, Ipv6Subnet, AZ_PREFIX, RACK_PREFIX, SLED_PREFIX, }; -use serde::Deserialize; -use serde::Serialize; use crate::bootstrap::params::Certificate; pub use crate::bootstrap::params::RackInitializeRequest as SetupServiceConfig; -// XXX-dap TODO-doc -#[derive(Deserialize, Serialize)] -struct FileBasedConfig { - #[serde(flatten)] - literal: SetupServiceConfig, - - #[serde(default)] - extra_cert: ExtraCert, -} - -#[derive(Default, Deserialize, Serialize)] -enum ExtraCert { - #[default] - None, - GenerateSelfSigned, -} - -impl TryFrom for SetupServiceConfig { - type Error = ConfigError; - - fn try_from(raw_config: FileBasedConfig) -> Result { - let extra_cert = match raw_config.extra_cert { - ExtraCert::None => None, - ExtraCert::GenerateSelfSigned => { - let domain = format!( - "*.sys.{}", - raw_config.literal.external_dns_zone_name - ); - let cert = - rcgen::generate_simple_self_signed(vec![domain.clone()]) - .with_context(|| { - format!( - "generating certificate for domain {:?}", - domain - ) - }) - .map_err(ConfigError::GenerateCertificate)?; - let key_bytes = cert.serialize_private_key_pem().into_bytes(); - let cert_bytes = cert - .serialize_pem() - .context("serializing generated certificate") - .map_err(ConfigError::GenerateCertificate)? - .into_bytes(); - Some(Certificate { key: key_bytes, cert: cert_bytes }) - } - }; - - let mut rv = raw_config.literal; - if let Some(cert) = extra_cert { - rv.external_certificates.push(cert) - } - - Ok(rv) - } -} - impl SetupServiceConfig { pub fn from_file>(path: P) -> Result { let path = path.as_ref(); let contents = std::fs::read_to_string(&path) .map_err(|err| ConfigError::Io { path: path.into(), err })?; - let raw_config: FileBasedConfig = toml::from_str(&contents) + let mut raw_config: SetupServiceConfig = toml::from_str(&contents) .map_err(|err| ConfigError::Parse { path: path.into(), err })?; - SetupServiceConfig::try_from(raw_config) + + // In the same way that sled-agent itself (our caller) discovers the + // optional config-rss.toml in a well-known path relative to its config + // file, we look for a pair of well-known paths adjacent to + // config-rss.toml that specify an extra TLS certificate and private + // key. This is used by the end-to-end tests. Any developer can also + // use this to inject a TLS certificate into their setup. + // (config-rss.toml is only used for dev/test, not production + // deployments, which will always get their RSS configuration from + // Wicket.) + if let Some(parent) = path.parent() { + let cert_path = parent.join("initial-tls-cert.pem"); + let key_path = parent.join("initial-tls-key.pem"); + let cert_bytes = std::fs::read(&cert_path); + let key_bytes = std::fs::read(&key_path); + match (cert_bytes, key_bytes) { + (Ok(cert), Ok(key)) => { + // XXX-dap could validate better + raw_config + .external_certificates + .push(Certificate { key, cert }); + } + (Err(cert_error), Err(key_error)) + if cert_error.kind() == std::io::ErrorKind::NotFound + && key_error.kind() == std::io::ErrorKind::NotFound => + { + // Fine. No extra cert was provided. + } + (Err(cert_error), _) => { + return Err(ConfigError::Certificate( + anyhow::Error::new(cert_error).context(format!( + "loading certificate from {:?}", + cert_path + )), + )); + } + (_, Err(key_error)) => { + return Err(ConfigError::Certificate( + anyhow::Error::new(key_error).context(format!( + "loading private key from {:?}", + key_path + )), + )); + } + }; + } + + Ok(raw_config) } pub fn az_subnet(&self) -> Ipv6Subnet { @@ -102,11 +90,14 @@ mod test { use super::*; use crate::bootstrap::params::BootstrapAddressDiscovery; use crate::bootstrap::params::RecoverySiloConfig; + use anyhow::Context; + use camino::Utf8PathBuf; use omicron_common::address::IpRange; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; - fn test_config() -> SetupServiceConfig { - SetupServiceConfig { + #[test] + fn test_subnets() { + let cfg = SetupServiceConfig { rack_subnet: "fd00:1122:3344:0100::".parse().unwrap(), bootstrap_discovery: BootstrapAddressDiscovery::OnlyOurs, rack_secret_threshold: 0, @@ -128,12 +119,7 @@ mod test { .parse() .unwrap(), }, - } - } - - #[test] - fn test_subnets() { - let cfg = test_config(); + }; assert_eq!( Ipv6Subnet::::new( @@ -178,24 +164,61 @@ mod test { } #[test] - fn text_extra_certs() { - let cfg = test_config(); + fn test_extra_certs() { + // The stock non-Gimlet config has no TLS certificates. + let path = Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("../smf/sled-agent/non-gimlet/config-rss.toml"); + let cfg = SetupServiceConfig::from_file(&path) + .unwrap_or_else(|e| panic!("failed to parse {:?}: {}", &path, e)); assert!(cfg.external_certificates.is_empty()); - // First, test a configuration that requests nothing in particular. - let file_cfg = FileBasedConfig { - literal: cfg.clone(), - extra_cert: ExtraCert::None, - }; - let read_cfg = SetupServiceConfig::try_from(file_cfg).unwrap(); - assert!(read_cfg.external_certificates.is_empty()); - - // Now test a configuration that requests a generated certificate. - let file_cfg = FileBasedConfig { - literal: cfg.clone(), - extra_cert: ExtraCert::GenerateSelfSigned, - }; - let read_cfg = SetupServiceConfig::try_from(file_cfg).unwrap(); + // Now let's create a configuration that does have an adjacent + // certificate and key. + let tempdir = + camino_tempfile::tempdir().expect("creating temporary directory"); + println!("using temp path: {:?}", tempdir); + + // Generate the certificate. + let domain = format!( + "{}.sys.{}", + cfg.external_dns_zone_name, + cfg.recovery_silo.silo_name.as_str(), + ); + let cert = rcgen::generate_simple_self_signed(vec![domain.clone()]) + .unwrap_or_else(|error| { + panic!( + "generating certificate for domain {:?}: {}", + domain, error + ) + }); + + // Write the configuration file. + let cfg_bytes = std::fs::read(&path).unwrap(); + let cfg_path = tempdir.path().join("config-rss.toml"); + std::fs::write(&cfg_path, &cfg_bytes) + .with_context(|| format!("failed to write to {:?}", &tempdir)) + .unwrap(); + + // Write the certificate. + let cert_bytes = cert + .serialize_pem() + .expect("serializing generated certificate") + .into_bytes(); + let cert_path = tempdir.path().join("initial-tls-cert.pem"); + std::fs::write(&cert_path, &cert_bytes) + .with_context(|| format!("failed to write to {:?}", &cert_path)) + .unwrap(); + + // Write the private key. + let key_path = tempdir.path().join("initial-tls-key.pem"); + let key_bytes = cert.serialize_private_key_pem().into_bytes(); + std::fs::write(&key_path, &key_bytes) + .with_context(|| format!("failed to write to {:?}", &key_path)) + .unwrap(); + + // Now try to load it all. + let read_cfg = SetupServiceConfig::from_file(&cfg_path) + .expect("failed to read generated config with certificate"); assert_eq!(read_cfg.external_certificates.len(), 1); let cert = read_cfg.external_certificates.iter().next().unwrap(); let key_pem = std::str::from_utf8(&cert.key) From c3e05d0f0965969a980a0c75c7e5bbcfe088dfd8 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Tue, 9 May 2023 17:03:12 -0700 Subject: [PATCH 05/11] remove vestigial config --- smf/sled-agent/gimlet-standalone/config-rss.toml | 1 - smf/sled-agent/non-gimlet/config-rss.toml | 1 - 2 files changed, 2 deletions(-) diff --git a/smf/sled-agent/gimlet-standalone/config-rss.toml b/smf/sled-agent/gimlet-standalone/config-rss.toml index 04a4fd9d00e..f26f67da602 100644 --- a/smf/sled-agent/gimlet-standalone/config-rss.toml +++ b/smf/sled-agent/gimlet-standalone/config-rss.toml @@ -22,7 +22,6 @@ external_dns_zone_name = "oxide.test" # Initial TLS certificates for the external API external_certificates = [] -extra_cert = "generate_self_signed" # The IP ranges configured as part of the services IP Pool. # e.g., Nexus will be configured to use an address from this diff --git a/smf/sled-agent/non-gimlet/config-rss.toml b/smf/sled-agent/non-gimlet/config-rss.toml index 15ec6a2f7d5..5e8c752f6f3 100644 --- a/smf/sled-agent/non-gimlet/config-rss.toml +++ b/smf/sled-agent/non-gimlet/config-rss.toml @@ -19,7 +19,6 @@ dns_servers = [ "1.1.1.1", "9.9.9.9" ] # Delegated external DNS zone name external_dns_zone_name = "oxide.test" -extra_cert = "generate_self_signed" # Initial TLS certificates for the external API external_certificates = [] From 134c7617f541b111db966d6b07e8f55529811113 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Tue, 9 May 2023 19:21:01 -0700 Subject: [PATCH 06/11] could use some more logging on failure of end-to-end test login path --- Cargo.lock | 1 + end-to-end-tests/Cargo.toml | 1 + end-to-end-tests/src/helpers/ctx.rs | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 94dc62f5175..42eb55caa5b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2177,6 +2177,7 @@ dependencies = [ "async-trait", "base64 0.21.0", "camino", + "chrono", "http", "omicron-sled-agent", "omicron-test-utils", diff --git a/end-to-end-tests/Cargo.toml b/end-to-end-tests/Cargo.toml index 08bfab6a6bf..a9b53d568cd 100644 --- a/end-to-end-tests/Cargo.toml +++ b/end-to-end-tests/Cargo.toml @@ -9,6 +9,7 @@ anyhow = { workspace = true, features = ["backtrace"] } async-trait.workspace = true base64.workspace = true camino.workspace = true +chrono.workspace = true http.workspace = true omicron-sled-agent.workspace = true omicron-test-utils.workspace = true diff --git a/end-to-end-tests/src/helpers/ctx.rs b/end-to-end-tests/src/helpers/ctx.rs index 3982c6b89a4..30d67ba8c8b 100644 --- a/end-to-end-tests/src/helpers/ctx.rs +++ b/end-to-end-tests/src/helpers/ctx.rs @@ -1,5 +1,6 @@ use crate::helpers::generate_name; use anyhow::{anyhow, Context as _, Result}; +use chrono::Utc; use omicron_sled_agent::rack_setup::config::SetupServiceConfig; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oxide_client::types::{Name, ProjectCreate, UsernamePasswordCredentials}; @@ -227,12 +228,14 @@ async fn build_authenticated_client() -> Result { // Use a raw reqwest client because it's not clear that Progenitor // is intended to support endpoints that return 300-level response // codes. See progenitor#451. + eprintln!("{}: attempting to log into API", Utc::now()); reqwest_login_client .post(&login_url) .body(login_request_body.clone()) .send() .await .map_err(|e| { + eprintln!("{}: login failed: {:?}", Utc::now(), e); if e.is_connect() { CondCheckError::NotYet } else { @@ -248,6 +251,7 @@ async fn build_authenticated_client() -> Result { .await .context("logging in")?; + eprintln!("{}: login succeeded", Utc::now()); let session_cookie = response .headers() .get(http::header::SET_COOKIE) From 59c7cd603ee41ddc80bf22ed17a994d0e26a6ac4 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Tue, 9 May 2023 20:20:50 -0700 Subject: [PATCH 07/11] fix port --- end-to-end-tests/src/helpers/ctx.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/end-to-end-tests/src/helpers/ctx.rs b/end-to-end-tests/src/helpers/ctx.rs index 30d67ba8c8b..c1f86ac8576 100644 --- a/end-to-end-tests/src/helpers/ctx.rs +++ b/end-to-end-tests/src/helpers/ctx.rs @@ -94,6 +94,8 @@ pub async fn nexus_addr() -> Result { return Ok(host); } + let port = if std::env::var(E2E_TLS_CERT_ENV).is_ok() { 80 } else { 443 }; + // Otherwise, use the RSS configuration to find the DNS server, silo name, // and delegated DNS zone name. Use this to look up Nexus's IP in the // external DNS server. @@ -155,7 +157,7 @@ pub async fn nexus_addr() -> Result { .iter() .next() .ok_or(CondCheckError::NotYet)?; - Ok(SocketAddr::from((addr, 80))) + Ok(SocketAddr::from((addr, port))) }, &Duration::from_secs(1), &Duration::from_secs(300), From 07c0ec612263eaf65e6a09557a0bc8aa5955571c Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Tue, 9 May 2023 20:38:06 -0700 Subject: [PATCH 08/11] reqwest errors debug output is not so great --- end-to-end-tests/src/helpers/ctx.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/end-to-end-tests/src/helpers/ctx.rs b/end-to-end-tests/src/helpers/ctx.rs index c1f86ac8576..bc2950966c4 100644 --- a/end-to-end-tests/src/helpers/ctx.rs +++ b/end-to-end-tests/src/helpers/ctx.rs @@ -237,7 +237,7 @@ async fn build_authenticated_client() -> Result { .send() .await .map_err(|e| { - eprintln!("{}: login failed: {:?}", Utc::now(), e); + eprintln!("{}: login failed: {:#}", Utc::now(), e); if e.is_connect() { CondCheckError::NotYet } else { From 3d8401b2906791366b0368b51845f7842f18510e Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Wed, 10 May 2023 14:16:16 -0700 Subject: [PATCH 09/11] reqwest does not allow the self-signed cert to be a CA cert; plus we must use a hostname for TLS validation to work --- .github/buildomat/jobs/deploy.sh | 22 ++- Cargo.lock | 2 + end-to-end-tests/Cargo.toml | 2 + end-to-end-tests/src/helpers/ctx.rs | 279 +++++++++++++++++----------- end-to-end-tests/src/helpers/mod.rs | 2 +- 5 files changed, 192 insertions(+), 115 deletions(-) diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index 90ccf09d45c..14d0b0f08d2 100644 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -144,15 +144,31 @@ SILO_NAME="$(sed -n 's/silo_name = "\(.*\)"/\1/p' pkg/config-rss.toml)" EXTERNAL_DNS_DOMAIN="$(sed -n 's/external_dns_zone_name = "\(.*\)"/\1/p' pkg/config-rss.toml)" rm -f pkg/config-rss.toml -TLS_NAME="$SILO_NAME.sys.$EXTERNAL_DOMAIN" -openssl req -newkey rsa:4096 \ +# By default, OpenSSL creates self-signed certificates with "CA:true". The TLS +# implementation used by reqwest rejects endpoint certificates that are also CA +# certificates. So in order to use the certificate, we need one without +# "CA:true". There doesn't seem to be a way to do this on the command line. +# Instead, we must override the system configuration with our own configuration +# file. There's virtually nothing in it. +TLS_NAME="$SILO_NAME.sys.$EXTERNAL_DNS_DOMAIN" +openssl req \ + -newkey rsa:4096 \ -x509 \ -sha256 \ -days 3 \ -nodes \ -out "pkg/initial-tls-cert.pem" \ -keyout "pkg/initial-tls-key.pem" \ - -subj "/CN=$TLS_NAME" + -subj "/CN=$TLS_NAME" \ + -addext "subjectAltName=DNS:$TLS_NAME" \ + -addext "basicConstraints=critical,CA:FALSE" \ + -config /dev/stdin < Result { - build_authenticated_client().await -} - fn rss_config() -> Result { toml::from_str(RSS_CONFIG_STR) .with_context(|| format!("parsing {:?} as TOML", RSS_CONFIG_PATH)) } -pub async fn nexus_addr() -> Result { - // Check $OXIDE_HOST first. - if let Ok(host) = - std::env::var("OXIDE_HOST").map_err(anyhow::Error::from).and_then(|s| { - Ok(Url::parse(&s)? - .host_str() - .context("no host in OXIDE_HOST url")? - .parse()?) - }) - { - return Ok(host); - } - - let port = if std::env::var(E2E_TLS_CERT_ENV).is_ok() { 80 } else { 443 }; - - // Otherwise, use the RSS configuration to find the DNS server, silo name, - // and delegated DNS zone name. Use this to look up Nexus's IP in the - // external DNS server. - // - // First, load the RSS configuration file. - let config = rss_config()?; +fn nexus_external_dns_name(config: &SetupServiceConfig) -> String { + format!( + "{}.sys.{}", + config.recovery_silo.silo_name.as_str(), + config.external_dns_zone_name + ) +} - // From config-rss.toml, grab the first address from the configured services +fn external_dns_addr(config: &SetupServiceConfig) -> Result { + // From the RSS config, grab the first address from the configured services // IP pool as the DNS server's IP address. let dns_ip = config .internal_services_ip_pool_ranges @@ -113,81 +99,51 @@ pub async fn nexus_addr() -> Result { .ok_or_else(|| { anyhow!( "failed to get first IP from internal service \ - pool in {}", - RSS_CONFIG_PATH, + pool in RSS configuration" ) })?; - let dns_addr = SocketAddr::from((dns_ip, 53)); - - // Resolve the DNS name of the recovery Silo that ought to have been created - // already. This could take a few seconds, since it's asynchronous with the - // rack initialization request. - let silo_name = &config.recovery_silo.silo_name; - let dns_name = format!( - "{}.sys.{}", - silo_name.as_str(), - &config.external_dns_zone_name - ); - - let mut resolver_config = ResolverConfig::new(); - resolver_config.add_name_server(NameServerConfig { - socket_addr: dns_addr, - protocol: Protocol::Udp, - tls_dns_name: None, - trust_nx_responses: false, - bind_addr: None, - }); - - let resolver = - TokioAsyncResolver::tokio(resolver_config, ResolverOpts::default()) - .context("failed to create resolver")?; - - wait_for_condition::<_, anyhow::Error, _, _>( - || async { - let addr = resolver - .lookup_ip(&dns_name) - .await - .map_err(|e| match e.kind() { - ResolveErrorKind::NoRecordsFound { .. } - | ResolveErrorKind::Timeout => CondCheckError::NotYet, - _ => CondCheckError::Failed(anyhow::Error::new(e).context( - format!("resolving {:?} from {}", dns_name, dns_addr), - )), - })? - .iter() - .next() - .ok_or(CondCheckError::NotYet)?; - Ok(SocketAddr::from((addr, port))) - }, - &Duration::from_secs(1), - &Duration::from_secs(300), - ) - .await - .context("failed to get Nexus addr") + Ok(SocketAddr::from((dns_ip, 53))) } -async fn get_base_url() -> Result { - let proto = - if std::env::var(E2E_TLS_CERT_ENV).is_ok() { "http" } else { "https" }; - Ok(format!("{}://{}", proto, nexus_addr().await?)) +pub async fn nexus_addr() -> Result { + // Check $OXIDE_HOST first. + if let Ok(host) = + std::env::var("OXIDE_HOST").map_err(anyhow::Error::from).and_then(|s| { + Ok(Url::parse(&s)? + .host_str() + .context("no host in OXIDE_HOST url")? + .parse::()? + .ip()) + }) + { + return Ok(host); + } + + // Otherwise, use the RSS configuration to find the DNS server, silo name, + // and delegated DNS zone name. Use this to look up Nexus's IP in the + // external DNS server. This could take a few seconds, since it's + // asynchronous with the rack initialization request. + let config = rss_config()?; + let dns_addr = external_dns_addr(&config)?; + let dns_name = nexus_external_dns_name(&config); + let resolver = CustomDnsResolver::new(dns_addr)?; + resolver + .wait_for_records( + &dns_name, + Duration::from_secs(1), + Duration::from_secs(300), + ) + .await } -async fn build_authenticated_client() -> Result { +pub async fn build_client() -> Result { + // Make a reqwest client that we can use to make the initial login request. + // To do this, we need to find the IP of the external DNS server in the RSS + // configuration and then set up a custom resolver to use this DNS server. let config = rss_config()?; - let base_url = get_base_url().await?; - let silo_name = config.recovery_silo.silo_name.as_str(); - let username: oxide_client::types::UserId = - config.recovery_silo.user_name.as_str().parse().map_err(|s| { - anyhow!("parsing configured recovery user name: {:?}", s) - })?; - // See the comment in the config file. - let password: oxide_client::types::Password = "oxide".parse().unwrap(); - let login_request_body = - serde_json::to_string(&UsernamePasswordCredentials { - username: username, - password: password, - }) - .context("serializing login request body")?; + let dns_addr = external_dns_addr(&config)?; + let dns_name = nexus_external_dns_name(&config); + let resolver = Arc::new(CustomDnsResolver::new(dns_addr)?); // Do not have reqwest follow redirects. That's because our login response // includes both a redirect and the session cookie header. If reqwest @@ -195,36 +151,52 @@ async fn build_authenticated_client() -> Result { let mut builder = reqwest::ClientBuilder::new() .connect_timeout(Duration::from_secs(15)) .redirect(reqwest::redirect::Policy::none()) + .dns_resolver(resolver.clone()) .timeout(Duration::from_secs(60)); // If we were provided with a path to a certificate in the environment, add // it as a trusted one. - let extra_root_cert = std::env::var(E2E_TLS_CERT_ENV) - .ok() - .map(|path| { + let (proto, extra_root_cert) = match std::env::var(E2E_TLS_CERT_ENV) { + Err(_) => ("http", None), + Ok(path) => { let cert_bytes = std::fs::read(&path).with_context(|| { format!("reading certificate from {:?}", &path) })?; - reqwest::tls::Certificate::from_pem(&cert_bytes).with_context( - || format!("parsing certificate from {:?}", &path), - ) - }) - .transpose()?; + let cert = reqwest::tls::Certificate::from_pem(&cert_bytes) + .with_context(|| { + format!("parsing certificate from {:?}", &path) + })?; + ("https", Some(cert)) + } + }; + if let Some(cert) = &extra_root_cert { builder = builder.add_root_certificate(cert.clone()); } let reqwest_login_client = builder.build()?; + + // Prepare to make a login request. + let base_url = format!("{}://{}", proto, dns_name); + let silo_name = config.recovery_silo.silo_name.as_str(); let login_url = format!("{}/login/{}/local", base_url, silo_name); + let username: oxide_client::types::UserId = + config.recovery_silo.user_name.as_str().parse().map_err(|s| { + anyhow!("parsing configured recovery user name: {:?}", s) + })?; + // See the comment in the config file about this password. + let password: oxide_client::types::Password = "oxide".parse().unwrap(); + let login_request_body = + serde_json::to_string(&UsernamePasswordCredentials { + username: username, + password: password, + }) + .context("serializing login request body")?; - // By the time we get here, we generally would have successfully resolved - // Nexus's external IP address from the external DNS server. So we'd - // expect Nexus to be up. But that's not necessarily true: external DNS can - // be set up during rack initialization, before Nexus has opened its - // external listening socket. This is arguably a bug, advertising a service - // before it's ready, but a pretty niche corner case (rack initialization) - // and anyway DNS is always best-effort. The point is: let's retry a little - // while if we can't immediately connect. + // By the time we get here, Nexus might not be up yet. It may not have + // published its names to external DNS, and even if it has, it may not have + // opened its external listening socket. So we have to retry a bit until we + // succeed. let response = wait_for_condition( || async { // Use a raw reqwest client because it's not clear that Progenitor @@ -248,7 +220,7 @@ async fn build_authenticated_client() -> Result { }) }, &Duration::from_secs(1), - &Duration::from_secs(30), + &Duration::from_secs(300), ) .await .context("logging in")?; @@ -273,6 +245,7 @@ async fn build_authenticated_client() -> Result { let mut builder = reqwest::ClientBuilder::new() .default_headers(headers) .connect_timeout(Duration::from_secs(15)) + .dns_resolver(resolver) .timeout(Duration::from_secs(60)); if let Some(cert) = extra_root_cert { @@ -282,3 +255,87 @@ async fn build_authenticated_client() -> Result { let reqwest_client = builder.build()?; Ok(Client::new_with_client(&base_url, reqwest_client)) } + +// XXX-dap TODO-cleanup the lifetime constraints on the `Resolve` trait make it +// hard to avoid an Arc here. +/// Wrapper around a `TokioAsyncResolver` so that we can impl +/// `reqwest::dns::Resolve` for it. +struct CustomDnsResolver { + dns_addr: SocketAddr, + resolver: Arc, +} + +impl CustomDnsResolver { + fn new(dns_addr: SocketAddr) -> Result { + let mut resolver_config = ResolverConfig::new(); + resolver_config.add_name_server(NameServerConfig { + socket_addr: dns_addr, + protocol: Protocol::Udp, + tls_dns_name: None, + trust_nx_responses: false, + bind_addr: None, + }); + + let resolver = Arc::new( + TokioAsyncResolver::tokio(resolver_config, ResolverOpts::default()) + .context("failed to create resolver")?, + ); + Ok(CustomDnsResolver { dns_addr, resolver }) + } + + async fn wait_for_records( + &self, + dns_name: &str, + check_period: Duration, + max: Duration, + ) -> Result { + wait_for_condition::<_, anyhow::Error, _, _>( + || async { + self.resolver + .lookup_ip(dns_name) + .await + .map_err(|e| match e.kind() { + ResolveErrorKind::NoRecordsFound { .. } + | ResolveErrorKind::Timeout => CondCheckError::NotYet, + _ => CondCheckError::Failed( + anyhow::Error::new(e).context(format!( + "resolving {:?} from {}", + dns_name, self.dns_addr + )), + ), + })? + .iter() + .next() + .ok_or(CondCheckError::NotYet) + }, + &check_period, + &max, + ) + .await + .with_context(|| { + format!( + "failed to resolve {:?} from {:?} within {:?}", + dns_name, self.dns_addr, max + ) + }) + } +} + +impl reqwest::dns::Resolve for CustomDnsResolver { + fn resolve( + &self, + name: hyper::client::connect::dns::Name, + ) -> reqwest::dns::Resolving { + let resolver = self.resolver.clone(); + async move { + let list = resolver.lookup_ip(name.as_str()).await?; + Ok(Box::new(list.into_iter().map(|s| { + // reqwest does not appear to use the port number here. + // (See the docs for `ClientBuilder::resolve()`, which isn't + // the same thing, but is related.) + SocketAddr::from((s, 0)) + })) as Box + Send>) + } + .boxed() + } +} diff --git a/end-to-end-tests/src/helpers/mod.rs b/end-to-end-tests/src/helpers/mod.rs index 5030fe58975..2f514cdb129 100644 --- a/end-to-end-tests/src/helpers/mod.rs +++ b/end-to-end-tests/src/helpers/mod.rs @@ -18,7 +18,7 @@ pub fn generate_name(prefix: &str) -> Result { /// the DHCP range is 100-249, and in the buildomat lab environment the network /// is currently private.) pub async fn get_system_ip_pool() -> Result<(Ipv4Addr, Ipv4Addr)> { - let nexus_addr = match nexus_addr().await?.ip() { + let nexus_addr = match nexus_addr().await? { IpAddr::V4(addr) => addr.octets(), IpAddr::V6(_) => bail!("not sure what to do about IPv6 here"), }; From fe03367570d29d95abe2603e56a7d85c60075b26 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Wed, 10 May 2023 14:36:05 -0700 Subject: [PATCH 10/11] nits --- .github/buildomat/jobs/deploy.sh | 2 ++ end-to-end-tests/src/helpers/ctx.rs | 4 ++-- sled-agent/src/rack_setup/config.rs | 1 - 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index 14d0b0f08d2..f564b790027 100644 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -144,12 +144,14 @@ SILO_NAME="$(sed -n 's/silo_name = "\(.*\)"/\1/p' pkg/config-rss.toml)" EXTERNAL_DNS_DOMAIN="$(sed -n 's/external_dns_zone_name = "\(.*\)"/\1/p' pkg/config-rss.toml)" rm -f pkg/config-rss.toml +# # By default, OpenSSL creates self-signed certificates with "CA:true". The TLS # implementation used by reqwest rejects endpoint certificates that are also CA # certificates. So in order to use the certificate, we need one without # "CA:true". There doesn't seem to be a way to do this on the command line. # Instead, we must override the system configuration with our own configuration # file. There's virtually nothing in it. +# TLS_NAME="$SILO_NAME.sys.$EXTERNAL_DNS_DOMAIN" openssl req \ -newkey rsa:4096 \ diff --git a/end-to-end-tests/src/helpers/ctx.rs b/end-to-end-tests/src/helpers/ctx.rs index da67e2c9584..ce7b92e76db 100644 --- a/end-to-end-tests/src/helpers/ctx.rs +++ b/end-to-end-tests/src/helpers/ctx.rs @@ -256,12 +256,12 @@ pub async fn build_client() -> Result { Ok(Client::new_with_client(&base_url, reqwest_client)) } -// XXX-dap TODO-cleanup the lifetime constraints on the `Resolve` trait make it -// hard to avoid an Arc here. /// Wrapper around a `TokioAsyncResolver` so that we can impl /// `reqwest::dns::Resolve` for it. struct CustomDnsResolver { dns_addr: SocketAddr, + // The lifetime constraints on the `Resolve` trait make it hard to avoid an + // Arc here. resolver: Arc, } diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 905fc8ae1e5..9f9dab11b48 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -37,7 +37,6 @@ impl SetupServiceConfig { let key_bytes = std::fs::read(&key_path); match (cert_bytes, key_bytes) { (Ok(cert), Ok(key)) => { - // XXX-dap could validate better raw_config .external_certificates .push(Certificate { key, cert }); From 57b9eec4ffac098bc892589648ed1a5f08b8dc5d Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 11 May 2023 11:54:44 -0700 Subject: [PATCH 11/11] review feedback --- docs/how-to-run.adoc | 18 ++++++++++++++++-- sled-agent/src/rack_setup/config.rs | 7 ++++--- .../gimlet-standalone/config-rss.toml | 10 ++++++++++ smf/sled-agent/non-gimlet/config-rss.toml | 10 ++++++++++ 4 files changed, 40 insertions(+), 5 deletions(-) diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index 8b706773f07..69cc6498dc7 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -91,8 +91,8 @@ The control plane repository contains a packaging tool which bundles binaries and SMF manifests. After building the expected binaries, they can be packaged in a format which lets them be transferred to a Helios machine. -This tool acts on a `package-manifest.toml` file which describes the packages to be -bundled in the build. +This tool acts on a `package-manifest.toml` file which describes the packages +to be bundled in the build. Configuration files are used to select IP addresses, and to manage Zpools utilized by the Sled Agent. These configuration files are located within @@ -137,6 +137,20 @@ Created new build target 'default' and set it as active NOTE: The `target create` command will set the new target as active and thus let you omit the `-t` flag in subsequent commands. +Initial TLS certificates for the externally-facing endpoints are also part of +the runtime configuration that would normally come from the customer during +initial setup. In development, by default, a deployed Omicron system will have +no TLS certificates. You can deploy a system with TLS certificates by putting +a PEM-format certificate chain and private key into files called +"initial-tls-cert.pem" and "initial-tls-key.pem" in the same directory as the +"config-rss.toml" file that you're using. This must happen before packaging up +the sled agent (which is the next step below). + +If you don't specify initial certificates in this way, you can always load +certificates later via the API. This assumes you have a way to reach the API +that doesn't require a valid TLS certificate. Today, Nexus always starts an +HTTP server that you can use for this. This may be removed in the future. + === Building To actually kick off the build and package everything, you can run: diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 9f9dab11b48..4816bb5f2cd 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -192,10 +192,11 @@ mod test { }); // Write the configuration file. - let cfg_bytes = std::fs::read(&path).unwrap(); let cfg_path = tempdir.path().join("config-rss.toml"); - std::fs::write(&cfg_path, &cfg_bytes) - .with_context(|| format!("failed to write to {:?}", &tempdir)) + let _ = std::fs::copy(&path, &cfg_path) + .with_context(|| { + format!("failed to copy file {:?} to {:?}", &path, &cfg_path) + }) .unwrap(); // Write the certificate. diff --git a/smf/sled-agent/gimlet-standalone/config-rss.toml b/smf/sled-agent/gimlet-standalone/config-rss.toml index f26f67da602..62ca9683283 100644 --- a/smf/sled-agent/gimlet-standalone/config-rss.toml +++ b/smf/sled-agent/gimlet-standalone/config-rss.toml @@ -1,4 +1,8 @@ # RSS (Rack Setup Service) "stand-in" configuration. +# +# This file conforms to the schema for "RackInitializeRequest" in the Bootstrap +# Agent API. See the `RackInitializeRequest` type in bootstrap-agent or its +# OpenAPI spec (in openapi/bootstrap-agent.json in the root of this workspace). # The /56 subnet for the rack. # Also implies the /48 AZ subnet. @@ -21,6 +25,12 @@ dns_servers = [ "1.1.1.1", "9.9.9.9" ] external_dns_zone_name = "oxide.test" # Initial TLS certificates for the external API +# +# For the structure of these certificates, see `Certificate` in the Nexus +# internal API. In practice, it can be unwieldy to put them here. You can also +# specify a certificate by including the certificate chain and private key in +# PEM-format files called "initial-tls-cert.pem" and "initial-tls-key.pem", +# respectively, in the same place as this configuration file. external_certificates = [] # The IP ranges configured as part of the services IP Pool. diff --git a/smf/sled-agent/non-gimlet/config-rss.toml b/smf/sled-agent/non-gimlet/config-rss.toml index 5e8c752f6f3..091293d677b 100644 --- a/smf/sled-agent/non-gimlet/config-rss.toml +++ b/smf/sled-agent/non-gimlet/config-rss.toml @@ -1,4 +1,8 @@ # RSS (Rack Setup Service) "stand-in" configuration. +# +# This file conforms to the schema for "RackInitializeRequest" in the Bootstrap +# Agent API. See the `RackInitializeRequest` type in bootstrap-agent or its +# OpenAPI spec (in openapi/bootstrap-agent.json in the root of this workspace). # The /56 subnet for the rack. # Also implies the /48 AZ subnet. @@ -21,6 +25,12 @@ dns_servers = [ "1.1.1.1", "9.9.9.9" ] external_dns_zone_name = "oxide.test" # Initial TLS certificates for the external API +# +# For the structure of these certificates, see `Certificate` in the Nexus +# internal API. In practice, it can be unwieldy to put them here. You can also +# specify a certificate by including the certificate chain and private key in +# PEM-format files called "initial-tls-cert.pem" and "initial-tls-key.pem", +# respectively, in the same place as this configuration file. external_certificates = [] # The IP ranges configured as part of the services IP Pool.