From 8b16dd66b2deee187a32514b7c48f7624568060d Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Fri, 29 Apr 2022 16:20:01 -0400 Subject: [PATCH 01/12] Improve error messages in omicron-package --- package/src/bin/omicron-package.rs | 34 ++++++++++++++++++++++++------ package/src/lib.rs | 21 ++++++++++++------ 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index 5858a22287c..d3daf67d67d 100644 --- a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -124,12 +124,19 @@ async fn do_build(config: &Config) -> Result<()> { // Calculates the SHA256 digest for a file. async fn get_sha256_digest(path: &PathBuf) -> Result { - let mut reader = BufReader::new(tokio::fs::File::open(&path).await?); + let mut reader = BufReader::new( + tokio::fs::File::open(&path) + .await + .with_context(|| format!("could not open {path:?}"))?, + ); let mut context = DigestContext::new(&SHA256); let mut buffer = [0; 1024]; loop { - let count = reader.read(&mut buffer).await?; + let count = reader + .read(&mut buffer) + .await + .with_context(|| format!("failed to read {path:?}"))?; if count == 0 { break; } else { @@ -170,21 +177,31 @@ async fn get_external_package( commit, path.as_path().file_name().unwrap().to_string_lossy(), ); - let response = reqwest::Client::new().get(url).send().await?; + let response = reqwest::Client::new() + .get(&url) + .send() + .await + .with_context(|| format!("failed to get {url}"))?; progress.set_length( response .content_length() .ok_or_else(|| anyhow!("Missing Content Length"))?, ); - let mut file = tokio::fs::File::create(path).await?; + let mut file = tokio::fs::File::create(&path) + .await + .with_context(|| format!("failed to create {path:?}"))?; let mut stream = response.bytes_stream(); let mut context = DigestContext::new(&SHA256); while let Some(chunk) = stream.next().await { - let chunk = chunk?; + let chunk = chunk.with_context(|| { + 
format!("failed reading response from {url}") + })?; // Update the running SHA digest context.update(&chunk); // Update the downloaded file - file.write_all(&chunk).await?; + file.write_all(&chunk) + .await + .with_context(|| format!("failed writing {path:?}"))?; // Record progress in the UI progress.increment(chunk.len().try_into().unwrap()); } @@ -255,7 +272,10 @@ async fn do_package(config: &Config, output_directory: &Path) -> Result<()> { progress.set_message("bundle package".to_string()); package .create_with_progress(&progress, &output_directory) - .await?; + .await + .with_context(|| { + format!("failed to create {package_name} in {output_directory:?}") + })?; progress.finish(); Ok(()) }, diff --git a/package/src/lib.rs b/package/src/lib.rs index 8bc557b042f..0740ca6d93e 100644 --- a/package/src/lib.rs +++ b/package/src/lib.rs @@ -12,17 +12,26 @@ use thiserror::Error; /// Errors which may be returned when parsing the server configuration. #[derive(Error, Debug)] pub enum ParseError { - #[error("Cannot parse toml: {0}")] - Toml(#[from] toml::de::Error), - #[error("IO error: {0}")] - Io(#[from] std::io::Error), + #[error("Error deserializing toml from {path}: {err}")] + Toml { path: PathBuf, err: toml::de::Error }, + #[error("IO error: {message}: {err}")] + Io { message: String, err: std::io::Error }, } pub fn parse, C: DeserializeOwned>( path: P, ) -> Result { - let contents = std::fs::read_to_string(path.as_ref())?; - let cfg = toml::from_str::(&contents)?; + let path = path.as_ref(); + let contents = std::fs::read_to_string(path).map_err(|err| { + ParseError::Io { + message: format!("failed reading {path:?}"), + err, + } + })?; + let cfg = toml::from_str::(&contents).map_err(|err| ParseError::Toml { + path: path.to_path_buf(), + err, + })?; Ok(cfg) } From 4a3b752507d4dc751fc5d573561fee1d2d07388e Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Fri, 29 Apr 2022 16:21:49 -0400 Subject: [PATCH 02/12] Update `thing-flinger sync`'s file list 1. 
Include specific files/directories under `out/` (prebuilt dependencies) 2. Exclude `config-rss.toml` (needs to be moved to `thing-flinger overlay`) --- deploy/src/bin/thing-flinger.rs | 76 ++++++++++++++++++++++++--------- 1 file changed, 57 insertions(+), 19 deletions(-) diff --git a/deploy/src/bin/thing-flinger.rs b/deploy/src/bin/thing-flinger.rs index c83c6f2a394..3e19b99869d 100644 --- a/deploy/src/bin/thing-flinger.rs +++ b/deploy/src/bin/thing-flinger.rs @@ -153,6 +153,19 @@ fn do_exec( Ok(()) } +// start an `rsync` command with args common to all our uses +fn rsync_common() -> Command { + let mut cmd = Command::new("rsync"); + cmd.arg("-az") + .arg("-e") + .arg("ssh") + .arg("--delete") + .arg("--progress") + .arg("--out-format") + .arg("File changed: %o %t %f"); + cmd +} + fn do_sync(config: &Config) -> Result<()> { let builder = config.servers.get(&config.builder.server).ok_or_else(|| { @@ -161,8 +174,17 @@ fn do_sync(config: &Config) -> Result<()> { // For rsync to copy from the source appropriately we must guarantee a // trailing slash. - let src = - format!("{}/", config.omicron_path.canonicalize()?.to_string_lossy()); + let src = format!( + "{}/", + config + .omicron_path + .canonicalize() + .with_context(|| format!( + "could not canonicalize {}", + config.omicron_path.display() + ))? 
+ .to_string_lossy() + ); let dst = format!( "{}@{}:{}", builder.username, @@ -171,29 +193,45 @@ fn do_sync(config: &Config) -> Result<()> { ); println!("Synchronizing source files to: {}", dst); + let mut cmd = rsync_common(); - let mut cmd = Command::new("rsync"); - cmd.arg("-az") - .arg("-e") - .arg("ssh") - .arg("--delete") - .arg("--progress") - .arg("--exclude") + // exclude build and development environment artifacts + cmd.arg("--exclude") .arg("target/") .arg("--exclude") - .arg("out/") - .arg("--exclude") - .arg("/cockroachdb/") - .arg("--exclude") - .arg("/clickhouse/") + .arg("*.vdev") .arg("--exclude") .arg("*.swp") .arg("--exclude") - .arg(".git/") - .arg("--out-format") - .arg("File changed: %o %t %f") - .arg(&src) - .arg(&dst); + .arg(".git/"); + + // exclude `config-rss.toml`, which needs to be sent to only one target + // system. we handle this in `do_overlay` below. + cmd.arg("--exclude").arg("**/config-rss.toml"); + + // Exclude `out/`, except for the prebuilt dependencies we keep there. + // The include/include/exclude dance is specific to how rsync applies + // patterns: it checks each file or directory against all supplied patterns, + // and stops on the first match. + // + // The steps below ensure: + // + // 1. We include exactly `out/`, allowing rsync to recurse into it. + // 2. We include each of the specific children of `out/` we want to sync. + // 3. We exclude `out/*`, skipping any other children of `out/`. 
+ cmd.arg("--include") + .arg("out/") + .arg("--include") + .arg("out/clickhouse") + .arg("--include") + .arg("out/cockroachdb") + .arg("--include") + .arg("out/console-assets") + .arg("--exclude") + .arg("out/*"); + + // finish with src/dst + cmd.arg(&src).arg(&dst); let status = cmd.status().context(format!("Failed to run command: ({:?})", cmd))?; if !status.success() { From 24fb8ea04199ef00655fcf420bef6e96a31c60a8 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Fri, 29 Apr 2022 16:27:32 -0400 Subject: [PATCH 03/12] Teach `thing-flinger sync` about `config-rss.toml` --- deploy/src/bin/deployment-example.toml | 4 +- deploy/src/bin/thing-flinger.rs | 132 ++++++++++++++++--------- 2 files changed, 88 insertions(+), 48 deletions(-) diff --git a/deploy/src/bin/deployment-example.toml b/deploy/src/bin/deployment-example.toml index 7121911dd3a..95311ccb054 100644 --- a/deploy/src/bin/deployment-example.toml +++ b/deploy/src/bin/deployment-example.toml @@ -15,7 +15,9 @@ server = "foo" omicron_path = "/remote/path/to/omicron" [deployment] -servers = ["foo", "bar"] +# which server is responsible for running the rack setup service; must +# refer to one of the `servers` in the servers table +rss_server = "foo" rack_secret_threshold = 2 # Location where files to install will be placed before running # `omicron-package install` diff --git a/deploy/src/bin/thing-flinger.rs b/deploy/src/bin/thing-flinger.rs index 3e19b99869d..8625dc34277 100644 --- a/deploy/src/bin/thing-flinger.rs +++ b/deploy/src/bin/thing-flinger.rs @@ -32,7 +32,7 @@ struct Server { #[derive(Deserialize, Debug)] struct Deployment { - servers: BTreeSet, + rss_server: String, rack_secret_threshold: usize, staging_dir: PathBuf, } @@ -300,10 +300,8 @@ fn do_uninstall( ) -> Result<()> { let mut deployment_src = PathBuf::from(&config.deployment.staging_dir); deployment_src.push(&artifact_dir); - for server_name in &config.deployment.servers { - let builder = &config.servers[&config.builder.server]; - let server 
= &config.servers[server_name]; - + let builder = &config.servers[&config.builder.server]; + for server in config.servers.values() { copy_omicron_package_binary_to_staging(config, builder, server)?; // Run `omicron-package uninstall` on the deployment server @@ -331,7 +329,7 @@ fn do_install(config: &Config, artifact_dir: &Path, install_dir: &Path) { Vec::<(String, ScopedJoinHandle<'_, Result<()>>)>::new(); // Spawn a thread for each server install - for server_name in &config.deployment.servers { + for server_name in config.servers.keys() { handles.push(( server_name.to_owned(), s.spawn(move |_| -> Result<()> { @@ -372,30 +370,61 @@ fn do_install(config: &Config, artifact_dir: &Path, install_dir: &Path) { } fn do_overlay(config: &Config) -> Result<()> { + let builder = &config.servers[&config.builder.server]; let mut root_path = PathBuf::from(&config.builder.omicron_path); // TODO: This needs to match the artifact_dir in `package` root_path.push("out/overlay"); - let server_dirs = dir_per_deploy_server(config, &root_path); - let builder = &config.servers[&config.builder.server]; - overlay_sled_agent(&builder, config, &server_dirs) + + // Build a list of directories for each server to be deployed and tag which + // one is the server to run RSS; e.g., for servers ["foo", "bar", "baz"] + // with root_path "/my/path", we produce + // [ + // "/my/path/foo/sled-agent/pkg", + // "/my/path/bar/sled-agent/pkg", + // "/my/path/baz/sled-agent/pkg", + // ] + // As we're doing so, record which directory is the one for the server that + // will run RSS. 
+ let mut rss_server_dir = None; + let sled_agent_dirs = config + .servers + .keys() + .map(|server_name| { + let mut dir = root_path.clone(); + dir.push(server_name); + dir.push("sled-agent/pkg"); + if *server_name == config.deployment.rss_server { + rss_server_dir = Some(dir.clone()); + } + dir + }) + .collect::>(); + + // we know exactly one of the servers matches `rss_server` from our config + // validation, so we can unwrap here + let rss_server_dir = rss_server_dir.unwrap(); + + overlay_sled_agent(builder, config, &sled_agent_dirs)?; + overlay_rss_config(builder, config, &rss_server_dir)?; + + Ok(()) } fn overlay_sled_agent( - server: &Server, + builder: &Server, config: &Config, - server_dirs: &[PathBuf], + sled_agent_dirs: &[PathBuf], ) -> Result<()> { - let sled_agent_dirs: Vec = server_dirs + // Send SSH command to create directories on builder and generate share + // secrets. + + // TODO do we need any escaping here? this will definitely break if any dir + // names have spaces + let dirs = sled_agent_dirs .iter() - .map(|dir| { - let mut dir = PathBuf::from(dir); - dir.push("sled-agent/pkg"); - dir - }) - .collect(); + .map(|dir| format!("{} ", dir.display())) + .collect::(); - // Create directories on builder - let dirs = dir_string(&sled_agent_dirs); let cmd = format!( "sh -c 'for dir in {}; do mkdir -p $dir; done' && \ cd {} && \ @@ -406,7 +435,38 @@ fn overlay_sled_agent( config.deployment.rack_secret_threshold, dirs ); - ssh_exec(server, &cmd, false) + ssh_exec(builder, &cmd, false) +} + +fn overlay_rss_config( + builder: &Server, + config: &Config, + rss_server_dir: &Path, +) -> Result<()> { + // Sync `config-rss.toml` to the directory for the RSS server on the + // builder. 
+ let src = config.omicron_path.join("smf/sled-agent/config-rss.toml"); + let dst = format!( + "{}@{}:{}", + builder.username, + builder.addr, + rss_server_dir.display() + ); + + let mut cmd = rsync_common(); + cmd.arg(&src).arg(&dst); + + let status = + cmd.status().context(format!("Failed to run command: ({:?})", cmd))?; + if !status.success() { + return Err(FlingError::FailedSync { + src: src.to_string_lossy().to_string(), + dst, + } + .into()); + } + + Ok(()) } fn single_server_install( @@ -573,29 +633,6 @@ fn restart_services(destination: &Server) -> Result<()> { ssh_exec(destination, "svcadm restart sled-agent", false) } -fn dir_string(dirs: &[PathBuf]) -> String { - dirs.iter().map(|dir| dir.to_string_lossy().to_string() + " ").collect() -} - -// For each server to be deployed, append the server name to `root`. -// -// Example (for servers "foo", "bar", "baz"): -// -// dir_per_deploy_server(&config, "/my/path") -> -// vec!["/my/path/foo", "/my/path/bar", "/my/path/baz"] -fn dir_per_deploy_server(config: &Config, root: &Path) -> Vec { - config - .deployment - .servers - .iter() - .map(|server_dir| { - let mut dir = PathBuf::from(root); - dir.push(server_dir); - dir - }) - .collect() -} - fn ssh_exec( server: &Server, remote_cmd: &str, @@ -661,10 +698,11 @@ fn validate(config: &Config) -> Result<(), FlingError> { "deployment.staging_dir", )?; - validate_servers(&config.deployment.servers, &config.servers)?; - validate_servers( - &BTreeSet::from([config.builder.server.clone()]), + &BTreeSet::from([ + config.builder.server.clone(), + config.deployment.rss_server.clone(), + ]), &config.servers, ) } From 0c3420b7121c61d425390f667ce0f4e2bcbaf56c Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Fri, 29 Apr 2022 16:30:12 -0400 Subject: [PATCH 04/12] Update thing-flinger docs --- deploy/README.adoc | 101 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 87 insertions(+), 14 deletions(-) diff --git a/deploy/README.adoc b/deploy/README.adoc index 
eaf17ca782a..36c1ede4264 100644 --- a/deploy/README.adoc +++ b/deploy/README.adoc @@ -54,7 +54,11 @@ thing-flinger, pointing out room for improvement. `thing-flinger` defines three types of nodes: - * Client - Where a user typically edits their code and runs thing-flinger. This can run any OS. + * Client - Where a user typically edits their code and runs thing-flinger. In + theory this can run any OS, but currently it needs to be a Helios system. + omicron downloads prebuilt dependencies (clickhouse, cockroachDB) for the + current system, and we will sync those binaries from the client to the + builder (and ultimately to the deployment servers). * Builder - A Helios box where Omicron is built and packaged * Deployed Server - Helios machines where Omicron will be installed and run @@ -83,42 +87,55 @@ all the dependencies for Omicron installed. Following the *prerequisites* in the https://github.com/oxidecomputer/omicron/#build-and-run[Build and run] section of the main Omicron README is probably a good idea. -=== Command Based Workflow +==== Update `config-rss.toml` -==== Build thing-flinger on client -`thing-flinger` is part of the `omicron-package` crate. +Currently rack setup is driven by a configuration file that lives at +`smf/sled-agent/config-rss.toml` in the root of this repository. The committed +configuration of that file contains a single `[[requests]]` entry (with many +services inside it), which means it will start services on only one sled. To +start services (e.g., nexus) on multiple sleds, add additional entries to that +configuration file before proceeding. -`cargo build -p omicron-package` +=== Command Based Workflow ==== sync Copy your source code to the builder. Note that this copies over your `.git` subdirectory on purpose so that a branch can be configured for building with the `git_treeish` field in the toml `builder` table. 
-`./target/debug/thing-flinger -c sync` +`cargo run --bin thing-flinger -- -c sync` -==== build-minimal -Build necessary parts of omicron on the builder, required for future use by thing-flinger. +==== check (optional) +Run `cargo check` on the builder against the copy of `omicron` that was sync'd +to it in the previous step. -`./target/debug/thing-flinger -c build-minimal` +`cargo run --bin thing-flinger -- -c build check` -==== package +==== package Build and package omicron using `omicron-package` on the builder. -`./target/debug/thing-flinger -c package` +`cargo run --bin thing-flinger -- -c build package` ==== overlay Create files that are unique to each deployment server. -`./target/debug/thing-flinger -c overlay` +`cargo run --bin thing-flinger -- -c overlay` -==== install +==== install Install omicron to all machines, in parallel. This consists of copying the packaged omicron tarballs along with overlay files, and omicron-package and its manifest to a `staging` directory on each deployment server, and then running omicron-package, installing overlay files, and restarting services. -`./target/debug/thing-flinger -c install` +`cargo run --bin thing-flinger -- -c deploy install` + +==== uninstall +Uninstall omicron from all machines. + +`cargo run --bin thing-flinger -- -c deploy uninstall` + +Note: This does not fully undo everything done by the `install` step above; +noteably overlay files are not removed. === Current Limitations @@ -140,3 +157,59 @@ effort to use securely. This particular implementation wraps the openssh ssh cli `std::process::Command`, rather than using the `ssh2` crate, because ssh2, as a wrapper around `libssh`, does not support agent-forwarding. +== Notes on Using VMs as Deployed Servers on a Linux Host + +TODO: This section should be fleshed out more and potentially lifted to its own +document; for now this is a collection of rough notes. 
+ +It's possible to use a Linux libvirt host running multiple helios VMs as the +builder/deployment server targets, but it requires some additional setup beyond +what [`helios-engvm`](https://github.com/oxidecomputer/helios-engvm). + +To enable communication between the VMs over their IPv6 bootstrap networks: + +1. Enable IPv6 and DHCP on the virtual network libvirt uses for the VMs; e.g., + +```xml + + + + + +``` + +After booting the VMs with this enabled, they should be able to ping each other +over their acquired IPv6 addresses, but connecting to each other over the +`bootstrap6` interface that sled-agent creates will fail. + +2. Explicitly add routes in the Linux host for the `bootstrap6` addresses, +specifying the virtual interface libvirt created that is used by the VMs. + +``` +bash% sudo ip -6 route add fdb0:5254:13:7331::1/64 dev virbr1 +bash% sudo ip -6 route add fdb0:5254:f0:acfd::1/64 dev virbr1 +``` + +3. Once the sled-agents advance sufficiently to set up `sled6` interfaces, +routes need to be added for them both in the Linux host and in the Helios VMs. 
+Assuming two sleds with these interfaces: + +``` +# VM 1 +vioif0/sled6 static ok fd00:1122:3344:1::1/64 +# VM 2 +vioif0/sled6 static ok fd00:1122:3344:2::1/64 +``` + +The Linux host needs to be told to route that subnet to the appropriate virtual +interface: + +``` +bash% ip -6 route add fd00:1122:3344::1/48 dev virbr1 +``` + +and each Helios VM needs to be told to route that subnet to the host gateway: + +``` +vm% pfexec route add -inet6 fd00:1122:3344::/48 $IPV6_HOST_GATEWAY_ADDR +``` From 78cacab6f0469762bf38f4451de8d7d2d9d5e344 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Fri, 29 Apr 2022 16:31:35 -0400 Subject: [PATCH 05/12] cargo fmt --- package/src/lib.rs | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/package/src/lib.rs b/package/src/lib.rs index 0740ca6d93e..72be8c4d4bf 100644 --- a/package/src/lib.rs +++ b/package/src/lib.rs @@ -23,15 +23,10 @@ pub fn parse, C: DeserializeOwned>( ) -> Result { let path = path.as_ref(); let contents = std::fs::read_to_string(path).map_err(|err| { - ParseError::Io { - message: format!("failed reading {path:?}"), - err, - } - })?; - let cfg = toml::from_str::(&contents).map_err(|err| ParseError::Toml { - path: path.to_path_buf(), - err, + ParseError::Io { message: format!("failed reading {path:?}"), err } })?; + let cfg = toml::from_str::(&contents) + .map_err(|err| ParseError::Toml { path: path.to_path_buf(), err })?; Ok(cfg) } From 3ae02541d8e4791ecc8a90e2d5bd629d5dc51bf0 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 2 May 2022 10:54:42 -0400 Subject: [PATCH 06/12] Exclude `out/` from thing-flinger sync --- deploy/src/bin/thing-flinger.rs | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/deploy/src/bin/thing-flinger.rs b/deploy/src/bin/thing-flinger.rs index 8625dc34277..14a5aab1a60 100644 --- a/deploy/src/bin/thing-flinger.rs +++ b/deploy/src/bin/thing-flinger.rs @@ -203,33 +203,14 @@ fn do_sync(config: &Config) -> Result<()> 
{ .arg("--exclude") .arg("*.swp") .arg("--exclude") - .arg(".git/"); + .arg(".git/") + .arg("--exclude") + .arg("out/"); // exclude `config-rss.toml`, which needs to be sent to only one target // system. we handle this in `do_overlay` below. cmd.arg("--exclude").arg("**/config-rss.toml"); - // Exclude `out/`, except for the prebuilt dependencies we keep there. - // The include/include/exclude dance is specific to how rsync applies - // patterns: it checks each file or directory against all supplied patterns, - // and stops on the first match. - // - // The steps below ensure: - // - // 1. We include exactly `out/`, allowing rsync to recurse into it. - // 2. We include each of the specific children of `out/` we want to sync. - // 3. We exclude `out/*`, skipping any other children of `out/`. - cmd.arg("--include") - .arg("out/") - .arg("--include") - .arg("out/clickhouse") - .arg("--include") - .arg("out/cockroachdb") - .arg("--include") - .arg("out/console-assets") - .arg("--exclude") - .arg("out/*"); - // finish with src/dst cmd.arg(&src).arg(&dst); let status = @@ -525,7 +506,11 @@ fn copy_package_artifacts_to_staging( ) -> Result<()> { let cmd = format!( "rsync -avz -e 'ssh -o StrictHostKeyChecking=no' \ - --exclude overlay/ {} {}@{}:{}", + --include 'out/' \ + --include 'out/*.tar' \ + --include 'out/*.tar.gz' \ + --exclude '*' \ + {} {}@{}:{}", pkg_dir, destination.username, destination.addr, From aed6aeecc739569e81e95983ee881cbaf40396a3 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 2 May 2022 13:01:34 -0400 Subject: [PATCH 07/12] Add `-y` option to `install_prerequisites.sh` to skip confirm prompts --- tools/install_prerequisites.sh | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/install_prerequisites.sh b/tools/install_prerequisites.sh index 7f3f4aed19d..4ea77c059de 100755 --- a/tools/install_prerequisites.sh +++ b/tools/install_prerequisites.sh @@ -19,13 +19,28 @@ function on_exit trap on_exit ERR -# 
Offers a confirmation prompt. +# Parse command line options: +# +# -y Assume "yes" instead of showing confirmation prompts. +ASSUME_YES="false" +while getopts y flag +do + case "${flag}" in + y) ASSUME_YES="true" ; + esac +done + +# Offers a confirmation prompt, unless we were passed `-y`. # # Args: # $1: Text to be displayed function confirm { - read -r -p "$1 (y/n): " response + if [[ "${ASSUME_YES}" == "true" ]]; then + response=y + else + read -r -p "$1 (y/n): " response + fi case $response in [yY]) true From 3056c12cbcd07d14c89f60e9b7c55a7c1b4f6d33 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 2 May 2022 15:18:08 -0400 Subject: [PATCH 08/12] Add `-p` option to `install_prerequisites.sh` to skip PATH check --- tools/install_prerequisites.sh | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/install_prerequisites.sh b/tools/install_prerequisites.sh index 4ea77c059de..c348c6d971a 100755 --- a/tools/install_prerequisites.sh +++ b/tools/install_prerequisites.sh @@ -23,10 +23,12 @@ trap on_exit ERR # # -y Assume "yes" instead of showing confirmation prompts. ASSUME_YES="false" -while getopts y flag +SKIP_PATH_CHECK="false" +while getopts yp flag do case "${flag}" in - y) ASSUME_YES="true" ; + y) ASSUME_YES="true" ;; + p) SKIP_PATH_CHECK="true" ;; esac done @@ -166,7 +168,12 @@ function show_hint esac } -# Check all paths before returning an error. +# Check all paths before returning an error, unless we were told not to. 
+if [[ "$SKIP_PATH_CHECK" == "true" ]]; then + echo "All prerequisites installed successfully" + exit 0 +fi + ANY_PATH_ERROR="false" for command in "${expected_in_path[@]}"; do rc=0 From 95c9133592873fdbbf3612606715fb2a47c7b537 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 2 May 2022 15:18:49 -0400 Subject: [PATCH 09/12] Add server names to thing-flinger install logging --- deploy/src/bin/thing-flinger.rs | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/deploy/src/bin/thing-flinger.rs b/deploy/src/bin/thing-flinger.rs index 14a5aab1a60..ed81d2003a4 100644 --- a/deploy/src/bin/thing-flinger.rs +++ b/deploy/src/bin/thing-flinger.rs @@ -460,16 +460,25 @@ fn single_server_install( ) -> Result<()> { let server = &config.servers[server_name]; - println!("COPYING packages from builder -> deploy server"); + println!( + "COPYING packages from builder ({}) -> deploy server ({})", + builder.addr, server_name + ); copy_package_artifacts_to_staging(config, pkg_dir, builder, server)?; - println!("COPYING deploy tool from builder -> deploy server"); + println!( + "COPYING deploy tool from builder ({}) -> deploy server ({})", + builder.addr, server_name + ); copy_omicron_package_binary_to_staging(config, builder, server)?; - println!("COPYING manifest from builder -> deploy server"); + println!( + "COPYING manifest from builder ({}) -> deploy server ({})", + builder.addr, server_name + ); copy_package_manifest_to_staging(config, builder, server)?; - println!("INSTALLING packages on deploy server"); + println!("INSTALLING packages on deploy server ({})", server_name); run_omicron_package_install_from_staging( config, server, @@ -477,7 +486,10 @@ fn single_server_install( &install_dir, )?; - println!("COPYING overlay files from builder -> deploy server"); + println!( + "COPYING overlay files from builder ({}) -> deploy server ({})", + builder.addr, server_name + ); copy_overlay_files_to_staging( config, pkg_dir, @@ -486,10 
+498,10 @@ fn single_server_install( server_name, )?; - println!("INSTALLING overlay files into the install directory of the deploy server"); + println!("INSTALLING overlay files into the install directory of the deploy server ({})", server_name); install_overlay_files_from_staging(config, server, &install_dir)?; - println!("RESTARTING services on the deploy server"); + println!("RESTARTING services on the deploy server ({})", server_name); restart_services(server) } From 95312b64ad034d83d143517d05f6725ba1064d66 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 2 May 2022 15:19:58 -0400 Subject: [PATCH 10/12] Add `thing-flinger install-prereqs` subcommand --- deploy/README.adoc | 14 +++--- deploy/src/bin/thing-flinger.rs | 75 +++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 5 deletions(-) diff --git a/deploy/README.adoc b/deploy/README.adoc index 36c1ede4264..eb3e77c7fea 100644 --- a/deploy/README.adoc +++ b/deploy/README.adoc @@ -54,11 +54,7 @@ thing-flinger, pointing out room for improvement. `thing-flinger` defines three types of nodes: - * Client - Where a user typically edits their code and runs thing-flinger. In - theory this can run any OS, but currently it needs to be a Helios system. - omicron downloads prebuilt dependencies (clickhouse, cockroachDB) for the - current system, and we will sync those binaries from the client to the - builder (and ultimately to the deployment servers). + * Client - Where a user typically edits their code and runs thing-flinger. This can run any OS. * Builder - A Helios box where Omicron is built and packaged * Deployed Server - Helios machines where Omicron will be installed and run @@ -105,6 +101,14 @@ table. `cargo run --bin thing-flinger -- -c sync` +==== Install Prerequisites +Install necessary build and runtime dependencies (including downloading prebuilt +binaries like Clickhouse and CockroachDB) on the builder and all deployment +targets. 
This step only needs to be performed once, absent any changes to the +dependencies, but is idempotent so may be run multiple times. + +`cargo run --bin thing-flinger -- -c install-prereqs` + ==== check (optional) Run `cargo check` on the builder against the copy of `omicron` that was sync'd to it in the previous step. diff --git a/deploy/src/bin/thing-flinger.rs b/deploy/src/bin/thing-flinger.rs index ed81d2003a4..e311361ee4e 100644 --- a/deploy/src/bin/thing-flinger.rs +++ b/deploy/src/bin/thing-flinger.rs @@ -78,6 +78,10 @@ enum SubCommand { servers: Option>, }, + /// Install necessary prerequisites on the "builder" server and all "deploy" + /// servers. + InstallPrereqs, + /// Sync our local source to the build host Sync, @@ -222,6 +226,76 @@ fn do_sync(config: &Config) -> Result<()> { Ok(()) } +fn do_install_prereqs(config: &Config) -> Result<()> { + // we need to rsync `./tools/*` to each of the deployment targets (the + // "builder" already has it via `do_sync()`), and then run `pfexec + // tools/install_prerequisites.sh` on each system. + let src = format!( + // the `./` here is load-bearing; it interacts with `--relative` to tell + // rsync to create `tools` but none of its parents + "{}/./tools/", + config + .omicron_path + .canonicalize() + .with_context(|| format!( + "could not canonicalize {}", + config.omicron_path.display() + ))? 
+ .to_string_lossy() + ); + let partial_cmd = || { + let mut cmd = rsync_common(); + cmd.arg("--relative"); + cmd.arg(&src); + cmd + }; + + for server in config.servers.values() { + let dst = format!( + "{}@{}:{}", + server.username, + server.addr, + config.deployment.staging_dir.to_str().unwrap() + ); + let mut cmd = partial_cmd(); + cmd.arg(&dst); + let status = cmd + .status() + .context(format!("Failed to run command: ({:?})", cmd))?; + if !status.success() { + return Err(FlingError::FailedSync { src, dst }.into()); + } + } + + // run install_prereqs on each server + let builder = &config.servers[&config.builder.server]; + let build_server = (builder, &config.builder.omicron_path); + let all_servers = std::iter::once(build_server).chain( + config.servers.iter().filter_map(|(name, server)| { + // skip running prereq installing on a deployment target if it is + // also the builder, because we're already running it on the builder + if *name == config.builder.server { + None + } else { + Some((server, &config.deployment.staging_dir)) + } + }), + ); + + for (server, root_path) in all_servers { + // -y: assume yes instead of prompting + // -p: skip check that deps end up in $PATH + let cmd = format!( + "cd {} && mkdir -p out && pfexec ./tools/install_prerequisites.sh -y -p", + root_path.display() + ); + println!("install prerequisites on {}", server.addr); + ssh_exec(server, &cmd, false)?; + } + + Ok(()) +} + // Build omicron-package and omicron-deploy on the builder // // We need to build omicron-deploy for overlay file generation @@ -715,6 +789,7 @@ fn main() -> Result<()> { do_exec(&config, cmd, servers)?; } SubCommand::Sync => do_sync(&config)?, + SubCommand::InstallPrereqs => do_install_prereqs(&config)?, SubCommand::Builder(BuildCommand::Package { artifact_dir }) => { do_package(&config, artifact_dir)?; } From 05948ce8a3cabaf511b1355e243d595822ed4bad Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 2 May 2022 15:20:57 -0400 Subject: [PATCH 11/12] Note 
virtual hardware requirement in README section about VMs --- deploy/README.adoc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/deploy/README.adoc b/deploy/README.adoc index eb3e77c7fea..9acdadef3a1 100644 --- a/deploy/README.adoc +++ b/deploy/README.adoc @@ -166,10 +166,18 @@ effort to use securely. This particular implementation wraps the openssh ssh cli TODO: This section should be fleshed out more and potentially lifted to its own document; for now this is a collection of rough notes. +--- + It's possible to use a Linux libvirt host running multiple helios VMs as the builder/deployment server targets, but it requires some additional setup beyond what [`helios-engvm`](https://github.com/oxidecomputer/helios-engvm). +`thing-flinger` does not have any support for running the +`tools/create_virtual_hardware.sh` script; this will need to be done by hand on +each VM. + +--- + To enable communication between the VMs over their IPv6 bootstrap networks: 1. Enable IPv6 and DHCP on the virtual network libvirt uses for the VMs; e.g., From 9c30098d87ce6a7a8a92a3a416bb0aa11dcaf740 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 3 May 2022 10:19:19 -0400 Subject: [PATCH 12/12] Update from PR comments --- deploy/README.adoc | 9 ++------- deploy/src/bin/thing-flinger.rs | 4 ++-- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/deploy/README.adoc b/deploy/README.adoc index 9acdadef3a1..b88fc4d03cc 100644 --- a/deploy/README.adoc +++ b/deploy/README.adoc @@ -95,9 +95,7 @@ configuration file before proceeding. === Command Based Workflow ==== sync -Copy your source code to the builder. Note that this copies over your `.git` subdirectory on purpose so -that a branch can be configured for building with the `git_treeish` field in the toml `builder` -table. +Copy your source code to the builder. `cargo run --bin thing-flinger -- -c sync` @@ -138,9 +136,6 @@ Uninstall omicron from all machines. 
`cargo run --bin thing-flinger -- -c deploy uninstall` -Note: This does not fully undo everything done by the `install` step above; -noteably overlay files are not removed. - === Current Limitations `thing-flinger` is an early prototype. It has served so far to demonstrate that unique files, @@ -170,7 +165,7 @@ document; for now this is a collection of rough notes. It's possible to use a Linux libvirt host running multiple helios VMs as the builder/deployment server targets, but it requires some additional setup beyond -what [`helios-engvm`](https://github.com/oxidecomputer/helios-engvm). +[`helios-engvm`](https://github.com/oxidecomputer/helios-engvm). `thing-flinger` does not have any support for running the `tools/create_virtual_hardware.sh` script; this will need to be done by hand on diff --git a/deploy/src/bin/thing-flinger.rs b/deploy/src/bin/thing-flinger.rs index e311361ee4e..68f4363bee0 100644 --- a/deploy/src/bin/thing-flinger.rs +++ b/deploy/src/bin/thing-flinger.rs @@ -470,8 +470,8 @@ fn overlay_sled_agent( config: &Config, sled_agent_dirs: &[PathBuf], ) -> Result<()> { - // Send SSH command to create directories on builder and generate share - // secrets. + // Send SSH command to create directories on builder and generate secret + // shares. // TODO do we need any escaping here? this will definitely break if any dir // names have spaces