From df3a17616fce25ca10f287b10276e83d45540789 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Thu, 30 Jun 2022 15:42:32 +0000 Subject: [PATCH 1/2] Integrates workaround in OPTE for external instance IPs - Update OPTE rev - Add code to set secondary MAC for guest on the underlay NIC - Add code to use the workaround in `xde.conf` and reload the driver - Add a default route to the internet gateway for the guest - Update some documentation - Use pfexec internally in `install_opte.sh` rather than requiring it on the invocation - Download OPTE package archives to temporary files and then move into place, to avoid corrupting `pkg` on the system if that fails. --- Cargo.lock | 8 +-- docs/how-to-run.adoc | 10 ++- sled-agent/Cargo.toml | 4 +- sled-agent/src/illumos/dladm.rs | 30 +++++++++ sled-agent/src/opte/opte.rs | 104 ++++++++++++++++++++++++++++---- tools/install_opte.sh | 38 +++++++----- tools/install_prerequisites.sh | 2 +- 7 files changed, 160 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0dafce1bc39..241fc464b8b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2265,7 +2265,7 @@ dependencies = [ [[package]] name = "illumos-ddi-dki" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=5764c07#5764c0732a26c6d0e096ce1b8e871b44d8dc79e7" +source = "git+https://github.com/oxidecomputer/opte?rev=7089b7858b713cc4d540d3870a0a2436595ac308#7089b7858b713cc4d540d3870a0a2436595ac308" dependencies = [ "illumos-sys-hdrs", ] @@ -2273,7 +2273,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=5764c07#5764c0732a26c6d0e096ce1b8e871b44d8dc79e7" +source = "git+https://github.com/oxidecomputer/opte?rev=7089b7858b713cc4d540d3870a0a2436595ac308#7089b7858b713cc4d540d3870a0a2436595ac308" [[package]] name = "impl-trait-for-tuples" @@ -3364,7 +3364,7 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=5764c07#5764c0732a26c6d0e096ce1b8e871b44d8dc79e7" +source = "git+https://github.com/oxidecomputer/opte?rev=7089b7858b713cc4d540d3870a0a2436595ac308#7089b7858b713cc4d540d3870a0a2436595ac308" dependencies = [ "anymap", "cfg-if 0.1.10", @@ -3381,7 +3381,7 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=5764c07#5764c0732a26c6d0e096ce1b8e871b44d8dc79e7" +source = "git+https://github.com/oxidecomputer/opte?rev=7089b7858b713cc4d540d3870a0a2436595ac308#7089b7858b713cc4d540d3870a0a2436595ac308" dependencies = [ "libc", "libnet", diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index 58d3d3a07fd..2389e3c12ae 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -146,6 +146,12 @@ command line interface. Note that the `jq` command is required. In addition, th 2. Create an IP Pool, for providing external connectivity to the instance later. We need to create an IP Pool itself, and a range of IP addresses in that pool. +**Important:** The addresses used here are appropriate for the Oxide lab +environment, but not for an arbitrary environment. The actual IP range must +currently be something that matches the physical network that the host is +running in, at least if you'd like to be able to SSH into the guest. This is +most often a private address range, like `10.0.0.0/8` or `192.168.0.0/16`, but +the exact addresses that are available depends on the environment. oxide api /ip-pools --method POST --input - < Result<(), SetLinkpropError> { + let mut command = std::process::Command::new(PFEXEC); + let prop = format!("{}={}", prop_name, prop_value); + let cmd = + command.args(&[DLADM, "set-linkprop", "-t", "-p", &prop, vnic]); + execute(cmd).map_err(|err| SetLinkpropError { + link_name: vnic.to_string(), + prop_name: prop_name.to_string(), + prop_value: prop_value.to_string(), + err, + })?; + Ok(()) + } } diff --git a/sled-agent/src/opte/opte.rs b/sled-agent/src/opte/opte.rs index db91c9377a6..a553e4dc859 100644 --- a/sled-agent/src/opte/opte.rs +++ b/sled-agent/src/opte/opte.rs @@ -22,8 +22,11 @@ use opte::oxide_vpc::api::RouterTarget; use opte::oxide_vpc::api::SNatCfg; use opte_ioctl::OpteHdl; use slog::Logger; +use std::fs; use std::net::IpAddr; +use std::net::Ipv4Addr; use std::net::Ipv6Addr; +use std::path::Path; use std::sync::atomic::AtomicU64; use std::sync::atomic::Ordering; use std::sync::Arc; @@ -53,6 +56,12 @@ pub enum Error { driver which are compatible." )] IncompatibleKernel, + + #[error(transparent)] + BadAddrObj(#[from] crate::illumos::addrobj::ParseError), + + #[error(transparent)] + SetLinkpropError(#[from] crate::illumos::dladm::SetLinkpropError), } #[derive(Debug, Clone)] @@ -123,16 +132,7 @@ impl OptePortAllocator { ).into()); } }; - // OPTE's `SnatCfg` accepts a `std::ops::Range`, but the first/last - // port representation in the database and Nexus is inclusive of the - // last port. At the moment, that means we need to add 1 to the - // last port in the request data. However, if last port is - // `u16::MAX`, that would overflow and panic. We'll use saturating - // add for now, and live with missing the last port. - // - // TODO-correctness: Support the last port, see - // https://github.com/oxidecomputer/omicron/issues/1292 - let ports = ip.first_port..ip.last_port.saturating_add(1); + let ports = ip.first_port..=ip.last_port; Some(SNatCfg { public_ip, ports }) } None => None, @@ -209,6 +209,43 @@ impl OptePortAllocator { Some(Vnic::wrap_existing(vnic_name).unwrap()) }; + // TODO-remove + // + // This is part of the workaround to get external connectivity into + // instances, without setting up all of boundary services. Rather than + // encap/decap the guest traffic, OPTE just performs 1-1 NAT between the + // private IP address of the guest and the external address provided by + // the control plane. This call here allows the underlay nic, `net0` to + // advertise as having the guest's MAC address. + Dladm::set_linkprop( + underlay::find_chelsio_links()?[0].0.as_str(), + "secondary-macs", + &mac.to_string().to_lowercase(), + )?; + + // TODO-remove + // + // This is another part of the workaround, allowing reply traffic from + // the guest back out. Normally, OPTE would drop such traffic at the + // router layer, as it has no route for that external IP address. This + // allows such traffic through. + // + // Note that this exact rule will eventually be included, since it's one + // of the default routing rules in the VPC System Router. However, that + // will likely be communicated in a different way, or could be modified, + // and this specific call should be removed in favor of sending the + // routing rules the control plane provides. + // + // This rule sends all traffic that has no better match to the gateway. + let prefix = Ipv4PrefixLen::new(0).unwrap(); + let dest = Ipv4Cidr::new(Ipv4Addr::UNSPECIFIED.into(), prefix); + let target = RouterTarget::InternetGateway; + hdl.add_router_entry_ip4(&AddRouterEntryIpv4Req { + port_name: name.clone(), + dest, + target, + })?; + Ok(OptePort { name, ip, @@ -338,9 +375,21 @@ pub fn delete_all_xde_devices(log: &Logger) -> Result<(), Error> { /// The xde driver needs information about the physical devices out which it can /// send traffic from the guests. pub fn initialize_xde_driver(log: &Logger) -> Result<(), Error> { - if !std::path::Path::new("/kernel/drv/xde.conf").exists() { + const XDE_CONF: &str = "/kernel/drv/xde.conf"; + let xde_conf = Path::new(XDE_CONF); + if !xde_conf.exists() { return Err(Error::NoXdeConf); } + + // TODO-remove + // + // An additional part of the workaround to connect into instances. This is + // required to tell OPTE to actually act as a 1-1 NAT when an instance is + // provided with an external IP address, rather than do its normal job of + // encapsulating the traffic onto the underlay (such as for delivery to + // boundary services). + use_external_ip_workaround(&log, &xde_conf); + let underlay_nics = underlay::find_nics()?; info!(log, "using '{:?}' as data links for xde driver", underlay_nics); if underlay_nics.len() < 2 { @@ -383,3 +432,36 @@ pub fn initialize_xde_driver(log: &Logger) -> Result<(), Error> { Err(e) => Err(e.into()), } } + +fn use_external_ip_workaround(log: &Logger, xde_conf: &Path) { + const NEEDLE: &str = "ext_ip_hack = 0;"; + const NEW_NEEDLE: &str = "ext_ip_hack = 1;"; + + // NOTE: This only works in the real sled agent, which is run as root. The + // file is not world-readable. + let contents = fs::read_to_string(xde_conf) + .expect("Failed to read xde configuration file"); + let new = contents.replace(NEEDLE, NEW_NEEDLE); + if contents == new { + info!( + log, + "xde driver configuration file appears to already use external IP workaround"; + "conf_file" => ?xde_conf, + ); + } else { + info!( + log, + "updating xde driver configuration file for external IP workaround"; + "conf_file" => ?xde_conf, + ); + fs::write(xde_conf, &new) + .expect("Failed to modify xde configuration file"); + } + + // Ensure the driver picks up the updated configuration file, if it's been + // loaded previously without the workaround. + std::process::Command::new(crate::illumos::PFEXEC) + .args(&["update_drv", "xde"]) + .output() + .expect("Failed to reload xde driver configuration file"); +} diff --git a/tools/install_opte.sh b/tools/install_opte.sh index 1fc1a9ec21a..38afb246a6d 100755 --- a/tools/install_opte.sh +++ b/tools/install_opte.sh @@ -17,11 +17,6 @@ if [[ "$(uname)" != "SunOS" ]]; then exit 1 fi -if [[ $(id -u) -ne 0 ]]; then - echo "This must be run as root" - exit 1 -fi - SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" cd "${SOURCE_DIR}/.." OMICRON_TOP="$PWD" @@ -29,6 +24,15 @@ OUT_DIR="$OMICRON_TOP/out" XDE_DIR="$OUT_DIR/xde" mkdir -p "$XDE_DIR" +# Create a temporary directory in which to download artifacts, removing it on +# exit +DOWNLOAD_DIR="$(mktemp -t -d)" +trap remove_download_dir EXIT ERR + +function remove_download_dir { + rm -rf $DOWNLOAD_DIR +} + # Compute the SHA256 of the path in $1, returning just the sum function file_sha { sha256sum "$1" | cut -d ' ' -f 1 @@ -37,18 +41,20 @@ function file_sha { # Download a file from $1 and compare its sha256 to the value provided in $2 function download_and_check_sha { local URL="$1" + local SHA="$2" local FILENAME="$(basename "$URL")" + local DOWNLOAD_PATH="$(mktemp -p $DOWNLOAD_DIR)" local OUT_PATH="$XDE_DIR/$FILENAME" - local SHA="$2" # Check if the file already exists, with the expected SHA if ! [[ -f "$OUT_PATH" ]] || [[ "$SHA" != "$(file_sha "$OUT_PATH")" ]]; then - curl -L -o "$OUT_PATH" "$URL" 2> /dev/null - local ACTUAL_SHA="$(sha256sum "$OUT_PATH" | cut -d ' ' -f 1)" + curl -L -o "$DOWNLOAD_PATH" "$URL" 2> /dev/null + local ACTUAL_SHA="$(sha256sum "$DOWNLOAD_PATH" | cut -d ' ' -f 1)" if [[ "$ACTUAL_SHA" != "$SHA" ]]; then echo "SHA mismatch downloding file $FILENAME" exit 1 fi + mv -f "$DOWNLOAD_PATH" "$OUT_PATH" echo "\"$OUT_PATH\" downloaded and has verified SHA" else echo "\"$OUT_PATH\" already exists with correct SHA" @@ -80,7 +86,7 @@ function helios_dev_stickiness { function ensure_helios_dev_is_non_sticky { local STICKINESS="$(helios_dev_stickiness)" if [[ "$STICKINESS" = "sticky" ]]; then - pkg set-publisher --non-sticky helios-dev + pfexec pkg set-publisher --non-sticky helios-dev STICKINESS="$(helios_dev_stickiness)" if [[ "$STICKINESS" = "sticky" ]]; then echo "Failed to make helios-dev publisher non-sticky" @@ -101,22 +107,22 @@ function add_publisher { if [[ "$N_PUBLISHERS" -gt 1 ]]; then echo "More than one publisher named \"$PUBLISHER_NAME\" found" echo "Removing all publishers and installing from scratch" - pkg unset-publisher "$PUBLISHER_NAME" - pkg set-publisher -p "$ARCHIVE_PATH" --search-first + pfexec pkg unset-publisher "$PUBLISHER_NAME" + pfexec pkg set-publisher -p "$ARCHIVE_PATH" --search-first elif [[ "$N_PUBLISHERS" -eq 1 ]]; then echo "Publisher \"$PUBLISHER_NAME\" already exists, setting" echo "the origin to "$ARCHIVE_PATH"" - pkg set-publisher --origin-uri "$ARCHIVE_PATH" --search-first "$PUBLISHER_NAME" + pfexec pkg set-publisher --origin-uri "$ARCHIVE_PATH" --search-first "$PUBLISHER_NAME" else echo "Publisher \"$PUBLISHER_NAME\" does not exist, adding" - pkg set-publisher -p "$ARCHIVE_PATH" --search-first + pfexec pkg set-publisher -p "$ARCHIVE_PATH" --search-first fi } # `helios-netdev` provides the xde kernel driver and the `opteadm` userland tool # for interacting with it. HELIOS_NETDEV_BASE_URL="https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/repo" -HELIOS_NETDEV_COMMIT="5764c0732a26c6d0e096ce1b8e871b44d8dc79e7" +HELIOS_NETDEV_COMMIT="6bc29a925cf328863e4f371bc4bec7a512269314" HELIOS_NETDEV_REPO_URL="$HELIOS_NETDEV_BASE_URL/$HELIOS_NETDEV_COMMIT/opte.p5p" HELIOS_NETDEV_REPO_SHA_URL="$HELIOS_NETDEV_BASE_URL/$HELIOS_NETDEV_COMMIT/opte.p5p.sha256" HELIOS_NETDEV_REPO_PATH="$XDE_DIR/$(basename "$HELIOS_NETDEV_REPO_URL")" @@ -144,7 +150,7 @@ add_publisher "$XDE_REPO_PATH" # Actually install the xde kernel module and opteadm tool RC=0 -pkg install -v pkg://helios-netdev/driver/network/opte || RC=$? +pfexec pkg install -v pkg://helios-netdev/driver/network/opte || RC=$? if [[ "$RC" -ne 0 ]] && [[ "$RC" -ne 4 ]]; then echo "Installing xde kernel driver and opteadm tool failed" exit "$RC" @@ -161,7 +167,7 @@ fi # Install the kernel bits required for the xde kernel driver to operate # correctly RC=0 -pkg install -v pkg://on-nightly/consolidation/osnet/osnet-incorporation* || RC=$? +pfexec pkg install -v pkg://on-nightly/consolidation/osnet/osnet-incorporation* || RC=$? if [[ "$RC" -eq 0 ]]; then echo "The xde kernel driver, opteadm tool, and xde-related kernel bits" echo "have successfully been installed. A reboot may be required to activate" diff --git a/tools/install_prerequisites.sh b/tools/install_prerequisites.sh index 23b670273fd..5c8ddf795f0 100755 --- a/tools/install_prerequisites.sh +++ b/tools/install_prerequisites.sh @@ -150,7 +150,7 @@ fi # installs the `xde` driver and some kernel bits required to work with that # driver. if [[ "${HOST_OS}" == "SunOS" ]]; then - pfexec ./tools/install_opte.sh + ./tools/install_opte.sh fi # Validate the PATH: From 0437db562b7642840c090332ab255f3591174b9f Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Wed, 6 Jul 2022 02:43:06 +0000 Subject: [PATCH 2/2] Review feedback - Sync SHAs of OPTE in Cargo dep and `install_opte.sh` - Fix visibility from bad rebase --- Cargo.lock | 8 ++++---- sled-agent/Cargo.toml | 4 ++-- sled-agent/src/common/underlay.rs | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 241fc464b8b..83cbf941f5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2265,7 +2265,7 @@ dependencies = [ [[package]] name = "illumos-ddi-dki" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=7089b7858b713cc4d540d3870a0a2436595ac308#7089b7858b713cc4d540d3870a0a2436595ac308" +source = "git+https://github.com/oxidecomputer/opte?rev=6bc29a925cf328863e4f371bc4bec7a512269314#6bc29a925cf328863e4f371bc4bec7a512269314" dependencies = [ "illumos-sys-hdrs", ] @@ -2273,7 +2273,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=7089b7858b713cc4d540d3870a0a2436595ac308#7089b7858b713cc4d540d3870a0a2436595ac308" +source = "git+https://github.com/oxidecomputer/opte?rev=6bc29a925cf328863e4f371bc4bec7a512269314#6bc29a925cf328863e4f371bc4bec7a512269314" [[package]] name = "impl-trait-for-tuples" @@ -3364,7 +3364,7 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=7089b7858b713cc4d540d3870a0a2436595ac308#7089b7858b713cc4d540d3870a0a2436595ac308" +source = "git+https://github.com/oxidecomputer/opte?rev=6bc29a925cf328863e4f371bc4bec7a512269314#6bc29a925cf328863e4f371bc4bec7a512269314" dependencies = [ "anymap", "cfg-if 0.1.10", @@ -3381,7 +3381,7 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=7089b7858b713cc4d540d3870a0a2436595ac308#7089b7858b713cc4d540d3870a0a2436595ac308" +source = "git+https://github.com/oxidecomputer/opte?rev=6bc29a925cf328863e4f371bc4bec7a512269314#6bc29a925cf328863e4f371bc4bec7a512269314" dependencies = [ "libc", "libnet", diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 164955339c4..2e061d95a4d 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -54,8 +54,8 @@ vsss-rs = { version = "2.0.0-pre2", default-features = false, features = ["std"] zone = "0.1" [target.'cfg(target_os = "illumos")'.dependencies] -opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "7089b7858b713cc4d540d3870a0a2436595ac308" } -opte = { git = "https://github.com/oxidecomputer/opte", rev = "7089b7858b713cc4d540d3870a0a2436595ac308", features = [ "api", "std" ] } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "6bc29a925cf328863e4f371bc4bec7a512269314" } +opte = { git = "https://github.com/oxidecomputer/opte", rev = "6bc29a925cf328863e4f371bc4bec7a512269314", features = [ "api", "std" ] } [dev-dependencies] expectorate = "1.0.5" diff --git a/sled-agent/src/common/underlay.rs b/sled-agent/src/common/underlay.rs index 42e22f66915..37cb52f21c1 100644 --- a/sled-agent/src/common/underlay.rs +++ b/sled-agent/src/common/underlay.rs @@ -36,7 +36,7 @@ pub fn find_nics() -> Result, Error> { Ok(addr_objs) } -fn find_chelsio_links() -> Result, Error> { +pub(crate) fn find_chelsio_links() -> Result, Error> { // TODO-correctness: This should eventually be determined by a call to // `Dladm` to get the real Chelsio links on a Gimlet. These will likely be // called `cxgbeN`, but we explicitly call them `netN` to be clear that