diff --git a/common/src/address.rs b/common/src/address.rs index 5fd69543456..536fd75b0e5 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -53,7 +53,7 @@ const DNS_ADDRESS_INDEX: usize = 1; const GZ_ADDRESS_INDEX: usize = 2; /// The maximum number of addresses per sled reserved for RSS. -pub const RSS_RESERVED_ADDRESSES: u16 = 10; +pub const RSS_RESERVED_ADDRESSES: u16 = 16; /// Wraps an [`Ipv6Network`] with a compile-time prefix length. #[derive( @@ -136,6 +136,7 @@ impl ReservedRackSubnet { } const SLED_AGENT_ADDRESS_INDEX: usize = 1; +const SWITCH_ZONE_ADDRESS_INDEX: usize = 2; /// Return the sled agent address for a subnet. /// @@ -146,6 +147,15 @@ pub fn get_sled_address(sled_subnet: Ipv6Subnet) -> SocketAddrV6 { SocketAddrV6::new(sled_agent_ip, SLED_AGENT_PORT, 0, 0) } +/// Return the switch zone address for a subnet. +/// +/// This address will come from the second address of the [`SLED_PREFIX`] subnet. +pub fn get_switch_zone_address( + sled_subnet: Ipv6Subnet, +) -> Ipv6Addr { + sled_subnet.net().iter().nth(SWITCH_ZONE_ADDRESS_INDEX).unwrap() +} + /// Returns a sled subnet within a rack subnet. /// /// The subnet at index == 0 is used for rack-local services. diff --git a/docs/crdb-debugging.adoc b/docs/crdb-debugging.adoc index 362cde5b494..c2d857db02a 100644 --- a/docs/crdb-debugging.adoc +++ b/docs/crdb-debugging.adoc @@ -14,6 +14,6 @@ The following provides instructions for connecting to a CRDB shell on a running 1. **Find the zone running CockroachDB**. This can be accomplished by running `zoneadm list -cv`, and finding the zone with a prefix of `oxz_cockroachdb`. 2. **Log into that zone**. This can be done using `pfexec zlogin `. -3. **Read the CockroachDB log file to determine the connection instructions**. This can be done with `tail -f $(svcs -L cockroachdb)`, look for a line starting with `RPC client flags:`. As one example, this may look like `/opt/oxide/cockroachdb/bin/cockroach --host=[fd00:1122:3344:101::2]:32221 --insecure` +3. **Read the CockroachDB log file to determine the connection instructions**. This can be done with `tail -f $(svcs -L cockroachdb)`, look for a line starting with `RPC client flags:`. As one example, this may look like `/opt/oxide/cockroachdb/bin/cockroach --host=[fd00:1122:3344:101::3]:32221 --insecure` 4. Run that command, with however you want to access `cockroach`. One notable `` is `sql`, which grants access to a SQL shell. diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index 28504d9d7ea..81181c97f5a 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -172,13 +172,14 @@ be set as a default route for the Nexus zone. | Service | Endpoint | Sled Agent: Bootstrap | Derived from MAC address of physical data link. | Sled Agent: Dropshot API | `[fd00:1122:3344:0101::1]:12345` -| Cockroach DB | `[fd00:1122:3344:0101::2]:32221` -| Nexus: Internal API | `[fd00:1122:3344:0101::3]:12221` -| Oximeter | `[fd00:1122:3344:0101::4]:12223` -| Clickhouse | `[fd00:1122:3344:0101::5]:8123` -| Crucible Downstairs 1 | `[fd00:1122:3344:0101::6]:32345` -| Crucible Downstairs 2 | `[fd00:1122:3344:0101::7]:32345` -| Crucible Downstairs 3 | `[fd00:1122:3344:0101::8]:32345` +| Switch Zone | `[fd00:1122:3344:0101::2]` +| Cockroach DB | `[fd00:1122:3344:0101::3]:32221` +| Nexus: Internal API | `[fd00:1122:3344:0101::4]:12221` +| Oximeter | `[fd00:1122:3344:0101::5]:12223` +| Clickhouse | `[fd00:1122:3344:0101::6]:8123` +| Crucible Downstairs 1 | `[fd00:1122:3344:0101::7]:32345` +| Crucible Downstairs 2 | `[fd00:1122:3344:0101::8]:32345` +| Crucible Downstairs 3 | `[fd00:1122:3344:0101::9]:32345` | Internal DNS Service | `[fd00:1122:3344:0001::1]:5353` | Nexus: External API | `192.168.1.20:80` | Internet Gateway | None, but can be set in `smf/sled-agent/config-rss.toml` diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 8b9bdc64d52..b81ebcf56df 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -1525,15 +1525,15 @@ "$ref": "#/components/schemas/ServiceType" } }, - "zone_name": { - "type": "string" + "zone_type": { + "$ref": "#/components/schemas/ZoneType" } }, "required": [ "addresses", "id", "services", - "zone_name" + "zone_type" ] }, "Slot": { @@ -1834,6 +1834,16 @@ "required": [ "rules" ] + }, + "ZoneType": { + "description": "The type of zone which may be requested from Sled Agent", + "type": "string", + "enum": [ + "internal_dns", + "nexus", + "oximeter", + "switch" + ] } } } diff --git a/package-manifest.toml b/package-manifest.toml index 0da7246d6e6..fbf648d6f14 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -61,11 +61,11 @@ output.type = "zone" setup_hint = "Run `./tools/ci_download_cockroachdb` to download the necessary binaries" [package.internal-dns] -service_name = "internal-dns" +service_name = "internal_dns" source.type = "local" source.rust.binary_names = ["dnsadm", "dns-server"] source.rust.release = true -source.paths = [ { from = "smf/internal-dns", to = "/var/svc/manifest/site/internal-dns" } ] +source.paths = [ { from = "smf/internal-dns", to = "/var/svc/manifest/site/internal_dns" } ] output.type = "zone" [package.omicron-gateway] @@ -161,6 +161,10 @@ source.sha256 = "208ae10a61f834608378eb135e4b6e5993dc363019b8fba75465b6ea5506b63 output.type = "zone" output.intermediate_only = true +# To package and install the asic variant of the switch, do: +# +# $ cargo run --release -p omicron-package -- -t switch_variant=asic package +# $ pfexec ./target/release/omicron-package -t switch_variant=asic install [package.switch-asic] service_name = "switch" only_for_targets.switch_variant = "asic" @@ -168,6 +172,12 @@ source.type = "composite" source.packages = [ "omicron-gateway.tar.gz", "dendrite-asic.tar.gz" ] output.type = "zone" +# To package and install the stub variant of the switch, do the following: +# +# - Set the sled agent's configuration option "stub_scrimlet" to "true" +# - Run the following: +# $ cargo run --release -p omicron-package -- -t switch_variant=stub package +# $ pfexec ./target/release/omicron-package -t switch_variant=stub install [package.switch-stub] service_name = "switch" only_for_targets.switch_variant = "stub" diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index 252b3789d5b..cb0be2a82a8 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -20,7 +20,7 @@ use crate::illumos::dladm::{self, Dladm, PhysicalLink}; use crate::illumos::zone::Zones; use crate::server::Server as SledServer; use crate::sp::SpHandle; -use omicron_common::address::{get_sled_address, Ipv6Subnet}; +use omicron_common::address::Ipv6Subnet; use omicron_common::api::external::{Error as ExternalError, MacAddr}; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, @@ -234,7 +234,7 @@ impl Agent { ) -> Result { info!(&self.log, "Loading Sled Agent: {:?}", request); - let sled_address = get_sled_address(request.subnet); + let sled_address = request.sled_address(); let mut maybe_agent = self.sled_agent.lock().await; if let Some(server) = &*maybe_agent { @@ -276,7 +276,6 @@ impl Agent { let server = SledServer::start( &self.sled_config, self.parent_log.clone(), - sled_address, request.clone(), ) .await diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index 94db44729d4..1c61327ed19 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -6,13 +6,13 @@ use super::trust_quorum::SerializableShareDistribution; use macaddr::MacAddr6; -use omicron_common::address::{Ipv6Subnet, SLED_PREFIX}; +use omicron_common::address::{self, Ipv6Subnet, SLED_PREFIX}; use serde::{Deserialize, Deserializer, Serialize}; use serde_with::serde_as; use serde_with::DeserializeAs; use serde_with::PickFirst; use std::borrow::Cow; -use std::net::Ipv4Addr; +use std::net::{Ipv4Addr, Ipv6Addr, SocketAddrV6}; use uuid::Uuid; /// Information about the internet gateway used for externally-facing services. @@ -78,6 +78,16 @@ pub struct SledAgentRequest { pub subnet: Ipv6Subnet, } +impl SledAgentRequest { + pub fn sled_address(&self) -> SocketAddrV6 { + address::get_sled_address(self.subnet) + } + + pub fn switch_ip(&self) -> Ipv6Addr { + address::get_switch_zone_address(self.subnet) + } +} + // We intentionally DO NOT derive `Debug` or `Serialize`; both provide avenues // by which we may accidentally log the contents of our `share`. To serialize a // request, use `RequestEnvelope::danger_serialize_as_json()`. diff --git a/sled-agent/src/illumos/dladm.rs b/sled-agent/src/illumos/dladm.rs index af74298c0a6..d9bead4880a 100644 --- a/sled-agent/src/illumos/dladm.rs +++ b/sled-agent/src/illumos/dladm.rs @@ -5,7 +5,7 @@ //! Utilities for poking at data links. use crate::common::vlan::VlanID; -use crate::illumos::vnic::VnicKind; +use crate::illumos::link::{Link, LinkKind}; use crate::illumos::zone::IPADM; use crate::illumos::{execute, ExecutionError, PFEXEC}; use omicron_common::api::external::MacAddr; @@ -158,7 +158,7 @@ impl Dladm { /// Creates a VNIC on top of the etherstub. /// - /// This VNIC is not tracked like [`crate::illumos::vnic::Vnic`], because + /// This VNIC is not tracked like [`crate::illumos::link::Link`], because /// it is expected to exist for the lifetime of the sled. pub fn ensure_etherstub_vnic( source: &Etherstub, @@ -221,6 +221,21 @@ impl Dladm { Ok(()) } + /// Verify that the given link exists + pub fn verify_link(link: &str) -> Result { + let mut command = std::process::Command::new(PFEXEC); + let cmd = command.args(&[DLADM, "show-link", "-p", "-o", "LINK", link]); + let output = execute(cmd)?; + match String::from_utf8_lossy(&output.stdout) + .lines() + .next() + .map(|s| s.trim()) + { + Some(x) if x == link => Ok(Link::wrap_physical(link)), + _ => Err(FindPhysicalLinkError::NoPhysicalLinkFound), + } + } + /// Returns the name of the first observed physical data link. pub fn find_physical() -> Result { let mut command = std::process::Command::new(PFEXEC); @@ -322,7 +337,7 @@ impl Dladm { .filter_map(|name| { // Ensure this is a kind of VNIC that the sled agent could be // responsible for. - match VnicKind::from_name(name) { + match LinkKind::from_name(name) { Some(_) => Some(name.to_owned()), None => None, } diff --git a/sled-agent/src/illumos/vnic.rs b/sled-agent/src/illumos/link.rs similarity index 79% rename from sled-agent/src/illumos/vnic.rs rename to sled-agent/src/illumos/link.rs index 3fbc5b77b25..acd2b421c16 100644 --- a/sled-agent/src/illumos/vnic.rs +++ b/sled-agent/src/illumos/link.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! API for controlling a single instance. +//! API for allocating and managing data links. use crate::illumos::dladm::{ CreateVnicError, DeleteVnicError, VnicSource, VNIC_PREFIX, @@ -54,13 +54,13 @@ impl VnicAllocator
{ pub fn new_control( &self, mac: Option, - ) -> Result { + ) -> Result { let allocator = self.new_superscope("Control"); let name = allocator.next(); debug_assert!(name.starts_with(VNIC_PREFIX)); debug_assert!(name.starts_with(VNIC_PREFIX_CONTROL)); Dladm::create_vnic(&self.data_link, &name, mac, None)?; - Ok(Vnic { name, deleted: false, kind: VnicKind::OxideControl }) + Ok(Link { name, deleted: false, kind: LinkKind::OxideControlVnic }) } fn new_superscope>(&self, scope: S) -> Self { @@ -82,22 +82,23 @@ impl VnicAllocator
{ } } -/// Represents the kind of a VNIC, such as whether it's for guest networking or +/// Represents the kind of a Link, such as whether it's for guest networking or /// communicating with Oxide services. #[derive(Debug, Clone, Copy, PartialEq)] -pub enum VnicKind { - OxideControl, - Guest, +pub enum LinkKind { + Physical, + OxideControlVnic, + GuestVnic, } -impl VnicKind { +impl LinkKind { /// Infer the kind from a VNIC's name, if this one the sled agent can /// manage, and `None` otherwise. pub fn from_name(name: &str) -> Option { if name.starts_with(VNIC_PREFIX) { - Some(VnicKind::OxideControl) + Some(LinkKind::OxideControlVnic) } else if name.starts_with(VNIC_PREFIX_GUEST) { - Some(VnicKind::Guest) + Some(LinkKind::GuestVnic) } else { None } @@ -106,7 +107,7 @@ impl VnicKind { #[derive(thiserror::Error, Debug)] #[error("VNIC with name '{0}' is not valid for sled agent management")] -pub struct InvalidVnicKind(String); +pub struct InvalidLinkKind(String); /// Represents an allocated VNIC on the system. /// The VNIC is de-allocated when it goes out of scope. @@ -115,30 +116,41 @@ pub struct InvalidVnicKind(String); /// another process in the global zone could also modify / destroy /// the VNIC while this object is alive. #[derive(Debug)] -pub struct Vnic { +pub struct Link { name: String, deleted: bool, - kind: VnicKind, + kind: LinkKind, } -impl Vnic { +impl Link { /// Takes ownership of an existing VNIC. pub fn wrap_existing>( name: S, - ) -> Result { - match VnicKind::from_name(name.as_ref()) { - Some(kind) => Ok(Vnic { + ) -> Result { + match LinkKind::from_name(name.as_ref()) { + Some(kind) => Ok(Self { name: name.as_ref().to_owned(), deleted: false, kind, }), - None => Err(InvalidVnicKind(name.as_ref().to_owned())), + None => Err(InvalidLinkKind(name.as_ref().to_owned())), + } + } + + /// Wraps a physical nic in a Link structure. + /// + /// It is the caller's responsibility to ensure this is a physical link. + pub fn wrap_physical>(name: S) -> Self { + Link { + name: name.as_ref().to_owned(), + deleted: false, + kind: LinkKind::Physical, } } /// Deletes a NIC (if it has not already been deleted). pub fn delete(&mut self) -> Result<(), DeleteVnicError> { - if self.deleted { + if self.deleted || self.kind == LinkKind::Physical { Ok(()) } else { self.deleted = true; @@ -150,16 +162,16 @@ impl Vnic { &self.name } - pub fn kind(&self) -> VnicKind { + pub fn kind(&self) -> LinkKind { self.kind } } -impl Drop for Vnic { +impl Drop for Link { fn drop(&mut self) { let r = self.delete(); if let Err(e) = r { - eprintln!("Failed to delete VNIC: {}", e); + eprintln!("Failed to delete Link: {}", e); } } } diff --git a/sled-agent/src/illumos/mod.rs b/sled-agent/src/illumos/mod.rs index 3aead102087..ad7200c3602 100644 --- a/sled-agent/src/illumos/mod.rs +++ b/sled-agent/src/illumos/mod.rs @@ -8,9 +8,9 @@ use cfg_if::cfg_if; pub mod addrobj; pub mod dladm; +pub mod link; pub mod running_zone; pub mod svc; -pub mod vnic; pub mod zfs; pub mod zone; pub mod zpool; diff --git a/sled-agent/src/illumos/running_zone.rs b/sled-agent/src/illumos/running_zone.rs index de50d2b5d4a..dd38cd7dfaf 100644 --- a/sled-agent/src/illumos/running_zone.rs +++ b/sled-agent/src/illumos/running_zone.rs @@ -6,8 +6,8 @@ use crate::illumos::addrobj::AddrObject; use crate::illumos::dladm::Etherstub; +use crate::illumos::link::{Link, VnicAllocator}; use crate::illumos::svc::wait_for_service; -use crate::illumos::vnic::{Vnic, VnicAllocator}; use crate::illumos::zone::{AddressRequest, ZONE_PREFIX}; use crate::opte::Port; use ipnetwork::IpNetwork; @@ -190,7 +190,8 @@ impl RunningZone { ) -> Result { info!(self.inner.log, "Adding address: {:?}", addrtype); let addrobj = AddrObject::new( - self.inner.physical_nic + self.inner + .link .as_ref() .expect("Cannot allocate external address on zone without physical NIC") .name(), @@ -284,7 +285,7 @@ impl RunningZone { }, )?; - let control_vnic = Vnic::wrap_existing(vnic_name) + let control_vnic = Link::wrap_existing(vnic_name) .expect("Failed to wrap valid control VNIC"); Ok(Self { @@ -296,7 +297,7 @@ impl RunningZone { // // Re-initialize guest_vnic state by inspecting the zone. opte_ports: vec![], - physical_nic: None, + link: None, }, }) } @@ -346,14 +347,13 @@ pub struct InstalledZone { name: String, // NIC used for control plane communication. - control_vnic: Vnic, + control_vnic: Link, // OPTE devices for the guest network interfaces opte_ports: Vec, // Physical NIC possibly provisioned to the zone. - // TODO: Remove once Nexus traffic is transmitted over OPTE. - physical_nic: Option, + link: Option, } impl InstalledZone { @@ -384,7 +384,7 @@ impl InstalledZone { datasets: &[zone::Dataset], devices: &[zone::Device], opte_ports: Vec, - physical_nic: Option, + link: Option, ) -> Result { let control_vnic = vnic_allocator.new_control(None).map_err(|err| { InstallZoneError::CreateVnic { zone: zone_name.to_string(), err } @@ -398,7 +398,7 @@ impl InstalledZone { .iter() .map(|port| port.vnic_name().to_string()) .chain(std::iter::once(control_vnic.name().to_string())) - .chain(physical_nic.as_ref().map(|vnic| vnic.name().to_string())) + .chain(link.as_ref().map(|vnic| vnic.name().to_string())) .collect(); Zones::install_omicron_zone( @@ -420,7 +420,7 @@ impl InstalledZone { name: full_zone_name, control_vnic, opte_ports, - physical_nic, + link, }) } } diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index ec8173b09a6..0c7910d59bf 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -6,11 +6,11 @@ use crate::common::instance::{Action as InstanceAction, InstanceStates}; use crate::illumos::dladm::Etherstub; +use crate::illumos::link::VnicAllocator; use crate::illumos::running_zone::{ InstalledZone, RunCommandError, RunningZone, }; use crate::illumos::svc::wait_for_service; -use crate::illumos::vnic::VnicAllocator; use crate::illumos::zone::{AddressRequest, PROPOLIS_ZONE_PREFIX}; use crate::instance_manager::InstanceTicket; use crate::nexus::LazyNexusClient; diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 3af21061cc0..4ff923983fd 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -5,7 +5,7 @@ //! API for controlling multiple instances on a sled. use crate::illumos::dladm::Etherstub; -use crate::illumos::vnic::VnicAllocator; +use crate::illumos::link::VnicAllocator; use crate::nexus::LazyNexusClient; use crate::opte::PortManager; use crate::params::{ diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 6692f1209a0..c08224f79bb 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -380,7 +380,7 @@ impl std::fmt::Display for ServiceType { fn fmt(&self, f: &mut Formatter<'_>) -> FormatResult { match self { ServiceType::Nexus { .. } => write!(f, "nexus"), - ServiceType::InternalDns { .. } => write!(f, "internal-dns"), + ServiceType::InternalDns { .. } => write!(f, "internal_dns"), ServiceType::Oximeter => write!(f, "oximeter"), ServiceType::Dendrite { .. } => write!(f, "dendrite"), ServiceType::Tfport { .. } => write!(f, "tfport"), @@ -410,6 +410,45 @@ impl From for sled_agent_client::types::ServiceType { } } +/// The type of zone which may be requested from Sled Agent +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +pub enum ZoneType { + #[serde(rename = "internal_dns")] + InternalDNS, + #[serde(rename = "nexus")] + Nexus, + #[serde(rename = "oximeter")] + Oximeter, + #[serde(rename = "switch")] + Switch, +} + +impl From for sled_agent_client::types::ZoneType { + fn from(zt: ZoneType) -> Self { + match zt { + ZoneType::InternalDNS => Self::InternalDns, + ZoneType::Nexus => Self::Nexus, + ZoneType::Oximeter => Self::Oximeter, + ZoneType::Switch => Self::Switch, + } + } +} + +impl std::fmt::Display for ZoneType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use ZoneType::*; + let name = match self { + InternalDNS => "internal_dns", + Nexus => "nexus", + Oximeter => "oximeter", + Switch => "switch", + }; + write!(f, "{name}") + } +} + /// Describes a request to create a zone running one or more services. #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, @@ -417,8 +456,8 @@ impl From for sled_agent_client::types::ServiceType { pub struct ServiceZoneRequest { // The UUID of the zone to be initialized. pub id: Uuid, - // The name of the zone to be created. - pub zone_name: String, + // The type of the zone to be created. + pub zone_type: ZoneType, // The addresses on which the service should listen for requests. pub addresses: Vec, // The addresses in the global zone which should be created, if necessary @@ -480,7 +519,7 @@ impl From for sled_agent_client::types::ServiceZoneRequest { Self { id: s.id, - zone_name: s.zone_name.to_string(), + zone_type: s.zone_type.into(), addresses: s.addresses, gz_addresses: s.gz_addresses, services, diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 4997ce7f2fc..ee0ab306787 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -10,7 +10,7 @@ use crate::bootstrap::ddm_admin_client::{DdmAdminClient, DdmError}; use crate::bootstrap::params::SledAgentRequest; use crate::bootstrap::rss_handle::BootstrapAgentHandle; use crate::bootstrap::trust_quorum::{RackSecret, ShareDistribution}; -use crate::params::{ServiceType, ServiceZoneRequest}; +use crate::params::{ServiceType, ServiceZoneRequest, ZoneType}; use internal_dns_client::multiclient::{DnsError, Updater as DnsUpdater}; use omicron_common::address::{ get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, @@ -316,7 +316,7 @@ impl ServiceInner { let dns_addr = dns_subnet.dns_address().ip(); request.dns_services.push(ServiceZoneRequest { id: Uuid::new_v4(), - zone_name: "internal-dns".to_string(), + zone_type: ZoneType::InternalDNS, addresses: vec![dns_addr], gz_addresses: vec![dns_subnet.gz_address().ip()], services: vec![ServiceType::InternalDns { diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 4aa9078890d..c5b9c8aff4b 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -10,7 +10,7 @@ use super::sled_agent::SledAgent; use crate::bootstrap::params::SledAgentRequest; use crate::nexus::LazyNexusClient; use slog::Logger; -use std::net::{SocketAddr, SocketAddrV6}; +use std::net::SocketAddr; use uuid::Uuid; /// Packages up a [`SledAgent`], running the sled agent API under a Dropshot @@ -33,13 +33,13 @@ impl Server { pub async fn start( config: &Config, log: Logger, - addr: SocketAddrV6, request: SledAgentRequest, ) -> Result { info!(log, "setting up sled agent server"); let client_log = log.new(o!("component" => "NexusClient")); + let addr = request.sled_address(); let lazy_nexus_client = LazyNexusClient::new(client_log, *addr.ip()) .map_err(|e| e.to_string())?; @@ -47,7 +47,6 @@ impl Server { &config, log.clone(), lazy_nexus_client.clone(), - addr, request, ) .await diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 5eef7654359..a503d86f0da 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -2,16 +2,39 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! Support for miscellaneous services managed by the sled. +//! Sled-local service management. +//! +//! For controlling zone-based storage services, refer to +//! [crate::storage_manager::StorageManager]. +//! +//! For controlling virtual machine instances, refer to +//! [crate::instance_manager::InstanceManager]. +//! +//! The [ServiceManager] provides separate mechanisms for services where the +//! "source-of-truth" is Nexus, compared with services where the +//! "source-of-truth" is the Sled Agent itself. Although we generally prefer to +//! delegate the decision of "which services run where" to Nexus, there are +//! situations where the Sled Agent must be capable of autonomously ensuring +//! that zones execute. For example, the "switch zone" contains services which +//! should automatically start when a Tofino device is detected, independently +//! of what other services Nexus wants to have executing on the sled. +//! +//! To accomplish this, the following interfaces are exposed: +//! - [ServiceManager::ensure_persistent] exposes an API to request a set of +//! services that should persist beyond reboot. +//! - [ServiceManager::ensure_switch] exposes an API to specifically enable +//! or disable the switch zone. use crate::bootstrap::ddm_admin_client::{DdmAdminClient, DdmError}; use crate::common::underlay; -use crate::illumos::dladm::{Etherstub, EtherstubVnic, PhysicalLink}; +use crate::illumos::dladm::{Dladm, Etherstub, EtherstubVnic, PhysicalLink}; +use crate::illumos::link::{Link, VnicAllocator}; use crate::illumos::running_zone::{InstalledZone, RunningZone}; -use crate::illumos::vnic::VnicAllocator; use crate::illumos::zfs::ZONE_ZFS_DATASET_MOUNTPOINT; use crate::illumos::zone::AddressRequest; -use crate::params::{ServiceEnsureBody, ServiceType, ServiceZoneRequest}; +use crate::params::{ + DendriteAsic, ServiceEnsureBody, ServiceType, ServiceZoneRequest, +}; use crate::zone::Zones; use omicron_common::address::Ipv6Subnet; use omicron_common::address::DENDRITE_PORT; @@ -29,8 +52,11 @@ use std::iter::FromIterator; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}; use std::path::{Path, PathBuf}; use std::str::FromStr; +use std::sync::Arc; use tokio::io::AsyncWriteExt; +use tokio::sync::oneshot; use tokio::sync::Mutex; +use tokio::task::JoinHandle; use uuid::Uuid; // The filename of ServiceManager's internal storage. @@ -53,6 +79,9 @@ pub enum Error { #[error("I/O Error accessing {path}: {err}")] Io { path: PathBuf, err: std::io::Error }, + #[error("Failed to find device {device}")] + MissingDevice { device: String }, + #[error("Failed to do '{intent}' by running command in zone: {err}")] ZoneCommand { intent: String, @@ -224,10 +253,34 @@ impl<'t> SmfHelper<'t> { } } +struct Task { + // A signal for the initializer task to terminate + exit_tx: oneshot::Sender<()>, + // A task repeatedly trying to initialize the zone + initializer: JoinHandle<()>, +} + +// Describes the Switch Zone state. +enum SwitchZone { + // The switch zone is not currently running. + Disabled, + // The Zone is still initializing - it may be awaiting the initialization + // of certain links. + Initializing { + // The request for the zone + request: ServiceZoneRequest, + // A background task + worker: Option, + }, + // The Zone is currently running. + Running(RunningZone), +} + /// Manages miscellaneous Sled-local services. -pub struct ServiceManager { +pub struct ServiceManagerInner { log: Logger, config: Config, + switch_zone: Mutex, zones: Mutex>, vnic_allocator: VnicAllocator, physical_link_vnic_allocator: VnicAllocator, @@ -238,6 +291,11 @@ pub struct ServiceManager { advertised_prefixes: Mutex>>, } +#[derive(Clone)] +pub struct ServiceManager { + inner: Arc, +} + impl ServiceManager { /// Creates a service manager, which returns once all requested services /// have been started. @@ -261,28 +319,33 @@ impl ServiceManager { debug!(log, "Creating new ServiceManager"); let log = log.new(o!("component" => "ServiceManager")); let mgr = Self { - log: log.clone(), - config, - zones: Mutex::new(vec![]), - vnic_allocator: VnicAllocator::new("Service", etherstub), - physical_link_vnic_allocator: VnicAllocator::new( - "Public", - // NOTE: Right now, we only use a connection to one of the Chelsio - // links. Longer-term, when we we use OPTE, we'll be able to use both - // connections. - physical_link, - ), - underlay_vnic, - underlay_address, - rack_id, - ddmd_client: DdmAdminClient::new(log)?, - advertised_prefixes: Mutex::new(HashSet::new()), + inner: Arc::new(ServiceManagerInner { + log: log.clone(), + config, + // TODO(https://github.com/oxidecomputer/omicron/issues/725): + // Load the switch zone if it already exists? + switch_zone: Mutex::new(SwitchZone::Disabled), + zones: Mutex::new(vec![]), + vnic_allocator: VnicAllocator::new("Service", etherstub), + physical_link_vnic_allocator: VnicAllocator::new( + "Public", + // NOTE: Right now, we only use a connection to one of the Chelsio + // links. Longer-term, when we we use OPTE, we'll be able to use both + // connections. + physical_link, + ), + underlay_vnic, + underlay_address, + rack_id, + ddmd_client: DdmAdminClient::new(log)?, + advertised_prefixes: Mutex::new(HashSet::new()), + }), }; let config_path = mgr.services_config_path(); if config_path.exists() { info!( - &mgr.log, + &mgr.inner.log, "Sled services found at {}; loading", config_path.to_string_lossy() ); @@ -295,12 +358,12 @@ impl ServiceManager { path: config_path.clone(), err, })?; - let mut existing_zones = mgr.zones.lock().await; + let mut existing_zones = mgr.inner.zones.lock().await; mgr.initialize_services_locked(&mut existing_zones, &cfg.services) .await?; } else { info!( - &mgr.log, + &mgr.inner.log, "No sled services found at {}", config_path.to_string_lossy() ); @@ -312,7 +375,7 @@ impl ServiceManager { // Returns either the path to the explicitly provided config path, or // chooses the default one. fn services_config_path(&self) -> PathBuf { - self.config.all_svcs_config_path.clone() + self.inner.config.all_svcs_config_path.clone() } // Advertise the /64 prefix of `address`, unless we already have. @@ -322,188 +385,223 @@ impl ServiceManager { // prefix is spawned onto a background task. async fn advertise_prefix_of_address(&self, address: Ipv6Addr) { let subnet = Ipv6Subnet::new(address); - if self.advertised_prefixes.lock().await.insert(subnet) { - self.ddmd_client.advertise_prefix(subnet); + if self.inner.advertised_prefixes.lock().await.insert(subnet) { + self.inner.ddmd_client.advertise_prefix(subnet); } } - // Populates `existing_zones` according to the requests in `services`. - // - // At the point this function is invoked, IP addresses have already been - // allocated (by either RSS or Nexus). However, this function explicitly - // assigns such addresses to interfaces within zones. - async fn initialize_services_locked( + // Check the services intended to run in the zone to determine whether any + // physical devices need to be mapped into the zone when it is created. + fn devices_needed( &self, - existing_zones: &mut Vec, - requests: &Vec, - ) -> Result<(), Error> { - // TODO(https://github.com/oxidecomputer/omicron/issues/726): - // As long as we ensure the requests don't overlap, we could - // parallelize this request. - for req in requests { - info!( - self.log, - "Ensuring service zone is initialized: {:?}", req.zone_name - ); - // Before we bother allocating anything for this request, check if - // this service has already been created. - let expected_zone_name = - InstalledZone::get_zone_name(&req.zone_name, None); - if existing_zones.iter().any(|z| z.name() == expected_zone_name) { - info!( - self.log, - "Service zone {} already exists", req.zone_name - ); - continue; - } else { - info!( - self.log, - "Service zone {} does not yet exist", req.zone_name - ); + req: &ServiceZoneRequest, + ) -> Result, Error> { + let mut devices = vec![]; + for svc in &req.services { + match svc { + ServiceType::Dendrite { asic: DendriteAsic::TofinoAsic } => { + // When running on a real sidecar, we need the /dev/tofino + // device to talk to the tofino ASIC. + devices.push("/dev/tofino".to_string()); + } + _ => (), } + } - // TODO: Remove once Nexus traffic is transmitted over OPTE. - let physical_nic = match req - .services - .iter() - .any(|service| matches!(service, ServiceType::Nexus { .. })) - { - true => { - let vnic = self + for dev in &devices { + if !Path::new(dev).exists() { + return Err(Error::MissingDevice { device: dev.to_string() }); + } + } + Ok(devices) + } + + // Check the services intended to run in the zone to determine whether any + // physical links or vnics need to be mapped into the zone when it is created. + // + // NOTE: This function is implemented to return the first link found, under + // the assumption that "at most one" would be necessary. + fn link_needed( + &self, + req: &ServiceZoneRequest, + ) -> Result, Error> { + for svc in &req.services { + match svc { + ServiceType::Tfport { pkt_source } => { + // The tfport service requires a MAC device to/from which sidecar + // packets may be multiplexed. If the link isn't present, don't + // bother trying to start the zone. + match Dladm::verify_link(pkt_source) { + Ok(link) => { + return Ok(Some(link)); + } + Err(_) => { + return Err(Error::MissingDevice { + device: pkt_source.to_string(), + }); + } + } + } + ServiceType::Nexus { .. } => { + // TODO: Remove once Nexus traffic is transmitted over OPTE. + match self + .inner .physical_link_vnic_allocator .new_control(None) - .map_err(|e| Error::NexusVnicCreation(e))?; - Some(vnic) + { + Ok(n) => { + return Ok(Some(n)); + } + Err(e) => { + return Err(Error::NexusVnicCreation(e)); + } + } } - false => None, - }; - - let installed_zone = InstalledZone::install( - &self.log, - &self.vnic_allocator, - &req.zone_name, - // unique_name= - None, - // dataset= - &[], - // devices= - &[], - // opte_ports= - vec![], - // physical_nic= - physical_nic, - ) - .await?; - - let running_zone = RunningZone::boot(installed_zone).await?; - - for addr in &req.addresses { - info!(self.log, "Ensuring address {} exists", addr.to_string()); - let addr_request = - AddressRequest::new_static(IpAddr::V6(*addr), None); - running_zone.ensure_address(addr_request).await?; - info!( - self.log, - "Ensuring address {} exists - OK", - addr.to_string() - ); + _ => (), } + } + Ok(None) + } - info!(self.log, "GZ addresses: {:#?}", req.gz_addresses); - for &addr in &req.gz_addresses { - info!( - self.log, - "Ensuring GZ address {} exists", - addr.to_string() - ); + async fn initialize_zone( + &self, + request: &ServiceZoneRequest, + ) -> Result { + let device_names = self.devices_needed(request)?; + let link = self.link_needed(request)?; + + let devices: Vec = device_names + .iter() + .map(|d| zone::Device { name: d.to_string() }) + .collect(); + + let installed_zone = InstalledZone::install( + &self.inner.log, + &self.inner.vnic_allocator, + &request.zone_type.to_string(), + // unique_name= + None, + // dataset= + &[], + &devices, + // opte_ports= + vec![], + link, + ) + .await?; - let addr_name = req.zone_name.replace(&['-', '_'][..], ""); - Zones::ensure_has_global_zone_v6_address( - self.underlay_vnic.clone(), - addr, - &addr_name, - ) - .map_err(|err| Error::GzAddress { - message: format!( - "adding address on behalf of service zone '{}'", - req.zone_name - ), - err, - })?; + let running_zone = RunningZone::boot(installed_zone).await?; - // If this address is in a new ipv6 prefix, notify maghemite so - // it can advertise it to other sleds. - self.advertise_prefix_of_address(addr).await; - } + for addr in &request.addresses { + info!( + self.inner.log, + "Ensuring address {} exists", + addr.to_string() + ); + let addr_request = + AddressRequest::new_static(IpAddr::V6(*addr), None); + running_zone.ensure_address(addr_request).await?; + info!( + self.inner.log, + "Ensuring address {} exists - OK", + addr.to_string() + ); + } - let gateway = if !req.gz_addresses.is_empty() { - // If this service supplies its own GZ address, add a route. - // - // This is currently being used for the DNS service. - // - // TODO: consider limiting the number of GZ addresses which - // can be supplied - now that we're actively using it, we - // aren't really handling the "many GZ addresses" case, and it - // doesn't seem necessary now. - req.gz_addresses[0] - } else { - self.underlay_address - }; + info!(self.inner.log, "GZ addresses: {:#?}", request.gz_addresses); + for &addr in &request.gz_addresses { + info!( + self.inner.log, + "Ensuring GZ address {} exists", + addr.to_string() + ); - running_zone.add_default_route(gateway).await.map_err(|err| { - Error::ZoneCommand { intent: "Adding Route".to_string(), err } + let addr_name = + request.zone_type.to_string().replace(&['-', '_'][..], ""); + Zones::ensure_has_global_zone_v6_address( + self.inner.underlay_vnic.clone(), + addr, + &addr_name, + ) + .map_err(|err| Error::GzAddress { + message: format!( + "adding address on behalf of service zone '{}'", + request.zone_type + ), + err, })?; - for service in &req.services { - // TODO: Related to - // https://github.com/oxidecomputer/omicron/pull/1124 , should we - // avoid importing this manifest? - debug!(self.log, "importing manifest"); - - let smfh = SmfHelper::new(&running_zone, service); - smfh.import_manifest()?; - - match &service { - ServiceType::Nexus { internal_ip, external_ip } => { - info!(self.log, "Setting up Nexus service"); - - // The address of Nexus' external interface is a special - // case; it may be an IPv4 address. - let addr_request = - AddressRequest::new_static(*external_ip, None); - running_zone - .ensure_external_address_with_name( - addr_request, - "public", - ) - .await?; - - if let IpAddr::V4(_public_addr4) = *external_ip { - // If requested, create a default route back through - // the internet gateway. - if let Some(ref gateway) = - self.config.gateway_address - { - running_zone - .add_default_route4(*gateway) - .await - .map_err(|err| Error::ZoneCommand { - intent: "Adding Route".to_string(), - err, - })?; - } + // If this address is in a new ipv6 prefix, notify maghemite so + // it can advertise it to other sleds. + self.advertise_prefix_of_address(addr).await; + } + + let gateway = if !request.gz_addresses.is_empty() { + // If this service supplies its own GZ address, add a route. + // + // This is currently being used for the DNS service. + // + // TODO: consider limiting the number of GZ addresses which + // can be supplied - now that we're actively using it, we + // aren't really handling the "many GZ addresses" case, and it + // doesn't seem necessary now. + request.gz_addresses[0] + } else { + self.inner.underlay_address + }; + + running_zone.add_default_route(gateway).await.map_err(|err| { + Error::ZoneCommand { intent: "Adding Route".to_string(), err } + })?; + + for service in &request.services { + // TODO: Related to + // https://github.com/oxidecomputer/omicron/pull/1124 , should we + // avoid importing this manifest? + debug!(self.inner.log, "importing manifest"); + + let smfh = SmfHelper::new(&running_zone, service); + smfh.import_manifest()?; + + match &service { + ServiceType::Nexus { internal_ip, external_ip } => { + info!(self.inner.log, "Setting up Nexus service"); + + // The address of Nexus' external interface is a special + // case; it may be an IPv4 address. + let addr_request = + AddressRequest::new_static(*external_ip, None); + running_zone + .ensure_external_address_with_name( + addr_request, + "public", + ) + .await?; + + if let IpAddr::V4(_public_addr4) = *external_ip { + // If requested, create a default route back through + // the internet gateway. + if let Some(ref gateway) = + self.inner.config.gateway_address + { + running_zone + .add_default_route4(*gateway) + .await + .map_err(|err| Error::ZoneCommand { + intent: "Adding Route".to_string(), + err, + })?; } + } - let cert_file = - PathBuf::from("/var/nexus/certs/cert.pem"); - let key_file = - PathBuf::from("/var/nexus/certs/key.pem"); + let cert_file = PathBuf::from("/var/nexus/certs/cert.pem"); + let key_file = PathBuf::from("/var/nexus/certs/key.pem"); - // Nexus takes a separate config file for parameters which - // cannot be known at packaging time. - let deployment_config = NexusDeploymentConfig { - id: req.id, - rack_id: self.rack_id, + // Nexus takes a separate config file for parameters which + // cannot be known at packaging time. + let deployment_config = NexusDeploymentConfig { + id: request.id, + rack_id: self.inner.rack_id, // Request two dropshot servers: One for HTTP (port 80), // one for HTTPS (port 443). @@ -525,117 +623,147 @@ impl ServiceManager { ..Default::default() }, subnet: Ipv6Subnet::::new( - self.underlay_address, + self.inner.underlay_address, ), // TODO: Switch to inferring this URL by DNS. database: nexus_config::Database::FromUrl { url: PostgresConfigWithUrl::from_str( - "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" + "postgresql://root@[fd00:1122:3344:0101::3]:32221/omicron?sslmode=disable" ).unwrap(), } }; - // Copy the partial config file to the expected location. - let config_dir = (self.config.get_svc_config_dir)( - running_zone.name(), - &req.zone_name, - ); - let partial_config_path = - config_dir.join(PARTIAL_CONFIG_FILENAME); - let config_path = - config_dir.join(COMPLETE_CONFIG_FILENAME); - tokio::fs::copy(partial_config_path, &config_path) - .await - .map_err(|err| Error::Io { - path: config_path.clone(), - err, - })?; - - // Serialize the configuration and append it into the file. - let serialized_cfg = - toml::Value::try_from(&deployment_config) - .expect("Cannot serialize config"); - let mut map = toml::map::Map::new(); - map.insert("deployment".to_string(), serialized_cfg); - let config_str = - toml::to_string(&map).map_err(|err| { - Error::TomlSerialize { - path: config_path.clone(), - err, - } - })?; - let mut file = tokio::fs::OpenOptions::new() - .append(true) - .open(&config_path) - .await - .map_err(|err| Error::Io { - path: config_path.clone(), - err, - })?; - file.write_all(config_str.as_bytes()).await.map_err( - |err| Error::Io { path: config_path.clone(), err }, - )?; - } - ServiceType::InternalDns { - server_address, - dns_address, - } => { - info!(self.log, "Setting up internal-dns service"); - smfh.setprop( - "config/server_address", - format!( - "[{}]:{}", - server_address.ip(), - server_address.port(), - ), - )?; - smfh.setprop( - "config/dns_address", - &format!( - "[{}]:{}", - dns_address.ip(), - dns_address.port(), - ), - )?; - - // Refresh the manifest with the new properties we set, - // so they become "effective" properties when the service is enabled. - smfh.refresh()?; - } - ServiceType::Oximeter => { - info!(self.log, "Setting up oximeter service"); - - let address = req.addresses[0]; - smfh.setprop("config/id", req.id)?; - smfh.setprop( - "config/address", - &format!("[{}]:{}", address, OXIMETER_PORT), - )?; - smfh.refresh()?; - } - ServiceType::Dendrite { asic } => { - info!(self.log, "Setting up dendrite service"); - - let address = req.addresses[0]; - smfh.setprop("config/asic", asic)?; - smfh.setprop( - "config/address", - &format!("[{}]:{}", address, DENDRITE_PORT,), - )?; - smfh.refresh()?; - } - ServiceType::Tfport { pkt_source } => { - info!(self.log, "Setting up tfport service"); + // Copy the partial config file to the expected location. + let config_dir = (self.inner.config.get_svc_config_dir)( + running_zone.name(), + &request.zone_type.to_string(), + ); + let partial_config_path = + config_dir.join(PARTIAL_CONFIG_FILENAME); + let config_path = config_dir.join(COMPLETE_CONFIG_FILENAME); + tokio::fs::copy(partial_config_path, &config_path) + .await + .map_err(|err| Error::Io { + path: config_path.clone(), + err, + })?; + + // Serialize the configuration and append it into the file. + let serialized_cfg = + toml::Value::try_from(&deployment_config) + .expect("Cannot serialize config"); + let mut map = toml::map::Map::new(); + map.insert("deployment".to_string(), serialized_cfg); + let config_str = toml::to_string(&map).map_err(|err| { + Error::TomlSerialize { path: config_path.clone(), err } + })?; + let mut file = tokio::fs::OpenOptions::new() + .append(true) + .open(&config_path) + .await + .map_err(|err| Error::Io { + path: config_path.clone(), + err, + })?; + file.write_all(config_str.as_bytes()).await.map_err( + |err| Error::Io { path: config_path.clone(), err }, + )?; + } + ServiceType::InternalDns { server_address, dns_address } => { + info!(self.inner.log, "Setting up internal-dns service"); + smfh.setprop( + "config/server_address", + format!( + "[{}]:{}", + server_address.ip(), + server_address.port(), + ), + )?; + smfh.setprop( + "config/dns_address", + &format!( + "[{}]:{}", + dns_address.ip(), + dns_address.port(), + ), + )?; - smfh.setprop("config/pkt_source", pkt_source)?; - smfh.refresh()?; - } + // Refresh the manifest with the new properties we set, + // so they become "effective" properties when the service is enabled. + smfh.refresh()?; + } + ServiceType::Oximeter => { + info!(self.inner.log, "Setting up oximeter service"); + + let address = request.addresses[0]; + smfh.setprop("config/id", request.id)?; + smfh.setprop( + "config/address", + &format!("[{}]:{}", address, OXIMETER_PORT), + )?; + smfh.refresh()?; } + ServiceType::Dendrite { asic } => { + info!(self.inner.log, "Setting up dendrite service"); + + let address = request.addresses[0]; + smfh.setprop("config/asic", asic)?; + smfh.setprop( + "config/address", + &format!("[{}]:{}", address, DENDRITE_PORT,), + )?; + smfh.refresh()?; + } + ServiceType::Tfport { pkt_source } => { + info!(self.inner.log, "Setting up tfport service"); + + smfh.setprop("config/pkt_source", pkt_source)?; + smfh.refresh()?; + } + } + + debug!(self.inner.log, "enabling service"); + smfh.enable()?; + } + Ok(running_zone) + } - debug!(self.log, "enabling service"); - smfh.enable()?; + // Populates `existing_zones` according to the requests in `services`. + // + // At the point this function is invoked, IP addresses have already been + // allocated (by either RSS or Nexus). However, this function explicitly + // assigns such addresses to interfaces within zones. + async fn initialize_services_locked( + &self, + existing_zones: &mut Vec, + requests: &Vec, + ) -> Result<(), Error> { + // TODO(https://github.com/oxidecomputer/omicron/issues/726): + // As long as we ensure the requests don't overlap, we could + // parallelize this request. + for req in requests { + info!( + self.inner.log, + "Ensuring service zone is initialized: {:?}", req.zone_type + ); + // Before we bother allocating anything for this request, check if + // this service has already been created. + let expected_zone_name = + InstalledZone::get_zone_name(&req.zone_type.to_string(), None); + if existing_zones.iter().any(|z| z.name() == expected_zone_name) { + info!( + self.inner.log, + "Service zone {} already exists", req.zone_type + ); + continue; + } else { + info!( + self.inner.log, + "Service zone {} does not yet exist", req.zone_type + ); } + let running_zone = self.initialize_zone(req).await?; existing_zones.push(running_zone); } Ok(()) @@ -645,11 +773,11 @@ impl ServiceManager { /// /// These services will be instantiated by this function, will be recorded /// to a local file to ensure they start automatically on next boot. - pub async fn ensure( + pub async fn ensure_persistent( &self, request: ServiceEnsureBody, ) -> Result<(), Error> { - let mut existing_zones = self.zones.lock().await; + let mut existing_zones = self.inner.zones.lock().await; let config_path = self.services_config_path(); let services_to_initialize = { @@ -676,7 +804,7 @@ impl ServiceManager { // the case of changing configurations, rather than just doing // that removal implicitly. warn!( - self.log, + self.inner.log, "Cannot request services on this sled, differing configurations: {:#?}", known_set.symmetric_difference(&requested_set) ); @@ -709,6 +837,90 @@ impl ServiceManager { Ok(()) } + + pub async fn ensure_switch(&self, request: Option) { + let log = &self.inner.log; + let mut switch_zone = self.inner.switch_zone.lock().await; + + match (&mut *switch_zone, request) { + (SwitchZone::Disabled, Some(request)) => { + info!(log, "Enabling switch zone (new)"); + let mgr = self.clone(); + let (exit_tx, exit_rx) = oneshot::channel(); + *switch_zone = SwitchZone::Initializing { + request, + worker: Some(Task { + exit_tx, + initializer: tokio::task::spawn(async move { + mgr.initialize_switch_zone(exit_rx).await + }), + }), + }; + } + (SwitchZone::Initializing { .. }, Some(_)) => { + info!(log, "Enabling switch zone (already underway)"); + } + (SwitchZone::Running(_), Some(_)) => { + info!(log, "Enabling switch zone (already complete)"); + } + (SwitchZone::Disabled, None) => { + info!(log, "Disabling switch zone (already complete)"); + } + (SwitchZone::Initializing { worker, .. }, None) => { + info!(log, "Disabling switch zone (was initializing)"); + let worker = worker + .take() + .expect("Initializing without background task"); + // If this succeeds, we told the background task to exit + // successfully. If it fails, the background task already + // exited. + let _ = worker.exit_tx.send(()); + worker + .initializer + .await + .expect("Switch initializer task panicked"); + *switch_zone = SwitchZone::Disabled; + } + (SwitchZone::Running(_), None) => { + info!(log, "Disabling switch zone (was running)"); + *switch_zone = SwitchZone::Disabled; + } + } + } + + async fn initialize_switch_zone(&self, mut exit_rx: oneshot::Receiver<()>) { + loop { + { + let mut switch_zone = self.inner.switch_zone.lock().await; + match &*switch_zone { + SwitchZone::Initializing { request, .. } => { + match self.initialize_zone(&request).await { + Ok(zone) => { + *switch_zone = SwitchZone::Running(zone); + return; + } + Err(err) => { + warn!( + self.inner.log, + "Failed to initialize switch zone: {err}" + ); + } + } + } + _ => return, + } + } + + tokio::select! { + // If we've been told to stop trying, bail. + _ = &mut exit_rx => return, + + // Poll for the device every second - this timeout is somewhat + // arbitrary, but we probably don't want to use backoff here. + _ = tokio::time::sleep(tokio::time::Duration::from_secs(1)) => (), + }; + } + } } #[cfg(test)] @@ -719,12 +931,12 @@ mod test { svc, zone::MockZones, }; + use crate::params::ZoneType; use std::net::Ipv6Addr; use std::os::unix::process::ExitStatusExt; use uuid::Uuid; - const SVC_NAME: &str = "my_svc"; - const EXPECTED_ZONE_NAME: &str = "oxz_my_svc"; + const EXPECTED_ZONE_NAME: &str = "oxz_oximeter"; // Returns the expectations for a new service to be created. fn expect_new_service() -> Vec> { @@ -783,10 +995,10 @@ mod test { async fn ensure_new_service(mgr: &ServiceManager, id: Uuid) { let _expectations = expect_new_service(); - mgr.ensure(ServiceEnsureBody { + mgr.ensure_persistent(ServiceEnsureBody { services: vec![ServiceZoneRequest { id, - zone_name: SVC_NAME.to_string(), + zone_type: ZoneType::Oximeter, addresses: vec![Ipv6Addr::LOCALHOST], gz_addresses: vec![], services: vec![ServiceType::Oximeter], @@ -799,10 +1011,10 @@ mod test { // Prepare to call "ensure" for a service which already exists. We should // return the service without actually installing a new zone. async fn ensure_existing_service(mgr: &ServiceManager, id: Uuid) { - mgr.ensure(ServiceEnsureBody { + mgr.ensure_persistent(ServiceEnsureBody { services: vec![ServiceZoneRequest { id, - zone_name: SVC_NAME.to_string(), + zone_type: ZoneType::Oximeter, addresses: vec![Ipv6Addr::LOCALHOST], gz_addresses: vec![], services: vec![ServiceType::Oximeter], diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 4cc5d65ad75..b18fb1a27cd 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -7,7 +7,7 @@ use crate::bootstrap::params::SledAgentRequest; use crate::config::Config; use crate::hardware::HardwareManager; -use crate::illumos::vnic::VnicKind; +use crate::illumos::link::LinkKind; use crate::illumos::zfs::{ Mountpoint, ZONE_ZFS_DATASET, ZONE_ZFS_DATASET_MOUNTPOINT, }; @@ -16,14 +16,18 @@ use crate::illumos::{execute, PFEXEC}; use crate::instance_manager::InstanceManager; use crate::nexus::{LazyNexusClient, NexusRequestQueue}; use crate::params::{ - DatasetKind, DiskStateRequested, InstanceHardware, InstanceMigrateParams, - InstanceRuntimeStateRequested, InstanceSerialConsoleData, - ServiceEnsureBody, VpcFirewallRule, + DatasetKind, DendriteAsic, DiskStateRequested, InstanceHardware, + InstanceMigrateParams, InstanceRuntimeStateRequested, + InstanceSerialConsoleData, ServiceEnsureBody, ServiceType, + ServiceZoneRequest, VpcFirewallRule, ZoneType, }; use crate::services::{self, ServiceManager}; use crate::storage_manager::StorageManager; use dropshot::HttpError; use futures::stream::{self, StreamExt, TryStreamExt}; +use omicron_common::address::{ + get_sled_address, get_switch_zone_address, Ipv6Subnet, SLED_PREFIX, +}; use omicron_common::api::{ internal::nexus::DiskRuntimeState, internal::nexus::InstanceRuntimeState, internal::nexus::UpdateArtifact, @@ -32,7 +36,7 @@ use omicron_common::backoff::{ internal_service_policy_with_max, retry_notify, BackoffError, }; use slog::Logger; -use std::net::SocketAddrV6; +use std::net::{Ipv6Addr, SocketAddrV6}; use std::process::Command; use std::sync::Arc; use uuid::Uuid; @@ -152,8 +156,13 @@ struct SledAgentInner { // ID of the Sled id: Uuid, - // Sled underlay address - addr: SocketAddrV6, + // The sled's initial configuration + config: Config, + + // Subnet of the Sled's underlay. + // + // The Sled Agent's address can be derived from this value. + subnet: Ipv6Subnet, // Component of Sled Agent responsible for storage and dataset management. storage: StorageManager, @@ -174,6 +183,16 @@ struct SledAgentInner { nexus_request_queue: NexusRequestQueue, } +impl SledAgentInner { + fn sled_address(&self) -> SocketAddrV6 { + get_sled_address(self.subnet) + } + + fn switch_ip(&self) -> Ipv6Addr { + get_switch_zone_address(self.subnet) + } +} + #[derive(Clone)] pub struct SledAgent { inner: Arc, @@ -185,7 +204,6 @@ impl SledAgent { config: &Config, log: Logger, lazy_nexus_client: LazyNexusClient, - sled_address: SocketAddrV6, request: SledAgentRequest, ) -> Result { let id = config.id; @@ -223,6 +241,7 @@ impl SledAgent { // should be removed once the Sled Agent is initialized with a // RSS-provided IP address. In the meantime, we use one from the // configuration file. + let sled_address = request.sled_address(); Zones::ensure_has_global_zone_v6_address( etherstub_vnic.clone(), *sled_address.ip(), @@ -340,7 +359,8 @@ impl SledAgent { let sled_agent = SledAgent { inner: Arc::new(SledAgentInner { id, - addr: sled_address, + config: config.clone(), + subnet: request.subnet, storage, instances, hardware, @@ -424,14 +444,34 @@ impl SledAgent { } async fn ensure_scrimlet_services_active(&self, log: &Logger) { - // TODO(https://github.com/oxidecomputer/omicron/issues/823): Launch the switch zone, with - // Dendrite, MGS, and any other services we want to enable. - warn!(log, "Activating scrimlet services not yet implemented"); + info!(log, "Ensuring scrimlet services (enabling services)"); + + let services = match self.inner.config.stub_scrimlet { + Some(_) => { + vec![ServiceType::Dendrite { asic: DendriteAsic::TofinoStub }] + } + None => { + vec![ + ServiceType::Dendrite { asic: DendriteAsic::TofinoAsic }, + ServiceType::Tfport { pkt_source: "tfpkt0".to_string() }, + ] + } + }; + + let request = ServiceZoneRequest { + id: Uuid::new_v4(), + zone_type: ZoneType::Switch, + addresses: vec![self.inner.switch_ip()], + gz_addresses: vec![], + services, + }; + + self.inner.services.ensure_switch(Some(request)).await; } async fn ensure_scrimlet_services_deactive(&self, log: &Logger) { - // TODO(https://github.com/oxidecomputer/omicron/issues/823): Terminate the switch zone. - warn!(log, "Deactivating scrimlet services not yet implemented"); + info!(log, "Ensuring scrimlet services (disabling services)"); + self.inner.services.ensure_switch(None).await; } pub fn id(&self) -> Uuid { @@ -442,7 +482,7 @@ impl SledAgent { fn notify_nexus_about_self(&self, log: &Logger) { let sled_id = self.inner.id; let lazy_nexus_client = self.inner.lazy_nexus_client.clone(); - let sled_address = self.inner.addr; + let sled_address = self.inner.sled_address(); let is_scrimlet = self.inner.hardware.is_scrimlet(); let log = log.clone(); let fut = async move { @@ -511,7 +551,7 @@ impl SledAgent { &self, requested_services: ServiceEnsureBody, ) -> Result<(), Error> { - self.inner.services.ensure(requested_services).await?; + self.inner.services.ensure_persistent(requested_services).await?; Ok(()) } @@ -692,7 +732,7 @@ async fn delete_omicron_vnics(log: &Logger) -> Result<(), Error> { log, "Deleting existing VNIC"; "vnic_name" => &vnic, - "vnic_kind" => ?VnicKind::from_name(&vnic).unwrap(), + "vnic_kind" => ?LinkKind::from_name(&vnic).unwrap(), ); Dladm::delete_vnic(&vnic) }) diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 492c26fbbe3..70badc34b5e 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -5,8 +5,8 @@ //! Management of sled-local storage. use crate::illumos::dladm::Etherstub; +use crate::illumos::link::VnicAllocator; use crate::illumos::running_zone::{InstalledZone, RunningZone}; -use crate::illumos::vnic::VnicAllocator; use crate::illumos::zone::AddressRequest; use crate::illumos::zpool::ZpoolName; use crate::illumos::{zfs::Mountpoint, zone::ZONE_PREFIX, zpool::ZpoolInfo}; diff --git a/smf/internal-dns/manifest.xml b/smf/internal-dns/manifest.xml index 96384d8dc0c..b8e7b7f7901 100644 --- a/smf/internal-dns/manifest.xml +++ b/smf/internal-dns/manifest.xml @@ -1,9 +1,9 @@ - + - + @@ -13,7 +13,7 @@ diff --git a/smf/nexus/config-partial.toml b/smf/nexus/config-partial.toml index c2ba69e5384..2dedbc41625 100644 --- a/smf/nexus/config-partial.toml +++ b/smf/nexus/config-partial.toml @@ -22,4 +22,4 @@ if_exists = "append" # Configuration for interacting with the timeseries database [timeseries_db] -address = "[fd00:1122:3344:0101::5]:8123" +address = "[fd00:1122:3344:0101::6]:8123" diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index efc066260dd..c641cdf4c73 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -34,44 +34,44 @@ mac = "00:0d:b9:54:fe:e4" [[request.dataset]] id = "09a9a25f-2602-4e2f-9630-31af9c492c3e" zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::6]:32345" +address = "[fd00:1122:3344:0101::7]:32345" dataset_kind.type = "crucible" [[request.dataset]] id = "2713b37a-3043-4ed5-aaff-f38200e45cfb" zpool_id = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::7]:32345" +address = "[fd00:1122:3344:0101::8]:32345" dataset_kind.type = "crucible" [[request.dataset]] id = "ffd16cad-e5d5-495e-9c59-4312a3857d91" zpool_id = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::8]:32345" +address = "[fd00:1122:3344:0101::9]:32345" dataset_kind.type = "crucible" [[request.dataset]] id = "4d08fc19-3d5f-4f6b-9c48-925f8eac7255" zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::2]:32221" +address = "[fd00:1122:3344:0101::3]:32221" dataset_kind.type = "cockroach_db" -dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] +dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::3]:32221" ] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate clickhouse datasets. [[request.dataset]] id = "a3505b41-a592-420b-84f2-3d76bf0e0a81" zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::5]:8123" +address = "[fd00:1122:3344:0101::6]:8123" dataset_kind.type = "clickhouse" [[request.service_zone]] id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" -zone_name = "nexus" -addresses = [ "fd00:1122:3344:0101::3" ] +zone_type = "nexus" +addresses = [ "fd00:1122:3344:0101::4" ] gz_addresses = [] [[request.service_zone.services]] type = "nexus" -internal_ip = "fd00:1122:3344:0101::3" +internal_ip = "fd00:1122:3344:0101::4" # NOTE: In the lab, use "172.20.15.226" external_ip = "192.168.1.20" @@ -79,23 +79,8 @@ external_ip = "192.168.1.20" # should allocate Oximeter services. [[request.service_zone]] id = "1da65e5b-210c-4859-a7d7-200c1e659972" -zone_name = "oximeter" -addresses = [ "fd00:1122:3344:0101::4" ] +zone_type = "oximeter" +addresses = [ "fd00:1122:3344:0101::5" ] gz_addresses = [] [[request.service_zone.services]] type = "oximeter" - -[[request.service_zone]] -id = "a0fe5ebc-9261-6f77-acc1-972481755789" -zone_name = "switch" -addresses = [ "fd00:1122:3344:0101::9" ] -gz_addresses = [] -[[request.service_zone.services]] -type = "dendrite" -asic = "tofino_stub" -#[[request.service_zone.services]] -# The tfport service will not work with the tofino_stub asic, -# as there is no network traffic to multiplex or network device -# to layer over. -#type = "tfport" -#pkt_source = "tfpkt0"