From edf9de97dad723e0d521682a8d2c35db6eca425c Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 5 Nov 2025 14:21:46 -0500 Subject: [PATCH 01/11] wip: boundary NTP converted --- .../planning/src/blueprint_builder/builder.rs | 7 +++- .../planning/src/blueprint_editor.rs | 5 ++- .../src/blueprint_editor/allocators.rs | 14 ++----- .../allocators/external_networking.rs | 30 +++++++------- nexus/reconfigurator/planning/src/planner.rs | 41 +++++++++++++++++-- 5 files changed, 64 insertions(+), 33 deletions(-) diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 5a31605b2e8..8a00183ca58 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -137,6 +137,8 @@ pub enum Error { }, #[error("error constructing resource allocator")] AllocatorInput(#[from] BlueprintResourceAllocatorInputError), + #[error("error constructing external networking allocator")] + ExternalNetworkingAllocator(#[source] anyhow::Error), #[error("no commissioned sleds - rack subnet is unknown")] RackSubnetUnknownNoSleds, #[error("no reserved subnets available for internal DNS")] @@ -1872,6 +1874,7 @@ impl<'a> BlueprintBuilder<'a> { &mut self, sled_id: SledUuid, image_source: BlueprintZoneImageSource, + external_ip: ExternalSnatNetworkingChoice, ) -> Result<(), Error> { // The upstream NTP/DNS servers and domain _should_ come from Nexus and // be modifiable by the operator, but currently can only be set at RSS. @@ -1896,6 +1899,7 @@ impl<'a> BlueprintBuilder<'a> { dns_servers, domain, image_source, + external_ip, ) } @@ -1906,6 +1910,7 @@ impl<'a> BlueprintBuilder<'a> { dns_servers: Vec, domain: Option, image_source: BlueprintZoneImageSource, + external_ip: ExternalSnatNetworkingChoice, ) -> Result<(), Error> { let editor = self.sled_editors.get_mut(&sled_id).ok_or_else(|| { Error::Planner(anyhow!( @@ -1953,7 +1958,7 @@ impl<'a> BlueprintBuilder<'a> { nic_ip, nic_subnet, nic_mac, - } = self.resource_allocator()?.next_external_ip_boundary_ntp()?; + } = external_ip; let external_ip = OmicronZoneExternalSnatIp { id: self.rng.sled_rng(sled_id).next_external_ip(), snat_cfg, diff --git a/nexus/reconfigurator/planning/src/blueprint_editor.rs b/nexus/reconfigurator/planning/src/blueprint_editor.rs index bb1e2dbaec1..598b0ffb9af 100644 --- a/nexus/reconfigurator/planning/src/blueprint_editor.rs +++ b/nexus/reconfigurator/planning/src/blueprint_editor.rs @@ -10,7 +10,10 @@ mod allocators; mod sled_editor; pub use allocators::BlueprintResourceAllocatorInputError; +pub use allocators::ExternalNetworkingAllocator; +pub use allocators::ExternalNetworkingChoice; pub use allocators::ExternalNetworkingError; +pub use allocators::ExternalSnatNetworkingChoice; pub use sled_editor::DatasetsEditError; pub use sled_editor::DisksEditError; pub use sled_editor::MultipleDatasetsOfKind; @@ -19,8 +22,6 @@ pub use sled_editor::SledInputError; pub use sled_editor::ZonesEditError; pub(crate) use allocators::BlueprintResourceAllocator; -pub(crate) use allocators::ExternalNetworkingChoice; -pub(crate) use allocators::ExternalSnatNetworkingChoice; pub(crate) use sled_editor::DiskExpungeDetails; pub(crate) use sled_editor::EditedSled; pub(crate) use sled_editor::SledEditor; diff --git a/nexus/reconfigurator/planning/src/blueprint_editor/allocators.rs b/nexus/reconfigurator/planning/src/blueprint_editor/allocators.rs index c4246fd2908..437249df152 100644 --- a/nexus/reconfigurator/planning/src/blueprint_editor/allocators.rs +++ b/nexus/reconfigurator/planning/src/blueprint_editor/allocators.rs @@ -10,12 +10,10 @@ use nexus_types::deployment::ExternalIpPolicy; mod external_networking; +pub use self::external_networking::ExternalNetworkingAllocator; +pub use self::external_networking::ExternalNetworkingChoice; pub use self::external_networking::ExternalNetworkingError; - -pub(crate) use self::external_networking::ExternalNetworkingChoice; -pub(crate) use self::external_networking::ExternalSnatNetworkingChoice; - -use self::external_networking::ExternalNetworkingAllocator; +pub use self::external_networking::ExternalSnatNetworkingChoice; #[derive(Debug, thiserror::Error)] pub enum BlueprintResourceAllocatorInputError { @@ -58,10 +56,4 @@ impl BlueprintResourceAllocator { ) -> Result { self.external_networking.for_new_external_dns() } - - pub(crate) fn next_external_ip_boundary_ntp( - &mut self, - ) -> Result { - self.external_networking.for_new_boundary_ntp() - } } diff --git a/nexus/reconfigurator/planning/src/blueprint_editor/allocators/external_networking.rs b/nexus/reconfigurator/planning/src/blueprint_editor/allocators/external_networking.rs index 769dc1f44ff..4a3fa62c161 100644 --- a/nexus/reconfigurator/planning/src/blueprint_editor/allocators/external_networking.rs +++ b/nexus/reconfigurator/planning/src/blueprint_editor/allocators/external_networking.rs @@ -46,7 +46,7 @@ pub enum ExternalNetworkingError { } #[derive(Debug)] -pub(super) struct ExternalNetworkingAllocator { +pub struct ExternalNetworkingAllocator { // These fields mirror how RSS chooses addresses for zone NICs. boundary_ntp_v4_ips: AvailableIterator<'static, Ipv4Addr>, boundary_ntp_v6_ips: AvailableIterator<'static, Ipv6Addr>, @@ -67,7 +67,7 @@ pub(super) struct ExternalNetworkingAllocator { } impl ExternalNetworkingAllocator { - pub(super) fn new<'b>( + pub(crate) fn new<'b>( running_omicron_zones: impl Iterator, external_ip_policy: &ExternalIpPolicy, ) -> anyhow::Result { @@ -264,7 +264,7 @@ impl ExternalNetworkingAllocator { }) } - pub(super) fn for_new_nexus( + pub(crate) fn for_new_nexus( &mut self, ) -> Result { let external_ip = self.external_ip_alloc.claim_next_exclusive_ip()?; @@ -301,7 +301,7 @@ impl ExternalNetworkingAllocator { }) } - pub(super) fn for_new_boundary_ntp( + pub(crate) fn for_new_boundary_ntp( &mut self, ) -> Result { let snat_cfg = self.external_ip_alloc.claim_next_snat_ip()?; @@ -338,7 +338,7 @@ impl ExternalNetworkingAllocator { }) } - pub(super) fn for_new_external_dns( + pub(crate) fn for_new_external_dns( &mut self, ) -> Result { let external_ip = self @@ -381,19 +381,19 @@ impl ExternalNetworkingAllocator { } #[derive(Debug, Clone, Copy)] -pub(crate) struct ExternalNetworkingChoice { - pub(crate) external_ip: IpAddr, - pub(crate) nic_ip: IpAddr, - pub(crate) nic_subnet: IpNet, - pub(crate) nic_mac: MacAddr, +pub struct ExternalNetworkingChoice { + pub external_ip: IpAddr, + pub nic_ip: IpAddr, + pub nic_subnet: IpNet, + pub nic_mac: MacAddr, } #[derive(Debug, Clone, Copy)] -pub(crate) struct ExternalSnatNetworkingChoice { - pub(crate) snat_cfg: SourceNatConfig, - pub(crate) nic_ip: IpAddr, - pub(crate) nic_subnet: IpNet, - pub(crate) nic_mac: MacAddr, +pub struct ExternalSnatNetworkingChoice { + pub snat_cfg: SourceNatConfig, + pub nic_ip: IpAddr, + pub nic_subnet: IpNet, + pub nic_mac: MacAddr, } /// Combines a base iterator with an `in_use` set, filtering out any elements diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 6c59470b112..3866c99f7e1 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -13,6 +13,7 @@ use crate::blueprint_builder::EnsureMupdateOverrideAction; use crate::blueprint_builder::Error; use crate::blueprint_builder::Operation; use crate::blueprint_editor::DisksEditError; +use crate::blueprint_editor::ExternalNetworkingAllocator; use crate::blueprint_editor::SledEditError; use crate::mgs_updates::ImpossibleUpdatePolicy; use crate::mgs_updates::MgsUpdatePlanner; @@ -1010,6 +1011,11 @@ impl<'a> Planner<'a> { // discretionary zones, so defer its creation until it's needed. let mut zone_placement = None; + // Likewise, we usually don't need to create an + // `ExternalNetworkingAllocator` to add discretionary zones, so defer + // its creation until it's needed. + let mut external_networking_alloc = None; + for zone_kind in [ DiscretionaryOmicronZone::BoundaryNtp, DiscretionaryOmicronZone::Clickhouse, @@ -1087,9 +1093,9 @@ impl<'a> Planner<'a> { if num_zones_to_add == 0 { continue; } - // We need to add at least one zone; construct our `zone_placement` - // (or reuse the existing one if a previous loop iteration already - // created it). + // We need to add at least one zone; construct our + // `zone_placement` (or reuse the existing one if a previous + // loop iteration already created it)... let zone_placement = zone_placement.get_or_insert_with(|| { // This constructs a picture of the sleds as we currently // understand them, as far as which sleds have discretionary @@ -1124,8 +1130,30 @@ impl<'a> Planner<'a> { }); OmicronZonePlacement::new(current_discretionary_zones) }); + // ...and our external networking allocator. (We can't use + // `get_or_insert_with()` because we can't bubble out the error, + // so effectively recreate that manually. + let external_networking_alloc = + match external_networking_alloc.as_mut() { + Some(allocator) => allocator, + None => { + let allocator = ExternalNetworkingAllocator::new( + self.blueprint + .current_zones( + BlueprintZoneDisposition::is_in_service, + ) + .map(|(_sled_id, zone)| zone), + self.input.external_ip_policy(), + ) + .map_err(Error::ExternalNetworkingAllocator)?; + external_networking_alloc = Some(allocator); + external_networking_alloc.as_mut().unwrap() + } + }; + self.add_discretionary_zones( zone_placement, + external_networking_alloc, zone_kind, num_zones_to_add, image_source, @@ -1237,6 +1265,7 @@ impl<'a> Planner<'a> { fn add_discretionary_zones( &mut self, zone_placement: &mut OmicronZonePlacement, + external_networking_alloc: &mut ExternalNetworkingAllocator, kind: DiscretionaryOmicronZone, num_zones_to_add: usize, image_source: BlueprintZoneImageSource, @@ -1274,8 +1303,12 @@ impl<'a> Planner<'a> { let image = image_source.clone(); match kind { DiscretionaryOmicronZone::BoundaryNtp => { + let external_ip = + external_networking_alloc.for_new_boundary_ntp()?; self.blueprint.sled_promote_internal_ntp_to_boundary_ntp( - sled_id, image, + sled_id, + image, + external_ip, )? } DiscretionaryOmicronZone::Clickhouse => { From 5ec21a8bb9df2f425685d7722d80c861b562c3ec Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 5 Nov 2025 14:34:53 -0500 Subject: [PATCH 02/11] wip: external DNS converted (tests broken until Nexus is too) --- .../planning/src/blueprint_builder/builder.rs | 3 +- .../src/blueprint_editor/allocators.rs | 6 -- nexus/reconfigurator/planning/src/example.rs | 12 ++++ nexus/reconfigurator/planning/src/planner.rs | 56 ++++++++++++------- 4 files changed, 50 insertions(+), 27 deletions(-) diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 8a00183ca58..957e4ba0079 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -1448,6 +1448,7 @@ impl<'a> BlueprintBuilder<'a> { &mut self, sled_id: SledUuid, image_source: BlueprintZoneImageSource, + external_ip: ExternalNetworkingChoice, ) -> Result<(), Error> { let id = self.rng.sled_rng(sled_id).next_zone(); let ExternalNetworkingChoice { @@ -1455,7 +1456,7 @@ impl<'a> BlueprintBuilder<'a> { nic_ip, nic_subnet, nic_mac, - } = self.resource_allocator()?.next_external_ip_external_dns()?; + } = external_ip; let nic = NetworkInterface { id: self.rng.sled_rng(sled_id).next_network_interface(), kind: NetworkInterfaceKind::Service { id: id.into_untyped_uuid() }, diff --git a/nexus/reconfigurator/planning/src/blueprint_editor/allocators.rs b/nexus/reconfigurator/planning/src/blueprint_editor/allocators.rs index 437249df152..f5ce049eeb0 100644 --- a/nexus/reconfigurator/planning/src/blueprint_editor/allocators.rs +++ b/nexus/reconfigurator/planning/src/blueprint_editor/allocators.rs @@ -50,10 +50,4 @@ impl BlueprintResourceAllocator { ) -> Result { self.external_networking.for_new_nexus() } - - pub(crate) fn next_external_ip_external_dns( - &mut self, - ) -> Result { - self.external_networking.for_new_external_dns() - } } diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index 62456b7b4af..13cbc1323f3 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -12,6 +12,7 @@ use std::net::IpAddr; use std::net::Ipv4Addr; use crate::blueprint_builder::BlueprintBuilder; +use crate::blueprint_editor::ExternalNetworkingAllocator; use crate::planner::rng::PlannerRng; use crate::system::RotStateOverrides; use crate::system::SledBuilder; @@ -560,6 +561,11 @@ impl ExampleSystemBuilder { let discretionary_sled_count = base_input.all_sled_ids(SledFilter::Discretionary).count(); + let mut external_networking_alloc = ExternalNetworkingAllocator::new( + std::iter::empty(), // no zones yet! we're about to add them + base_input.external_ip_policy(), + ) + .expect("constructed ExternalNetworkingAllocator"); // * Create disks and non-discretionary zones on all sleds. // * Only create discretionary zones on discretionary sleds. @@ -633,6 +639,11 @@ impl ExampleSystemBuilder { .external_dns_count .on(discretionary_ix, discretionary_sled_count) { + let external_ip = external_networking_alloc + .for_new_external_dns() + .expect( + "should have an external IP for external DNS", + ); builder .sled_add_zone_external_dns( sled_id, @@ -641,6 +652,7 @@ impl ExampleSystemBuilder { .expect( "obtained ExternalDNS image source", ), + external_ip, ) .unwrap(); } diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 3866c99f7e1..a06d0bd30fd 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -1335,7 +1335,13 @@ impl<'a> Planner<'a> { )? } DiscretionaryOmicronZone::ExternalDns => { - self.blueprint.sled_add_zone_external_dns(sled_id, image)? + let external_ip = + external_networking_alloc.for_new_external_dns()?; + self.blueprint.sled_add_zone_external_dns( + sled_id, + image, + external_ip, + )? } DiscretionaryOmicronZone::Nexus => { let nexus_generation = @@ -3287,22 +3293,16 @@ pub(crate) mod test { // because we haven't give it any addresses (which currently // come only from RSS). This is not an error, though. assert!(input.external_ip_policy().external_dns_ips().is_empty()); - let mut builder = BlueprintBuilder::new_based_on( - &logctx.log, - &blueprint1, - &input, - &collection, - TEST_NAME, - PlannerRng::from_entropy(), + let mut external_networking_alloc = ExternalNetworkingAllocator::new( + blueprint1 + .all_omicron_zones(BlueprintZoneDisposition::is_in_service) + .map(|(_, zone)| zone), + input.external_ip_policy(), ) - .expect("failed to build blueprint builder"); - let sled_id = builder.sled_ids_with_zones().next().expect("no sleds"); - builder - .sled_add_zone_external_dns( - sled_id, - BlueprintZoneImageSource::InstallDataset, - ) - .expect_err("can't add external DNS zones"); + .expect("constructed allocator"); + external_networking_alloc + .for_new_external_dns() + .expect_err("should not have available IPs for external DNS"); // Change the policy: add some external DNS IPs. let external_dns_ips = @@ -3339,32 +3339,48 @@ pub(crate) mod test { PlannerRng::from_entropy(), ) .expect("failed to build blueprint builder"); + let mut external_networking_alloc = ExternalNetworkingAllocator::new( + blueprint_builder + .current_zones(BlueprintZoneDisposition::is_in_service) + .map(|(_, zone)| zone), + input.external_ip_policy(), + ) + .expect("constructed allocator"); // Now we can add external DNS zones. We'll add two to the first // sled and one to the second. let (sled_1, sled_2) = { let mut sleds = blueprint_builder.sled_ids_with_zones(); ( - sleds.next().expect("no first sled"), - sleds.next().expect("no second sled"), + sleds.next().expect("at least one sled"), + sleds.next().expect("at least two sleds"), ) }; blueprint_builder .sled_add_zone_external_dns( sled_1, BlueprintZoneImageSource::InstallDataset, + external_networking_alloc + .for_new_external_dns() + .expect("got IP for external DNS"), ) .expect("added external DNS zone"); blueprint_builder .sled_add_zone_external_dns( sled_1, BlueprintZoneImageSource::InstallDataset, + external_networking_alloc + .for_new_external_dns() + .expect("got IP for external DNS"), ) .expect("added external DNS zone"); blueprint_builder .sled_add_zone_external_dns( sled_2, BlueprintZoneImageSource::InstallDataset, + external_networking_alloc + .for_new_external_dns() + .expect("got IP for external DNS"), ) .expect("added external DNS zone"); @@ -3427,13 +3443,13 @@ pub(crate) mod test { let diff = blueprint3.diff_since_blueprint(&blueprint2); println!("2 -> 3 (expunged sled):\n{}", diff.display()); assert_eq!( - blueprint3.sleds[&sled_id] + blueprint3.sleds[&sled_1] .zones .iter() .filter(|zone| { zone.disposition == BlueprintZoneDisposition::Expunged { - as_of_generation: blueprint3.sleds[&sled_id] + as_of_generation: blueprint3.sleds[&sled_1] .sled_agent_generation, ready_for_cleanup: true, } From 81b32856cc1134c8ce415e9d5761686f02196f19 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 5 Nov 2025 14:42:39 -0500 Subject: [PATCH 03/11] wip: convert Nexus --- .../planning/src/blueprint_builder/builder.rs | 78 ++++++++++++------- .../planning/src/blueprint_editor.rs | 2 - .../src/blueprint_editor/allocators.rs | 41 ---------- nexus/reconfigurator/planning/src/example.rs | 4 + nexus/reconfigurator/planning/src/planner.rs | 3 + 5 files changed, 57 insertions(+), 71 deletions(-) diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 957e4ba0079..20eedcbd4ae 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -4,8 +4,6 @@ //! Low-level facility for generating Blueprints -use crate::blueprint_editor::BlueprintResourceAllocator; -use crate::blueprint_editor::BlueprintResourceAllocatorInputError; use crate::blueprint_editor::DiskExpungeDetails; use crate::blueprint_editor::EditedSled; use crate::blueprint_editor::ExternalNetworkingChoice; @@ -79,7 +77,6 @@ use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; -use once_cell::unsync::OnceCell; use slog::Logger; use slog::debug; use slog::error; @@ -135,8 +132,6 @@ pub enum Error { #[source] err: SledEditError, }, - #[error("error constructing resource allocator")] - AllocatorInput(#[from] BlueprintResourceAllocatorInputError), #[error("error constructing external networking allocator")] ExternalNetworkingAllocator(#[source] anyhow::Error), #[error("no commissioned sleds - rack subnet is unknown")] @@ -514,17 +509,6 @@ pub struct BlueprintBuilder<'a> { // These fields are used to allocate resources for sleds. input: &'a PlanningInput, - // `allocators` contains logic for choosing new underlay IPs, external IPs, - // internal DNS subnets, etc. It's held in a `OnceCell` to delay its - // creation until it's first needed; the planner expunges zones before - // adding zones, so this delay allows us to reuse resources that just came - // free. (This is implicit and awkward; as we rework the builder we should - // rework this to make it more explicit.) - // - // Note: this is currently still a `once_cell` `OnceCell` rather than a std - // `OnceCell`, because `get_or_try_init` isn't stable yet. - resource_allocator: OnceCell, - // These fields will become part of the final blueprint. See the // corresponding fields in `Blueprint`. sled_editors: BTreeMap, @@ -674,7 +658,6 @@ impl<'a> BlueprintBuilder<'a> { collection: inventory, new_blueprint_id: rng.next_blueprint(), input, - resource_allocator: OnceCell::new(), sled_editors, cockroachdb_setting_preserve_downgrade: parent_blueprint .cockroachdb_setting_preserve_downgrade, @@ -735,6 +718,7 @@ impl<'a> BlueprintBuilder<'a> { &self.input } + /* fn resource_allocator( &mut self, ) -> Result<&mut BlueprintResourceAllocator, Error> { @@ -760,6 +744,7 @@ impl<'a> BlueprintBuilder<'a> { })?; Ok(self.resource_allocator.get_mut().expect("get_or_init succeeded")) } + */ /// Iterates over the list of sled IDs for which we have zones. /// @@ -1606,6 +1591,7 @@ impl<'a> BlueprintBuilder<'a> { &mut self, sled_id: SledUuid, image_source: BlueprintZoneImageSource, + external_ip: ExternalNetworkingChoice, nexus_generation: Generation, ) -> Result<(), Error> { // Whether Nexus should use TLS and what the external DNS servers it @@ -1636,6 +1622,7 @@ impl<'a> BlueprintBuilder<'a> { external_tls, external_dns_servers, image_source, + external_ip, nexus_generation, ) } @@ -1646,6 +1633,7 @@ impl<'a> BlueprintBuilder<'a> { external_tls: bool, external_dns_servers: Vec, image_source: BlueprintZoneImageSource, + external_ip: ExternalNetworkingChoice, nexus_generation: Generation, ) -> Result<(), Error> { let nexus_id = self.rng.sled_rng(sled_id).next_zone(); @@ -1654,7 +1642,7 @@ impl<'a> BlueprintBuilder<'a> { nic_ip, nic_subnet, nic_mac, - } = self.resource_allocator()?.next_external_ip_nexus()?; + } = external_ip; let external_ip = OmicronZoneExternalFloatingIp { id: self.rng.sled_rng(sled_id).next_external_ip(), ip: external_ip, @@ -2785,6 +2773,7 @@ impl fmt::Display for BpMupdateOverrideNotClearedReason { #[cfg(test)] pub mod test { use super::*; + use crate::blueprint_editor::ExternalNetworkingAllocator; use crate::example::ExampleSystemBuilder; use crate::example::SimRngState; use crate::example::example; @@ -3376,6 +3365,14 @@ pub mod test { ) .expect("failed to create builder"); + let mut external_networking_alloc = ExternalNetworkingAllocator::new( + builder + .current_zones(BlueprintZoneDisposition::is_in_service) + .map(|(_, zone)| zone), + input.external_ip_policy(), + ) + .expect("created external networking allocator"); + let err = builder .sled_add_zone_nexus( collection @@ -3385,6 +3382,9 @@ pub mod test { .map(|sa| sa.sled_id) .expect("no sleds present"), BlueprintZoneImageSource::InstallDataset, + external_networking_alloc + .for_new_nexus() + .expect("have IP for Nexus"), parent.nexus_generation, ) .unwrap_err(); @@ -3484,10 +3484,21 @@ pub mod test { rng.next_planner_rng(), ) .expect("failed to create builder"); + let mut external_networking_alloc = + ExternalNetworkingAllocator::new( + builder + .current_zones(BlueprintZoneDisposition::is_in_service) + .map(|(_, zone)| zone), + input.external_ip_policy(), + ) + .expect("created external networking allocator"); builder .sled_add_zone_nexus( sled_id, BlueprintZoneImageSource::InstallDataset, + external_networking_alloc + .for_new_nexus() + .expect("have IP for Nexus"), parent.nexus_generation, ) .expect("added nexus zone"); @@ -3506,11 +3517,22 @@ pub mod test { rng.next_planner_rng(), ) .expect("failed to create builder"); + let mut external_networking_alloc = + ExternalNetworkingAllocator::new( + builder + .current_zones(BlueprintZoneDisposition::is_in_service) + .map(|(_, zone)| zone), + input.external_ip_policy(), + ) + .expect("created external networking allocator"); for _ in 0..3 { builder .sled_add_zone_nexus( sled_id, BlueprintZoneImageSource::InstallDataset, + external_networking_alloc + .for_new_nexus() + .expect("have IP for Nexus"), parent.nexus_generation, ) .expect("added nexus zone"); @@ -3544,7 +3566,7 @@ pub mod test { builder.build() }; - let mut builder = BlueprintBuilder::new_based_on( + let builder = BlueprintBuilder::new_based_on( &logctx.log, &parent, &input, @@ -3553,20 +3575,20 @@ pub mod test { rng.next_planner_rng(), ) .expect("failed to create builder"); - let err = builder - .sled_add_zone_nexus( - sled_id, - BlueprintZoneImageSource::InstallDataset, - parent.nexus_generation, + let mut external_networking_alloc = + ExternalNetworkingAllocator::new( + builder + .current_zones(BlueprintZoneDisposition::is_in_service) + .map(|(_, zone)| zone), + input.external_ip_policy(), ) - .unwrap_err(); + .expect("created external networking allocator"); + let err = external_networking_alloc.for_new_nexus().unwrap_err(); assert!( matches!( err, - Error::AllocateExternalNetworking( - ExternalNetworkingError::NoExternalServiceIpAvailable - ) + ExternalNetworkingError::NoExternalServiceIpAvailable ), "unexpected error {err}" ); diff --git a/nexus/reconfigurator/planning/src/blueprint_editor.rs b/nexus/reconfigurator/planning/src/blueprint_editor.rs index 598b0ffb9af..b70f2e44b31 100644 --- a/nexus/reconfigurator/planning/src/blueprint_editor.rs +++ b/nexus/reconfigurator/planning/src/blueprint_editor.rs @@ -9,7 +9,6 @@ mod allocators; mod sled_editor; -pub use allocators::BlueprintResourceAllocatorInputError; pub use allocators::ExternalNetworkingAllocator; pub use allocators::ExternalNetworkingChoice; pub use allocators::ExternalNetworkingError; @@ -21,7 +20,6 @@ pub use sled_editor::SledEditError; pub use sled_editor::SledInputError; pub use sled_editor::ZonesEditError; -pub(crate) use allocators::BlueprintResourceAllocator; pub(crate) use sled_editor::DiskExpungeDetails; pub(crate) use sled_editor::EditedSled; pub(crate) use sled_editor::SledEditor; diff --git a/nexus/reconfigurator/planning/src/blueprint_editor/allocators.rs b/nexus/reconfigurator/planning/src/blueprint_editor/allocators.rs index f5ce049eeb0..53201634e18 100644 --- a/nexus/reconfigurator/planning/src/blueprint_editor/allocators.rs +++ b/nexus/reconfigurator/planning/src/blueprint_editor/allocators.rs @@ -4,50 +4,9 @@ //! Blueprint planner resource allocation -use super::SledEditor; -use nexus_types::deployment::BlueprintZoneDisposition; -use nexus_types::deployment::ExternalIpPolicy; - mod external_networking; pub use self::external_networking::ExternalNetworkingAllocator; pub use self::external_networking::ExternalNetworkingChoice; pub use self::external_networking::ExternalNetworkingError; pub use self::external_networking::ExternalSnatNetworkingChoice; - -#[derive(Debug, thiserror::Error)] -pub enum BlueprintResourceAllocatorInputError { - #[error("failed to create external networking allocator")] - ExternalNetworking(#[source] anyhow::Error), -} - -#[derive(Debug)] -pub(crate) struct BlueprintResourceAllocator { - external_networking: ExternalNetworkingAllocator, -} - -impl BlueprintResourceAllocator { - pub fn new<'a, I>( - all_sleds: I, - external_ip_policy: &ExternalIpPolicy, - ) -> Result - where - I: Iterator, - { - let external_networking = ExternalNetworkingAllocator::new( - all_sleds.flat_map(|editor| { - editor.zones(BlueprintZoneDisposition::is_in_service) - }), - external_ip_policy, - ) - .map_err(BlueprintResourceAllocatorInputError::ExternalNetworking)?; - - Ok(Self { external_networking }) - } - - pub(crate) fn next_external_ip_nexus( - &mut self, - ) -> Result { - self.external_networking.for_new_nexus() - } -} diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index 13cbc1323f3..be899b9a0d6 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -593,6 +593,9 @@ impl ExampleSystemBuilder { for _ in 0..nexus_count .on(discretionary_ix, discretionary_sled_count) { + let external_ip = external_networking_alloc + .for_new_nexus() + .expect("should have an external IP for Nexus"); builder .sled_add_zone_nexus_with_config( sled_id, @@ -601,6 +604,7 @@ impl ExampleSystemBuilder { self.target_release .zone_image_source(ZoneKind::Nexus) .expect("obtained Nexus image source"), + external_ip, initial_blueprint.nexus_generation, ) .unwrap(); diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index a06d0bd30fd..8abd748a79a 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -1344,11 +1344,14 @@ impl<'a> Planner<'a> { )? } DiscretionaryOmicronZone::Nexus => { + let external_ip = + external_networking_alloc.for_new_nexus()?; let nexus_generation = self.determine_nexus_generation(&image)?; self.blueprint.sled_add_zone_nexus( sled_id, image, + external_ip, nexus_generation, )? } From d13b299d17af1e7223ac4a951f90e5a1c15632fa Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 5 Nov 2025 15:10:58 -0500 Subject: [PATCH 04/11] different constructor --- .../planning/src/blueprint_builder/builder.rs | 31 +++++------ .../allocators/external_networking.rs | 37 ++++++++++++- nexus/reconfigurator/planning/src/example.rs | 11 ++-- nexus/reconfigurator/planning/src/planner.rs | 52 +++++++++---------- 4 files changed, 78 insertions(+), 53 deletions(-) diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 20eedcbd4ae..5afdbda6305 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -3365,13 +3365,12 @@ pub mod test { ) .expect("failed to create builder"); - let mut external_networking_alloc = ExternalNetworkingAllocator::new( - builder - .current_zones(BlueprintZoneDisposition::is_in_service) - .map(|(_, zone)| zone), - input.external_ip_policy(), - ) - .expect("created external networking allocator"); + let mut external_networking_alloc = + ExternalNetworkingAllocator::from_current_zones( + &builder, + input.external_ip_policy(), + ) + .expect("created external networking allocator"); let err = builder .sled_add_zone_nexus( @@ -3485,10 +3484,8 @@ pub mod test { ) .expect("failed to create builder"); let mut external_networking_alloc = - ExternalNetworkingAllocator::new( - builder - .current_zones(BlueprintZoneDisposition::is_in_service) - .map(|(_, zone)| zone), + ExternalNetworkingAllocator::from_current_zones( + &builder, input.external_ip_policy(), ) .expect("created external networking allocator"); @@ -3518,10 +3515,8 @@ pub mod test { ) .expect("failed to create builder"); let mut external_networking_alloc = - ExternalNetworkingAllocator::new( - builder - .current_zones(BlueprintZoneDisposition::is_in_service) - .map(|(_, zone)| zone), + ExternalNetworkingAllocator::from_current_zones( + &builder, input.external_ip_policy(), ) .expect("created external networking allocator"); @@ -3576,10 +3571,8 @@ pub mod test { ) .expect("failed to create builder"); let mut external_networking_alloc = - ExternalNetworkingAllocator::new( - builder - .current_zones(BlueprintZoneDisposition::is_in_service) - .map(|(_, zone)| zone), + ExternalNetworkingAllocator::from_current_zones( + &builder, input.external_ip_policy(), ) .expect("created external networking allocator"); diff --git a/nexus/reconfigurator/planning/src/blueprint_editor/allocators/external_networking.rs b/nexus/reconfigurator/planning/src/blueprint_editor/allocators/external_networking.rs index 4a3fa62c161..41e05460d16 100644 --- a/nexus/reconfigurator/planning/src/blueprint_editor/allocators/external_networking.rs +++ b/nexus/reconfigurator/planning/src/blueprint_editor/allocators/external_networking.rs @@ -6,7 +6,9 @@ use anyhow::bail; use debug_ignore::DebugIgnore; use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; use nexus_sled_agent_shared::inventory::ZoneKind; +use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneConfig; +use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneType; use nexus_types::deployment::ExternalIpPolicy; use nexus_types::deployment::OmicronZoneExternalIp; @@ -31,6 +33,8 @@ use std::net::Ipv4Addr; use std::net::Ipv6Addr; use strum::IntoEnumIterator as _; +use crate::blueprint_builder::BlueprintBuilder; + #[derive(Debug, thiserror::Error)] pub enum ExternalNetworkingError { #[error("no external DNS IP addresses are available")] @@ -67,7 +71,38 @@ pub struct ExternalNetworkingAllocator { } impl ExternalNetworkingAllocator { - pub(crate) fn new<'b>( + /// Construct an `ExternalNetworkingAllocator` that hands out IPs based on + /// `external_ip_policy`, treating any IPs used by in-service zones + /// in `blueprint` as already-in-use. + #[cfg(test)] + pub(crate) fn from_blueprint( + blueprint: &Blueprint, + external_ip_policy: &ExternalIpPolicy, + ) -> anyhow::Result { + Self::new( + blueprint + .all_omicron_zones(BlueprintZoneDisposition::is_in_service) + .map(|(_sled_id, zone)| zone), + external_ip_policy, + ) + } + + /// Construct an `ExternalNetworkingAllocator` that hands out IPs based on + /// `external_ip_policy`, treating any IPs used by in-service zones + /// described by `builder` as already-in-use. + pub(crate) fn from_current_zones( + builder: &BlueprintBuilder<'_>, + external_ip_policy: &ExternalIpPolicy, + ) -> anyhow::Result { + Self::new( + builder + .current_zones(BlueprintZoneDisposition::is_in_service) + .map(|(_sled_id, zone)| zone), + external_ip_policy, + ) + } + + fn new<'b>( running_omicron_zones: impl Iterator, external_ip_policy: &ExternalIpPolicy, ) -> anyhow::Result { diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index be899b9a0d6..fa46bca40ae 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -561,11 +561,12 @@ impl ExampleSystemBuilder { let discretionary_sled_count = base_input.all_sled_ids(SledFilter::Discretionary).count(); - let mut external_networking_alloc = ExternalNetworkingAllocator::new( - std::iter::empty(), // no zones yet! we're about to add them - base_input.external_ip_policy(), - ) - .expect("constructed ExternalNetworkingAllocator"); + let mut external_networking_alloc = + ExternalNetworkingAllocator::from_current_zones( + &builder, + base_input.external_ip_policy(), + ) + .expect("constructed ExternalNetworkingAllocator"); // * Create disks and non-discretionary zones on all sleds. // * Only create discretionary zones on discretionary sleds. diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 8abd748a79a..29d58914732 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -1133,23 +1133,21 @@ impl<'a> Planner<'a> { // ...and our external networking allocator. (We can't use // `get_or_insert_with()` because we can't bubble out the error, // so effectively recreate that manually. - let external_networking_alloc = - match external_networking_alloc.as_mut() { - Some(allocator) => allocator, - None => { - let allocator = ExternalNetworkingAllocator::new( - self.blueprint - .current_zones( - BlueprintZoneDisposition::is_in_service, - ) - .map(|(_sled_id, zone)| zone), + let external_networking_alloc = match external_networking_alloc + .as_mut() + { + Some(allocator) => allocator, + None => { + let allocator = + ExternalNetworkingAllocator::from_current_zones( + &self.blueprint, self.input.external_ip_policy(), ) .map_err(Error::ExternalNetworkingAllocator)?; - external_networking_alloc = Some(allocator); - external_networking_alloc.as_mut().unwrap() - } - }; + external_networking_alloc = Some(allocator); + external_networking_alloc.as_mut().unwrap() + } + }; self.add_discretionary_zones( zone_placement, @@ -3296,13 +3294,12 @@ pub(crate) mod test { // because we haven't give it any addresses (which currently // come only from RSS). This is not an error, though. assert!(input.external_ip_policy().external_dns_ips().is_empty()); - let mut external_networking_alloc = ExternalNetworkingAllocator::new( - blueprint1 - .all_omicron_zones(BlueprintZoneDisposition::is_in_service) - .map(|(_, zone)| zone), - input.external_ip_policy(), - ) - .expect("constructed allocator"); + let mut external_networking_alloc = + ExternalNetworkingAllocator::from_blueprint( + &blueprint1, + input.external_ip_policy(), + ) + .expect("constructed allocator"); external_networking_alloc .for_new_external_dns() .expect_err("should not have available IPs for external DNS"); @@ -3342,13 +3339,12 @@ pub(crate) mod test { PlannerRng::from_entropy(), ) .expect("failed to build blueprint builder"); - let mut external_networking_alloc = ExternalNetworkingAllocator::new( - blueprint_builder - .current_zones(BlueprintZoneDisposition::is_in_service) - .map(|(_, zone)| zone), - input.external_ip_policy(), - ) - .expect("constructed allocator"); + let mut external_networking_alloc = + ExternalNetworkingAllocator::from_current_zones( + &blueprint_builder, + input.external_ip_policy(), + ) + .expect("constructed allocator"); // Now we can add external DNS zones. We'll add two to the first // sled and one to the second. From 758e666a34901a55f709983c73e5a2278140ca43 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 5 Nov 2025 15:29:29 -0500 Subject: [PATCH 05/11] fix reconfigurator-cli --- dev-tools/reconfigurator-cli/src/lib.rs | 17 ++++++++++++++++- .../allocators/external_networking.rs | 11 +++++------ 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/dev-tools/reconfigurator-cli/src/lib.rs b/dev-tools/reconfigurator-cli/src/lib.rs index ad5ff37f31d..fce3be34a8e 100644 --- a/dev-tools/reconfigurator-cli/src/lib.rs +++ b/dev-tools/reconfigurator-cli/src/lib.rs @@ -20,6 +20,7 @@ use nexus_inventory::CollectionBuilder; use nexus_reconfigurator_blippy::Blippy; use nexus_reconfigurator_blippy::BlippyReportSortKey; use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; +use nexus_reconfigurator_planning::blueprint_editor::ExternalNetworkingAllocator; use nexus_reconfigurator_planning::example::{ ExampleSystemBuilder, extract_tuf_repo_description, tuf_assemble, }; @@ -2245,8 +2246,22 @@ fn cmd_blueprint_edit( &planning_input, ZoneKind::Nexus, )?; + let mut external_networking_alloc = + ExternalNetworkingAllocator::from_current_zones( + &builder, + planning_input.external_ip_policy(), + ) + .context("failed to construct external networking allocator")?; + let external_ip = external_networking_alloc + .for_new_nexus() + .context("failed to pick an external IP for Nexus")?; builder - .sled_add_zone_nexus(sled_id, image_source, nexus_generation) + .sled_add_zone_nexus( + sled_id, + image_source, + external_ip, + nexus_generation, + ) .context("failed to add Nexus zone")?; format!("added Nexus zone to sled {}", sled_id) } diff --git a/nexus/reconfigurator/planning/src/blueprint_editor/allocators/external_networking.rs b/nexus/reconfigurator/planning/src/blueprint_editor/allocators/external_networking.rs index 41e05460d16..53703f6a7ab 100644 --- a/nexus/reconfigurator/planning/src/blueprint_editor/allocators/external_networking.rs +++ b/nexus/reconfigurator/planning/src/blueprint_editor/allocators/external_networking.rs @@ -6,7 +6,6 @@ use anyhow::bail; use debug_ignore::DebugIgnore; use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; use nexus_sled_agent_shared::inventory::ZoneKind; -use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneConfig; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneType; @@ -76,7 +75,7 @@ impl ExternalNetworkingAllocator { /// in `blueprint` as already-in-use. #[cfg(test)] pub(crate) fn from_blueprint( - blueprint: &Blueprint, + blueprint: &nexus_types::deployment::Blueprint, external_ip_policy: &ExternalIpPolicy, ) -> anyhow::Result { Self::new( @@ -90,7 +89,7 @@ impl ExternalNetworkingAllocator { /// Construct an `ExternalNetworkingAllocator` that hands out IPs based on /// `external_ip_policy`, treating any IPs used by in-service zones /// described by `builder` as already-in-use. - pub(crate) fn from_current_zones( + pub fn from_current_zones( builder: &BlueprintBuilder<'_>, external_ip_policy: &ExternalIpPolicy, ) -> anyhow::Result { @@ -299,7 +298,7 @@ impl ExternalNetworkingAllocator { }) } - pub(crate) fn for_new_nexus( + pub fn for_new_nexus( &mut self, ) -> Result { let external_ip = self.external_ip_alloc.claim_next_exclusive_ip()?; @@ -336,7 +335,7 @@ impl ExternalNetworkingAllocator { }) } - pub(crate) fn for_new_boundary_ntp( + pub fn for_new_boundary_ntp( &mut self, ) -> Result { let snat_cfg = self.external_ip_alloc.claim_next_snat_ip()?; @@ -373,7 +372,7 @@ impl ExternalNetworkingAllocator { }) } - pub(crate) fn for_new_external_dns( + pub fn for_new_external_dns( &mut self, ) -> Result { let external_ip = self From 9cdc5975f90f688e635163706490a01a362e9432 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 5 Nov 2025 15:45:45 -0500 Subject: [PATCH 06/11] remove ensure_input_networking_records_appear_in_parent_blueprint() --- .../planning/src/blueprint_builder/builder.rs | 189 ------------------ 1 file changed, 189 deletions(-) diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 5afdbda6305..e822cae27e5 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -18,7 +18,6 @@ use crate::planner::ZoneExpungeReason; use crate::planner::rng::PlannerRng; use anyhow::Context as _; use anyhow::anyhow; -use anyhow::bail; use clickhouse_admin_types::OXIMETER_CLUSTER; use id_map::IdMap; use iddqd::IdOrdItem; @@ -718,34 +717,6 @@ impl<'a> BlueprintBuilder<'a> { &self.input } - /* - fn resource_allocator( - &mut self, - ) -> Result<&mut BlueprintResourceAllocator, Error> { - self.resource_allocator.get_or_try_init(|| { - // Check the planning input: there shouldn't be any external - // networking resources in the database (the source of `input`) - // that we don't know about from the parent blueprint. - // - // TODO-cleanup Should the planner do this instead? Move this check - // to blippy. - ensure_input_networking_records_appear_in_parent_blueprint( - self.parent_blueprint, - self.input, - ) - .map_err(Error::Planner)?; - - let allocator = BlueprintResourceAllocator::new( - self.sled_editors.values(), - self.input.external_ip_policy(), - )?; - - Ok::<_, Error>(allocator) - })?; - Ok(self.resource_allocator.get_mut().expect("get_or_init succeeded")) - } - */ - /// Iterates over the list of sled IDs for which we have zones. /// /// This may include decommissioned sleds. @@ -2323,166 +2294,6 @@ impl<'a> BlueprintBuilder<'a> { } } -// Helper to validate that the system hasn't gone off the rails. There should -// never be any external networking resources in the planning input (which is -// derived from the contents of CRDB) that we don't know about from the parent -// blueprint. It's possible a given planning iteration could see such a state -// there have been intermediate changes made by other Nexus instances; e.g., -// -// 1. Nexus A generates a `PlanningInput` by reading from CRDB -// 2. Nexus B executes on a target blueprint that removes IPs/NICs from -// CRDB -// 3. Nexus B regenerates a new blueprint and prunes the zone(s) associated -// with the IPs/NICs from step 2 -// 4. Nexus B makes this new blueprint the target -// 5. Nexus A attempts to run planning with its `PlanningInput` from step 1 but -// the target blueprint from step 4; this will fail the following checks -// because the input contains records that were removed in step 3 -// -// We do not need to handle this class of error; it's a transient failure that -// will clear itself up when Nexus A repeats its planning loop from the top and -// generates a new `PlanningInput`. -// -// There may still be database records corresponding to _expunged_ zones, but -// that's okay: it just means we haven't yet realized a blueprint where those -// zones are expunged. And those should should still be in the blueprint (not -// pruned) until their database records are cleaned up. -// -// It's also possible that there may be networking records in the database -// assigned to zones that have been expunged, and our parent blueprint uses -// those same records for new zones. This is also fine and expected, and is a -// similar case to the previous paragraph: a zone with networking resources was -// expunged, the database doesn't realize it yet, but can still move forward and -// make planning decisions that reuse those resources for new zones. -pub(super) fn ensure_input_networking_records_appear_in_parent_blueprint( - parent_blueprint: &Blueprint, - input: &PlanningInput, -) -> anyhow::Result<()> { - use nexus_types::deployment::OmicronZoneExternalIp; - use omicron_common::address::DNS_OPTE_IPV4_SUBNET; - use omicron_common::address::DNS_OPTE_IPV6_SUBNET; - use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET; - use omicron_common::address::NEXUS_OPTE_IPV6_SUBNET; - use omicron_common::address::NTP_OPTE_IPV4_SUBNET; - use omicron_common::address::NTP_OPTE_IPV6_SUBNET; - use omicron_common::api::external::MacAddr; - - let mut all_macs: HashSet = HashSet::new(); - let mut all_nexus_nic_ips: HashSet = HashSet::new(); - let mut all_boundary_ntp_nic_ips: HashSet = HashSet::new(); - let mut all_external_dns_nic_ips: HashSet = HashSet::new(); - let mut all_external_ips: HashSet = HashSet::new(); - - // Unlike the construction of the external IP allocator and existing IPs - // constructed above in `BuilderExternalNetworking::new()`, we do not - // check for duplicates here: we could very well see reuse of IPs - // between expunged zones or between expunged -> running zones. - for (_, z) in - parent_blueprint.all_omicron_zones(BlueprintZoneDisposition::any) - { - let zone_type = &z.zone_type; - match zone_type { - BlueprintZoneType::BoundaryNtp(ntp) => { - all_boundary_ntp_nic_ips.insert(ntp.nic.ip); - } - BlueprintZoneType::Nexus(nexus) => { - all_nexus_nic_ips.insert(nexus.nic.ip); - } - BlueprintZoneType::ExternalDns(dns) => { - all_external_dns_nic_ips.insert(dns.nic.ip); - } - _ => (), - } - - if let Some((external_ip, nic)) = zone_type.external_networking() { - // As above, ignore localhost (used by the test suite). - if !external_ip.ip().is_loopback() { - all_external_ips.insert(external_ip); - } - all_macs.insert(nic.mac); - } - } - for external_ip_entry in - input.network_resources().omicron_zone_external_ips() - { - // As above, ignore localhost (used by the test suite). - if external_ip_entry.ip.ip().is_loopback() { - continue; - } - if !all_external_ips.contains(&external_ip_entry.ip) { - bail!( - "planning input contains unexpected external IP \ - (IP not found in parent blueprint): {external_ip_entry:?}" - ); - } - } - for nic_entry in input.network_resources().omicron_zone_nics() { - if !all_macs.contains(&nic_entry.nic.mac) { - bail!( - "planning input contains unexpected NIC \ - (MAC not found in parent blueprint): {nic_entry:?}" - ); - } - match nic_entry.nic.ip { - IpAddr::V4(ip) if NEXUS_OPTE_IPV4_SUBNET.contains(ip) => { - if !all_nexus_nic_ips.contains(&ip.into()) { - bail!( - "planning input contains unexpected NIC \ - (IP not found in parent blueprint): {nic_entry:?}" - ); - } - } - IpAddr::V4(ip) if NTP_OPTE_IPV4_SUBNET.contains(ip) => { - if !all_boundary_ntp_nic_ips.contains(&ip.into()) { - bail!( - "planning input contains unexpected NIC \ - (IP not found in parent blueprint): {nic_entry:?}" - ); - } - } - IpAddr::V4(ip) if DNS_OPTE_IPV4_SUBNET.contains(ip) => { - if !all_external_dns_nic_ips.contains(&ip.into()) { - bail!( - "planning input contains unexpected NIC \ - (IP not found in parent blueprint): {nic_entry:?}" - ); - } - } - IpAddr::V6(ip) if NEXUS_OPTE_IPV6_SUBNET.contains(ip) => { - if !all_nexus_nic_ips.contains(&ip.into()) { - bail!( - "planning input contains unexpected NIC \ - (IP not found in parent blueprint): {nic_entry:?}" - ); - } - } - IpAddr::V6(ip) if NTP_OPTE_IPV6_SUBNET.contains(ip) => { - if !all_boundary_ntp_nic_ips.contains(&ip.into()) { - bail!( - "planning input contains unexpected NIC \ - (IP not found in parent blueprint): {nic_entry:?}" - ); - } - } - IpAddr::V6(ip) if DNS_OPTE_IPV6_SUBNET.contains(ip) => { - if !all_external_dns_nic_ips.contains(&ip.into()) { - bail!( - "planning input contains unexpected NIC \ - (IP not found in parent blueprint): {nic_entry:?}" - ); - } - } - _ => { - bail!( - "planning input contains unexpected NIC \ - (IP not contained in known OPTE subnet): {nic_entry:?}" - ) - } - } - } - Ok(()) -} - /// The result of an `ensure_mupdate_override` call for a particular sled. #[derive(Clone, Debug, Eq, PartialEq)] pub(crate) enum EnsureMupdateOverrideAction { From 54b5975feb1ef3ea4fbf0dcc0ed7eaced46d314d Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 5 Nov 2025 15:45:58 -0500 Subject: [PATCH 07/11] more fixup tests --- dev-tools/reconfigurator-cli/src/lib.rs | 16 +++++++--------- live-tests/tests/test_nexus_add_remove.rs | 15 ++++++++++++++- live-tests/tests/test_nexus_handoff.rs | 11 +++++++++++ nexus/reconfigurator/execution/src/dns.rs | 16 +++++++++++++--- nexus/reconfigurator/execution/src/sagas.rs | 17 ++++++++++++++++- 5 files changed, 61 insertions(+), 14 deletions(-) diff --git a/dev-tools/reconfigurator-cli/src/lib.rs b/dev-tools/reconfigurator-cli/src/lib.rs index fce3be34a8e..5208f3c0cce 100644 --- a/dev-tools/reconfigurator-cli/src/lib.rs +++ b/dev-tools/reconfigurator-cli/src/lib.rs @@ -2246,15 +2246,13 @@ fn cmd_blueprint_edit( &planning_input, ZoneKind::Nexus, )?; - let mut external_networking_alloc = - ExternalNetworkingAllocator::from_current_zones( - &builder, - planning_input.external_ip_policy(), - ) - .context("failed to construct external networking allocator")?; - let external_ip = external_networking_alloc - .for_new_nexus() - .context("failed to pick an external IP for Nexus")?; + let external_ip = ExternalNetworkingAllocator::from_current_zones( + &builder, + planning_input.external_ip_policy(), + ) + .context("failed to construct external networking allocator")? + .for_new_nexus() + .context("failed to pick an external IP for Nexus")?; builder .sled_add_zone_nexus( sled_id, diff --git a/live-tests/tests/test_nexus_add_remove.rs b/live-tests/tests/test_nexus_add_remove.rs index bf78cea0c6b..bc33ac098d4 100644 --- a/live-tests/tests/test_nexus_add_remove.rs +++ b/live-tests/tests/test_nexus_add_remove.rs @@ -16,6 +16,7 @@ use nexus_lockstep_client::types::QuiesceState; use nexus_lockstep_client::types::Saga; use nexus_lockstep_client::types::SagaState; use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; +use nexus_reconfigurator_planning::blueprint_editor::ExternalNetworkingAllocator; use nexus_reconfigurator_planning::planner::Planner; use nexus_reconfigurator_planning::planner::PlannerRng; use nexus_reconfigurator_preparation::PlanningInputFromDb; @@ -115,8 +116,20 @@ async fn test_nexus_add_remove(lc: &LiveTestContext) { "could not find in-service Nexus in parent blueprint", )?; + let external_ip = ExternalNetworkingAllocator::from_current_zones( + builder, + planning_input.external_ip_policy(), + ) + .context("constructing ExternalNetworkingAllocator")? + .for_new_nexus() + .context("choosing external IP for new Nexus")?; builder - .sled_add_zone_nexus(sled_id, image_source, *nexus_generation) + .sled_add_zone_nexus( + sled_id, + image_source, + external_ip, + *nexus_generation, + ) .context("adding Nexus zone")?; Ok(()) diff --git a/live-tests/tests/test_nexus_handoff.rs b/live-tests/tests/test_nexus_handoff.rs index 0a1d22942c8..e4af32ef37c 100644 --- a/live-tests/tests/test_nexus_handoff.rs +++ b/live-tests/tests/test_nexus_handoff.rs @@ -15,6 +15,7 @@ use live_tests_macros::live_test; use nexus_db_model::DbMetadataNexusState; use nexus_lockstep_client::types::QuiesceState; use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; +use nexus_reconfigurator_planning::blueprint_editor::ExternalNetworkingAllocator; use nexus_reconfigurator_preparation::PlanningInputFromDb; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneDisposition; @@ -173,11 +174,21 @@ async fn test_nexus_handoff(lc: &LiveTestContext) { &collection, &nexus, &|builder: &mut BlueprintBuilder| { + let mut external_networking_alloc = + ExternalNetworkingAllocator::from_current_zones( + builder, + planning_input.external_ip_policy(), + ) + .context("constructing ExternalNetworkingAllocator")?; for current_nexus in current_nexus_zones.values() { + let external_ip = external_networking_alloc + .for_new_nexus() + .context("choosing external IP for new Nexus")?; builder .sled_add_zone_nexus( current_nexus.sled_id, current_nexus.image_source.clone(), + external_ip, next_generation, ) .context("adding Nexus zone")?; diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 1f622f62752..09f09ff0a08 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -333,6 +333,7 @@ mod test { use nexus_inventory::CollectionBuilder; use nexus_inventory::now_db_precision; use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; + use nexus_reconfigurator_planning::blueprint_editor::ExternalNetworkingAllocator; use nexus_reconfigurator_planning::example::ExampleSystemBuilder; use nexus_reconfigurator_planning::planner::PlannerRng; use nexus_reconfigurator_preparation::PlanningInputFromDb; @@ -1584,13 +1585,22 @@ mod test { // * 127.0.0.1 (Nexus) // * ::1 (external DNS) // - // However, when the builder compiles its list of "IPs already in use", - // it _ignores_ loopback addresses, meaning we still have two external - // IPs available for new zones (127.0.0.1 and ::1). + // However, when the allocator compiles its list of "IPs already in + // use", it _ignores_ loopback addresses, meaning we still have two + // external IPs available for new zones (127.0.0.1 and ::1). + let new_nexus_external_ip = + ExternalNetworkingAllocator::from_current_zones( + &builder, + planning_input.external_ip_policy(), + ) + .expect("constructed ExternalNetworkingAllocator") + .for_new_nexus() + .expect("found external IP for Nexus"); builder .sled_add_zone_nexus( sled_id, BlueprintZoneImageSource::InstallDataset, + new_nexus_external_ip, blueprint.nexus_generation, ) .unwrap(); diff --git a/nexus/reconfigurator/execution/src/sagas.rs b/nexus/reconfigurator/execution/src/sagas.rs index 929225056c8..8e679eaeda5 100644 --- a/nexus/reconfigurator/execution/src/sagas.rs +++ b/nexus/reconfigurator/execution/src/sagas.rs @@ -100,6 +100,7 @@ fn find_expunged_same_generation( mod test { use super::*; use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; + use nexus_reconfigurator_planning::blueprint_editor::ExternalNetworkingAllocator; use nexus_reconfigurator_planning::example::ExampleSystemBuilder; use nexus_reconfigurator_planning::planner::PlannerRng; use nexus_types::deployment::BlueprintSource; @@ -160,9 +161,23 @@ mod test { // Create the same number of Nexus zones in the next generation. // We'll use the same images. let g2 = g1.next(); + let mut external_networking_alloc = + ExternalNetworkingAllocator::from_current_zones( + &builder, + example.input.external_ip_policy(), + ) + .expect("constructed ExternalNetworkingAllocator"); for (sled_id, _zone_id, image_source) in &g1_nexus_ids { + let external_ip = external_networking_alloc + .for_new_nexus() + .expect("found external IP for Nexus"); builder - .sled_add_zone_nexus(*sled_id, image_source.clone(), g2) + .sled_add_zone_nexus( + *sled_id, + image_source.clone(), + external_ip, + g2, + ) .expect("add Nexus zone"); } From 4d9bcdf4f7a8db6d472dddee5d37908c38ccc7ab Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 5 Nov 2025 15:53:20 -0500 Subject: [PATCH 08/11] fixup datastore tests --- nexus/db-queries/src/db/datastore/vpc.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 45fffdc77f7..a457d23269e 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -2979,6 +2979,7 @@ mod tests { use nexus_db_model::IncompleteNetworkInterface; use nexus_db_model::IpConfig; use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; + use nexus_reconfigurator_planning::blueprint_editor::ExternalNetworkingAllocator; use nexus_reconfigurator_planning::planner::PlannerRng; use nexus_reconfigurator_planning::system::SledBuilder; use nexus_reconfigurator_planning::system::SystemDescription; @@ -3347,12 +3348,20 @@ mod tests { ) .expect("ensured disks"); } + let external_ip = ExternalNetworkingAllocator::from_current_zones( + &builder, + planning_input.external_ip_policy(), + ) + .expect("constructed ExternalNetworkingAllocator") + .for_new_nexus() + .expect("found external IP for Nexus"); builder .sled_add_zone_nexus_with_config( sled_ids[2], false, Vec::new(), BlueprintZoneImageSource::InstallDataset, + external_ip, bp0.nexus_generation, ) .expect("added nexus to third sled"); @@ -3422,13 +3431,23 @@ mod tests { PlannerRng::from_entropy(), ) .expect("created blueprint builder"); + let mut external_networking_alloc = + ExternalNetworkingAllocator::from_current_zones( + &builder, + planning_input.external_ip_policy(), + ) + .expect("constructed ExternalNetworkingAllocator"); for &sled_id in &sled_ids { + let external_ip = external_networking_alloc + .for_new_nexus() + .expect("found external IP for Nexus"); builder .sled_add_zone_nexus_with_config( sled_id, false, Vec::new(), BlueprintZoneImageSource::InstallDataset, + external_ip, bp2.nexus_generation, ) .expect("added nexus to third sled"); From cbf56e1c73b9f5c3b3db45842eb8b1366b38be46 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 5 Nov 2025 15:53:37 -0500 Subject: [PATCH 09/11] cargo fmt --- nexus/reconfigurator/planning/src/planner.rs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 29d58914732..1c5e56c0695 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -1133,21 +1133,20 @@ impl<'a> Planner<'a> { // ...and our external networking allocator. (We can't use // `get_or_insert_with()` because we can't bubble out the error, // so effectively recreate that manually. - let external_networking_alloc = match external_networking_alloc - .as_mut() - { - Some(allocator) => allocator, - None => { - let allocator = + let external_networking_alloc = + match external_networking_alloc.as_mut() { + Some(allocator) => allocator, + None => { + let allocator = ExternalNetworkingAllocator::from_current_zones( &self.blueprint, self.input.external_ip_policy(), ) .map_err(Error::ExternalNetworkingAllocator)?; - external_networking_alloc = Some(allocator); - external_networking_alloc.as_mut().unwrap() - } - }; + external_networking_alloc = Some(allocator); + external_networking_alloc.as_mut().unwrap() + } + }; self.add_discretionary_zones( zone_placement, From 05155936a45a1575b419cb6e5fdb7c126e9d60a0 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 5 Nov 2025 15:57:51 -0500 Subject: [PATCH 10/11] typo --- nexus/reconfigurator/planning/src/planner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 1c5e56c0695..a58e11500e7 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -1132,7 +1132,7 @@ impl<'a> Planner<'a> { }); // ...and our external networking allocator. (We can't use // `get_or_insert_with()` because we can't bubble out the error, - // so effectively recreate that manually. + // so recreate the same effect manually.) let external_networking_alloc = match external_networking_alloc.as_mut() { Some(allocator) => allocator, From ea4cfbc78f3a20f8f5f9ef028839ef124ed4b179 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 10 Nov 2025 12:23:54 -0500 Subject: [PATCH 11/11] more consistent error context strings --- live-tests/tests/test_nexus_add_remove.rs | 4 ++-- live-tests/tests/test_nexus_handoff.rs | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/live-tests/tests/test_nexus_add_remove.rs b/live-tests/tests/test_nexus_add_remove.rs index bc33ac098d4..738f971892b 100644 --- a/live-tests/tests/test_nexus_add_remove.rs +++ b/live-tests/tests/test_nexus_add_remove.rs @@ -120,9 +120,9 @@ async fn test_nexus_add_remove(lc: &LiveTestContext) { builder, planning_input.external_ip_policy(), ) - .context("constructing ExternalNetworkingAllocator")? + .context("failed to construct external networking allocator")? .for_new_nexus() - .context("choosing external IP for new Nexus")?; + .context("failed to pick an external IP for Nexus")?; builder .sled_add_zone_nexus( sled_id, diff --git a/live-tests/tests/test_nexus_handoff.rs b/live-tests/tests/test_nexus_handoff.rs index e4af32ef37c..3020cb3e131 100644 --- a/live-tests/tests/test_nexus_handoff.rs +++ b/live-tests/tests/test_nexus_handoff.rs @@ -179,11 +179,13 @@ async fn test_nexus_handoff(lc: &LiveTestContext) { builder, planning_input.external_ip_policy(), ) - .context("constructing ExternalNetworkingAllocator")?; + .context( + "failed to construct external networking allocator", + )?; for current_nexus in current_nexus_zones.values() { let external_ip = external_networking_alloc .for_new_nexus() - .context("choosing external IP for new Nexus")?; + .context("failed to pick an external IP for Nexus")?; builder .sled_add_zone_nexus( current_nexus.sled_id,