From 274b668a5ee1ce98f8ad11c24bef513e3bb56932 Mon Sep 17 00:00:00 2001
From: James MacMahon
Date: Fri, 28 Mar 2025 15:48:13 +0000
Subject: [PATCH 01/17] Prevent region allocation from filling pools

Recent customer issues have highlighted problems with storage accounting: while there are quotas and reservations for individual Crucible regions, nothing is set for the Crucible dataset as a whole. Crucible _could_ end up using the whole disk, or some large fraction of it, such that other users of the same U2 could be starved out.

This commit adds a buffer to each zpool that the Crucible region allocation query will not allocate into. This buffer will be set to 250G initially (see oxidecomputer/omicron#7875 for the reasoning) but can also be modified with omdb.

Part of this commit changes `regions_hard_delete` to use a CTE, which is much more efficient than the previous for loop but has the side effect of overwriting `size_used` for all datasets, undoing any manual changes to that column that were made to prevent allocation into particular datasets / pools. Because of this, this commit also adds a `no_provision` flag for each Crucible dataset: if it is set, the region allocation query will not allocate into that dataset. This flag can be toggled with omdb.

The upgrade to R14 will include a support procedure for the case where adding the 250G control plane storage buffer causes a Crucible dataset to become "overprovisioned", necessitating manually requested region replacements to reduce the size allocated for that dataset. This commit adds an omdb command to show all overprovisioned Crucible datasets, and changes the region listing command so it can list regions for a particular dataset.

Fixes #3480
---
 dev-tools/omdb/Cargo.toml | 2 +-
 dev-tools/omdb/src/bin/omdb/db.rs | 366 +++++++++++++++++-
 dev-tools/omdb/tests/usage_errors.out | 4 +
 nexus/db-model/src/crucible_dataset.rs | 13 +
 nexus/db-model/src/schema_versions.rs | 3 +-
 nexus/db-model/src/zpool.rs | 15 +
 nexus/db-queries/Cargo.toml | 1 +
 .../src/db/datastore/crucible_dataset.rs | 40 ++
 nexus/db-queries/src/db/datastore/mod.rs | 8 +-
 .../src/db/datastore/physical_disk.rs | 7 +-
 .../src/db/datastore/support_bundle.rs | 2 +
 nexus/db-queries/src/db/datastore/zpool.rs | 25 ++
 .../src/db/queries/region_allocation.rs | 21 +-
 nexus/db-queries/src/db/raw_query_builder.rs | 2 +-
 .../output/region_allocate_distinct_sleds.sql | 12 +-
 .../output/region_allocate_random_sleds.sql | 12 +-
 ..._allocate_with_snapshot_distinct_sleds.sql | 12 +-
 ...on_allocate_with_snapshot_random_sleds.sql | 12 +-
 nexus/db-schema/src/schema.rs | 4 +
 .../execution/src/omicron_physical_disks.rs | 8 +-
 .../rendezvous/src/crucible_dataset.rs | 2 +
 .../background/tasks/blueprint_execution.rs | 2 +
 .../tasks/decommissioned_disk_cleaner.rs | 8 +-
 .../tasks/physical_disk_adoption.rs | 5 +-
 .../tasks/support_bundle_collector.rs | 8 +-
 nexus/src/app/rack.rs | 3 +
 nexus/src/app/sled.rs | 4 +
 nexus/tests/integration_tests/disks.rs | 244 ++++++
 schema/crdb/dbinit.sql | 14 +-
 .../up01.sql | 2 +
 .../up02.sql | 2 +
 .../up03.sql | 2 +
 .../up04.sql | 2 +
 33 files changed, 829 insertions(+), 38 deletions(-)
 create mode 100644 schema/crdb/do-not-provision-flag-for-crucible-dataset/up01.sql
 create mode 100644 schema/crdb/do-not-provision-flag-for-crucible-dataset/up02.sql
 create mode 100644 schema/crdb/do-not-provision-flag-for-crucible-dataset/up03.sql
 create mode 100644
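
As a reviewer aid, here is a minimal standalone Rust sketch (not part of the patch) of the candidacy rule that the updated allocation query enforces: a dataset is considered only if it is not deleted, is provisionable, and its pool still has room once both the requested reservation and the new per-pool buffer are counted. The function and parameter names below are illustrative; they simply mirror the columns and bind parameters used in `region_allocation.rs` later in this patch.

/// Illustrative restatement of the new WHERE clause:
///   (old_zpool_usage.size_used + size_delta + zpool.control_plane_storage_buffer)
///       <= inv_zpool.total_size
///   AND crucible_dataset.time_deleted IS NULL
///   AND crucible_dataset.no_provision = false
fn is_allocation_candidate(
    pool_size_used: i64,               // existing Crucible reservations on the pool
    size_delta: i64,                   // reservation size of the regions being allocated
    control_plane_storage_buffer: i64, // new per-zpool buffer (250G by default)
    pool_total_size: i64,              // latest inv_zpool.total_size for the pool
    dataset_deleted: bool,             // crucible_dataset.time_deleted IS NOT NULL
    dataset_no_provision: bool,        // new crucible_dataset.no_provision flag
) -> bool {
    !dataset_deleted
        && !dataset_no_provision
        && pool_size_used + size_delta + control_plane_storage_buffer
            <= pool_total_size
}

fn main() {
    const GIB: i64 = 1 << 30;
    // Without a buffer, a 10 GiB reservation fits on an empty 20 GiB pool...
    assert!(is_allocation_candidate(0, 10 * GIB, 0, 20 * GIB, false, false));
    // ...but with the 250 GiB buffer the same pool is no longer a candidate.
    assert!(!is_allocation_candidate(0, 10 * GIB, 250 * GIB, 20 * GIB, false, false));
    // A dataset marked `no_provision` is excluded regardless of free space.
    assert!(!is_allocation_candidate(0, GIB, 0, 20 * GIB, false, true));
}

The omdb subcommands added below (`pool set-storage-buffer`, `crucible-dataset mark-non-provisionable` / `mark-provisionable`) are the operator knobs for the two new inputs to this rule.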
schema/crdb/do-not-provision-flag-for-crucible-dataset/up04.sql diff --git a/dev-tools/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml index e605bd58300..18a35be0956 100644 --- a/dev-tools/omdb/Cargo.toml +++ b/dev-tools/omdb/Cargo.toml @@ -34,7 +34,7 @@ itertools.workspace = true nexus-client.workspace = true nexus-config.workspace = true nexus-db-model.workspace = true -nexus-db-queries.workspace = true +nexus-db-queries = { workspace = true, features = ["omdb"] } nexus-db-schema.workspace = true nexus-inventory.workspace = true nexus-reconfigurator-preparation.workspace = true diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index e4a32c1b2e5..36214cda2ef 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -144,6 +144,7 @@ use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::PropolisUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::VolumeUuid; +use omicron_uuid_kinds::ZpoolUuid; use sled_agent_client::VolumeConstructionRequest; use std::borrow::Cow; use std::cmp::Ordering; @@ -325,6 +326,8 @@ pub struct DbFetchOptions { /// Subcommands that query or update the database #[derive(Debug, Subcommand, Clone)] enum DbCommands { + /// Commands relevant to Crucible datasets + CrucibleDataset(CrucibleDatasetArgs), /// Print any Crucible resources that are located on expunged physical disks ReplacementsToDo, /// Print information about the rack @@ -371,6 +374,37 @@ enum DbCommands { Vmms(VmmListArgs), /// Print information about the oximeter collector. Oximeter(OximeterArgs), + /// Commands for querying and interacting with pools + Pool(PoolArgs), +} + +#[derive(Debug, Args, Clone)] +struct CrucibleDatasetArgs { + #[command(subcommand)] + command: CrucibleDatasetCommands, +} + +#[derive(Debug, Subcommand, Clone)] +enum CrucibleDatasetCommands { + List, + + ShowOverprovisioned, + + MarkNonProvisionable(MarkNonProvisionableArgs), + + MarkProvisionable(MarkProvisionableArgs), +} + +#[derive(Debug, Args, Clone)] +struct MarkNonProvisionableArgs { + /// The UUID of the Crucible dataset + dataset_id: DatasetUuid, +} + +#[derive(Debug, Args, Clone)] +struct MarkProvisionableArgs { + /// The UUID of the Crucible dataset + dataset_id: DatasetUuid, } #[derive(Debug, Args, Clone)] @@ -609,8 +643,12 @@ enum RegionCommands { #[derive(Debug, Args, Clone)] struct RegionListArgs { /// Print region IDs only - #[arg(short)] + #[arg(long, short)] id_only: bool, + + /// List regions only in a certain dataset + #[arg(long, short)] + dataset_id: Option, } #[derive(Debug, Args, Clone)] @@ -947,6 +985,30 @@ struct VmmListArgs { states: Vec, } +#[derive(Debug, Args, Clone)] +struct PoolArgs { + #[command(subcommand)] + command: PoolCommands, +} + +#[derive(Debug, Subcommand, Clone)] +enum PoolCommands { + /// List pools + List, + + /// Set the control plane storage buffer for a pool + SetStorageBuffer(SetStorageBufferArgs), +} + +#[derive(Debug, Args, Clone)] +struct SetStorageBufferArgs { + /// The UUID of Pool + id: Uuid, + + /// How many bytes to set the buffer to + storage_buffer: i64, +} + impl DbArgs { /// Run a `omdb db` subcommand. 
/// @@ -961,6 +1023,34 @@ impl DbArgs { self.db_url_opts.with_datastore(omdb, log, |opctx, datastore| { async move { match &self.command { + DbCommands::CrucibleDataset(CrucibleDatasetArgs { + command: CrucibleDatasetCommands::List, + }) => { + cmd_crucible_dataset_list(&opctx, &datastore).await + } + DbCommands::CrucibleDataset(CrucibleDatasetArgs { + command: CrucibleDatasetCommands::ShowOverprovisioned, + }) => { + cmd_crucible_dataset_show_overprovisioned( + &opctx, &datastore, + ).await + } + DbCommands::CrucibleDataset(CrucibleDatasetArgs { + command: CrucibleDatasetCommands::MarkNonProvisionable(args), + }) => { + let token = omdb.check_allow_destructive()?; + cmd_crucible_dataset_mark_non_provisionable( + &opctx, &datastore, args, token, + ).await + } + DbCommands::CrucibleDataset(CrucibleDatasetArgs { + command: CrucibleDatasetCommands::MarkProvisionable(args), + }) => { + let token = omdb.check_allow_destructive()?; + cmd_crucible_dataset_mark_provisionable( + &opctx, &datastore, args, token, + ).await + } DbCommands::ReplacementsToDo => { replacements_to_do(&opctx, &datastore).await } @@ -1226,6 +1316,16 @@ impl DbArgs { DbCommands::Oximeter(OximeterArgs { command: OximeterCommands::ListProducers }) => cmd_db_oximeter_list_producers(&datastore, fetch_opts).await, + DbCommands::Pool(PoolArgs { + command: PoolCommands::List + }) => cmd_db_zpool_list(&opctx, &datastore).await, + DbCommands::Pool(PoolArgs { + command: PoolCommands::SetStorageBuffer(args) + }) => cmd_db_zpool_set_storage_buffer( + &opctx, + &datastore, + &args, + ).await, } } }).await @@ -1415,6 +1515,173 @@ async fn lookup_project( .with_context(|| format!("loading project {project_id}")) } +// Crucible datasets + +async fn cmd_crucible_dataset_list( + opctx: &OpContext, + datastore: &DataStore, +) -> Result<(), anyhow::Error> { + let crucible_datasets = + datastore.crucible_dataset_list_all_batched(opctx).await?; + + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct CrucibleDatasetRow { + id: Uuid, + time_deleted: String, + pool_id: Uuid, + address: String, + size_used: i64, + no_provision: bool, + } + + let rows: Vec<_> = crucible_datasets + .into_iter() + .map(|d| CrucibleDatasetRow { + id: d.id().into_untyped_uuid(), + time_deleted: match d.time_deleted() { + Some(t) => t.to_string(), + None => String::from(""), + }, + pool_id: d.pool_id, + address: d.address().to_string(), + size_used: d.size_used, + no_provision: d.no_provision(), + }) + .collect(); + + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::psql()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + + Ok(()) +} + +async fn cmd_crucible_dataset_show_overprovisioned( + opctx: &OpContext, + datastore: &DataStore, +) -> Result<(), anyhow::Error> { + // A Crucible dataset is overprovisioned if size_used (amount taken up by + // Crucible region reservations) plus the control plane storage buffer + // (note this is _not_ a ZFS reservation! it's currently just a per-pool + // value in the database) is larger than the backing pool's total size. + + let crucible_datasets = + datastore.crucible_dataset_list_all_batched(opctx).await?; + + let Some(latest_collection) = + datastore.inventory_get_latest_collection(opctx).await? 
+ else { + bail!("no latest inventory found!"); + }; + + let mut zpool_total_size: HashMap = HashMap::new(); + + for (_, sled_agent) in latest_collection.sled_agents { + for zpool in sled_agent.zpools { + zpool_total_size + .insert(zpool.id.into_untyped_uuid(), zpool.total_size.into()); + } + } + + let zpools: HashMap = datastore + .zpool_list_all_external_batched(opctx) + .await? + .into_iter() + .map(|(zpool, _)| (zpool.id().into_untyped_uuid(), zpool)) + .collect(); + + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct CrucibleDatasetOverprovisionedRow { + // dataset fields + id: Uuid, + size_used: i64, + no_provision: bool, + + // zpool fields + pool_id: Uuid, + control_plane_storage_buffer: i64, + pool_total_size: i64, + } + + let mut rows = vec![]; + + for crucible_dataset in crucible_datasets { + if crucible_dataset.time_deleted().is_some() { + continue; + } + + let control_plane_storage_buffer = zpools + .get(&crucible_dataset.pool_id) + .unwrap() + .control_plane_storage_buffer(); + + let total_dataset_size = + crucible_dataset.size_used + control_plane_storage_buffer; + + match zpool_total_size.get(&crucible_dataset.pool_id) { + Some(pool_total_size) => { + if total_dataset_size >= *pool_total_size { + rows.push(CrucibleDatasetOverprovisionedRow { + id: crucible_dataset.id().into_untyped_uuid(), + size_used: crucible_dataset.size_used, + no_provision: crucible_dataset.no_provision(), + + pool_id: crucible_dataset.pool_id, + control_plane_storage_buffer, + pool_total_size: *pool_total_size, + }); + } + } + + None => {} + } + } + + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::psql()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + + Ok(()) +} + +async fn cmd_crucible_dataset_mark_non_provisionable( + opctx: &OpContext, + datastore: &DataStore, + args: &MarkNonProvisionableArgs, + _destruction_token: DestructiveOperationToken, +) -> Result<(), anyhow::Error> { + datastore + .mark_crucible_dataset_not_provisionable(opctx, args.dataset_id) + .await?; + + println!("marked {:?} as non-provisionable", args.dataset_id); + + Ok(()) +} + +async fn cmd_crucible_dataset_mark_provisionable( + opctx: &OpContext, + datastore: &DataStore, + args: &MarkProvisionableArgs, + _destruction_token: DestructiveOperationToken, +) -> Result<(), anyhow::Error> { + datastore + .mark_crucible_dataset_provisionable(opctx, args.dataset_id) + .await?; + + println!("marked {:?} as provisionable", args.dataset_id); + + Ok(()) +} + // Disks #[derive(Tabled)] @@ -2943,14 +3210,20 @@ async fn cmd_db_region_list( ) -> Result<(), anyhow::Error> { use nexus_db_schema::schema::region::dsl; - let regions: Vec = paginated( + let mut query = paginated( dsl::region, dsl::id, &first_page::(fetch_opts.fetch_limit), - ) - .select(Region::as_select()) - .load_async(&*datastore.pool_connection_for_tests().await?) - .await?; + ); + + if let Some(dataset_id) = args.dataset_id { + query = query.filter(dsl::dataset_id.eq(to_db_typed_uuid(dataset_id))); + } + + let regions: Vec = query + .select(Region::as_select()) + .load_async(&*datastore.pool_connection_for_tests().await?) 
+ .await?; check_limit(®ions, fetch_opts.fetch_limit, || { String::from("listing regions") @@ -7433,3 +7706,84 @@ fn datetime_opt_rfc3339_concise(t: &Option>) -> String { t.map(|t| t.to_rfc3339_opts(chrono::format::SecondsFormat::Millis, true)) .unwrap_or_else(|| "-".to_string()) } + +async fn cmd_db_zpool_list( + opctx: &OpContext, + datastore: &DataStore, +) -> Result<(), anyhow::Error> { + let zpools = datastore.zpool_list_all_external_batched(opctx).await?; + + let Some(latest_collection) = + datastore.inventory_get_latest_collection(opctx).await? + else { + bail!("no latest inventory found!"); + }; + + let mut zpool_total_size: HashMap = HashMap::new(); + + for (_, sled_agent) in latest_collection.sled_agents { + for zpool in sled_agent.zpools { + zpool_total_size + .insert(zpool.id.into_untyped_uuid(), zpool.total_size.into()); + } + } + + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct ZpoolRow { + id: Uuid, + time_deleted: String, + sled_id: Uuid, + physical_disk_id: Uuid, + total_size: i64, + control_plane_storage_buffer: i64, + } + + let rows: Vec<_> = zpools + .into_iter() + .map(|(p, _)| { + let zpool_id = p.id().into_untyped_uuid(); + ZpoolRow { + id: zpool_id, + time_deleted: match p.time_deleted() { + Some(t) => t.to_string(), + None => String::from(""), + }, + sled_id: p.sled_id, + physical_disk_id: p.physical_disk_id.into_untyped_uuid(), + total_size: *zpool_total_size.get(&zpool_id).unwrap(), + control_plane_storage_buffer: p.control_plane_storage_buffer(), + } + }) + .collect(); + + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::psql()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + + Ok(()) +} + +async fn cmd_db_zpool_set_storage_buffer( + opctx: &OpContext, + datastore: &DataStore, + args: &SetStorageBufferArgs, +) -> Result<(), anyhow::Error> { + datastore + .zpool_set_control_plane_storage_buffer( + opctx, + ZpoolUuid::from_untyped_uuid(args.id), + args.storage_buffer, + ) + .await?; + + println!( + "set pool {} control plane storage buffer bytes to {}", + args.id, args.storage_buffer, + ); + + Ok(()) +} diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index 43cefccf0a4..4244d9bb027 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -113,6 +113,7 @@ Query the control plane database (CockroachDB) Usage: omdb db [OPTIONS] Commands: + crucible-dataset Commands relevant to Crucible datasets replacements-to-do Print any Crucible resources that are located on expunged physical disks rack Print information about the rack @@ -138,6 +139,7 @@ Commands: processes vmms Alias to `omdb db vmm list` oximeter Print information about the oximeter collector + pool Commands for querying and interacting with pools help Print this message or the help of the given subcommand(s) Options: @@ -167,6 +169,7 @@ Query the control plane database (CockroachDB) Usage: omdb db [OPTIONS] Commands: + crucible-dataset Commands relevant to Crucible datasets replacements-to-do Print any Crucible resources that are located on expunged physical disks rack Print information about the rack @@ -192,6 +195,7 @@ Commands: processes vmms Alias to `omdb db vmm list` oximeter Print information about the oximeter collector + pool Commands for querying and interacting with pools help Print this message or the help of the given subcommand(s) Options: diff --git a/nexus/db-model/src/crucible_dataset.rs 
b/nexus/db-model/src/crucible_dataset.rs index a7d11571a61..c4ceb38cc97 100644 --- a/nexus/db-model/src/crucible_dataset.rs +++ b/nexus/db-model/src/crucible_dataset.rs @@ -41,6 +41,10 @@ pub struct CrucibleDataset { port: SqlU16, pub size_used: i64, + + /// Do not consider this dataset as a candidate during region allocation + #[serde(default)] + no_provision: bool, } impl CrucibleDataset { @@ -57,9 +61,14 @@ impl CrucibleDataset { ip: addr.ip().into(), port: addr.port().into(), size_used: 0, + no_provision: false, } } + pub fn time_deleted(&self) -> Option> { + self.time_deleted + } + pub fn address(&self) -> SocketAddrV6 { self.address_with_port(self.port.into()) } @@ -67,6 +76,10 @@ impl CrucibleDataset { pub fn address_with_port(&self, port: u16) -> SocketAddrV6 { SocketAddrV6::new(Ipv6Addr::from(self.ip), port, 0, 0) } + + pub fn no_provision(&self) -> bool { + self.no_provision + } } // Datasets contain regions diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index ad09695d792..7e751cf4aac 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(134, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(135, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(135, "do-not-provision-flag-for-crucible-dataset"), KnownVersion::new(134, "crucible-agent-reservation-overhead"), KnownVersion::new(133, "delete-defunct-reservations"), KnownVersion::new(132, "bp-omicron-zone-filesystem-pool-not-null"), diff --git a/nexus/db-model/src/zpool.rs b/nexus/db-model/src/zpool.rs index ace723754f2..95414fb7f45 100644 --- a/nexus/db-model/src/zpool.rs +++ b/nexus/db-model/src/zpool.rs @@ -3,6 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use super::{CrucibleDataset, Generation}; +use crate::ByteCount; use crate::collection::DatastoreCollectionConfig; use crate::typed_uuid::DbTypedUuid; use chrono::{DateTime, Utc}; @@ -29,6 +30,10 @@ pub struct Zpool { // The physical disk to which this Zpool is attached. pub physical_disk_id: DbTypedUuid, + + /// How much storage to prevent Crucible from allocating for regions on a + /// given pool. 
+ control_plane_storage_buffer: i64, } impl Zpool { @@ -36,6 +41,7 @@ impl Zpool { id: Uuid, sled_id: Uuid, physical_disk_id: PhysicalDiskUuid, + control_plane_storage_buffer: ByteCount, ) -> Self { Self { identity: ZpoolIdentity::new(id), @@ -43,8 +49,17 @@ impl Zpool { rcgen: Generation::new(), sled_id, physical_disk_id: physical_disk_id.into(), + control_plane_storage_buffer: control_plane_storage_buffer.into(), } } + + pub fn time_deleted(&self) -> Option> { + self.time_deleted + } + + pub fn control_plane_storage_buffer(&self) -> i64 { + self.control_plane_storage_buffer + } } impl DatastoreCollectionConfig for Zpool { diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index cbff5591754..ac9c8de295e 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -74,6 +74,7 @@ omicron-test-utils = { workspace = true, optional = true } [features] # Enable to export `TestDatabase` testing = ["omicron-test-utils"] +omdb = [] [dev-dependencies] assert_matches.workspace = true diff --git a/nexus/db-queries/src/db/datastore/crucible_dataset.rs b/nexus/db-queries/src/db/datastore/crucible_dataset.rs index 10a47400111..40204e54152 100644 --- a/nexus/db-queries/src/db/datastore/crucible_dataset.rs +++ b/nexus/db-queries/src/db/datastore/crucible_dataset.rs @@ -245,6 +245,44 @@ impl DataStore { Ok(physical_disk.disk_policy == PhysicalDiskPolicy::InService) } + + #[cfg(any(test, feature = "testing", feature = "omdb"))] + pub async fn mark_crucible_dataset_not_provisionable( + &self, + opctx: &OpContext, + dataset_id: DatasetUuid, + ) -> Result<(), Error> { + let conn = self.pool_connection_authorized(opctx).await?; + + use nexus_db_schema::schema::crucible_dataset::dsl; + + diesel::update(dsl::crucible_dataset) + .filter(dsl::id.eq(to_db_typed_uuid(dataset_id))) + .set(dsl::no_provision.eq(true)) + .execute_async(&*conn) + .await + .map(|_| ()) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + #[cfg(any(test, feature = "testing", feature = "omdb"))] + pub async fn mark_crucible_dataset_provisionable( + &self, + opctx: &OpContext, + dataset_id: DatasetUuid, + ) -> Result<(), Error> { + let conn = self.pool_connection_authorized(opctx).await?; + + use nexus_db_schema::schema::crucible_dataset::dsl; + + diesel::update(dsl::crucible_dataset) + .filter(dsl::id.eq(to_db_typed_uuid(dataset_id))) + .set(dsl::no_provision.eq(false)) + .execute_async(&*conn) + .await + .map(|_| ()) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } } #[cfg(test)] @@ -255,6 +293,7 @@ mod test { use nexus_db_model::SledBaseboard; use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; + use omicron_common::api::external::ByteCount; use omicron_test_utils::dev; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::PhysicalDiskUuid; @@ -293,6 +332,7 @@ mod test { *zpool_id.as_untyped_uuid(), *sled_id.as_untyped_uuid(), PhysicalDiskUuid::new_v4(), + ByteCount::from(0).into(), ); datastore .zpool_insert(opctx, zpool) diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 4ea4c765fba..b630b1265a7 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -764,8 +764,12 @@ mod test { physical_disk_id: PhysicalDiskUuid, ) -> Uuid { let zpool_id = Uuid::new_v4(); - let zpool = - Zpool::new(zpool_id, sled_id.into_untyped_uuid(), physical_disk_id); + let zpool = Zpool::new( + zpool_id, + sled_id.into_untyped_uuid(), + physical_disk_id, + 
ByteCount::from(0).into(), + ); datastore.zpool_insert(opctx, zpool).await.unwrap(); zpool_id } diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index 3230ae35522..ccfa6c46133 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -766,7 +766,12 @@ mod test { sled_id, ); - let zpool = Zpool::new(Uuid::new_v4(), sled_id, disk.id()); + let zpool = Zpool::new( + Uuid::new_v4(), + sled_id, + disk.id(), + ByteCount::from(0).into(), + ); (disk, zpool) } diff --git a/nexus/db-queries/src/db/datastore/support_bundle.rs b/nexus/db-queries/src/db/datastore/support_bundle.rs index ef619403a18..9cefefdae1f 100644 --- a/nexus/db-queries/src/db/datastore/support_bundle.rs +++ b/nexus/db-queries/src/db/datastore/support_bundle.rs @@ -478,6 +478,7 @@ mod test { use nexus_reconfigurator_planning::example::SimRngState; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneType; + use omicron_common::api::external::ByteCount; use omicron_common::api::external::LookupType; use omicron_common::api::internal::shared::DatasetKind::Debug as DebugDatasetKind; use omicron_test_utils::dev; @@ -584,6 +585,7 @@ mod test { *pool.pool.as_untyped_uuid(), *self.sled.as_untyped_uuid(), PhysicalDiskUuid::new_v4(), + ByteCount::from(0).into(), ); datastore .zpool_insert(opctx, zpool) diff --git a/nexus/db-queries/src/db/datastore/zpool.rs b/nexus/db-queries/src/db/datastore/zpool.rs index 1a44e478f27..67325ed309e 100644 --- a/nexus/db-queries/src/db/datastore/zpool.rs +++ b/nexus/db-queries/src/db/datastore/zpool.rs @@ -27,6 +27,7 @@ use chrono::Utc; use diesel::prelude::*; use diesel::upsert::excluded; use nexus_db_model::PhysicalDiskKind; +use nexus_db_model::to_db_typed_uuid; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; @@ -310,4 +311,28 @@ impl DataStore { Ok(SledUuid::from_untyped_uuid(id)) } + + #[cfg(any(test, feature = "testing", feature = "omdb"))] + pub async fn zpool_set_control_plane_storage_buffer( + &self, + opctx: &OpContext, + id: ZpoolUuid, + control_plane_storage_buffer: i64, + ) -> Result<(), Error> { + use nexus_db_schema::schema::zpool::dsl; + + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + let conn = self.pool_connection_authorized(opctx).await?; + + diesel::update(dsl::zpool) + .filter(dsl::id.eq(to_db_typed_uuid(id))) + .set( + dsl::control_plane_storage_buffer + .eq(control_plane_storage_buffer), + ) + .execute_async(&*conn) + .await + .map(|_| ()) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } } diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index 268af89b58e..c28813da998 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -273,7 +273,10 @@ pub fn allocation_query( SELECT crucible_dataset.pool_id, sum(crucible_dataset.size_used) AS size_used - FROM crucible_dataset WHERE ((crucible_dataset.size_used IS NOT NULL) AND (crucible_dataset.time_deleted IS NULL)) GROUP BY crucible_dataset.pool_id),"); + FROM crucible_dataset + WHERE + ((crucible_dataset.size_used IS NOT NULL) AND (crucible_dataset.time_deleted IS NULL)) + GROUP BY crucible_dataset.pool_id),"); if let Some(snapshot_id) = snapshot_id { // Any zpool already have this volume's existing 
regions, or host the @@ -288,7 +291,7 @@ pub fn allocation_query( select crucible_dataset.pool_id from crucible_dataset inner join region_snapshot on (region_snapshot.dataset_id = crucible_dataset.id) where region_snapshot.snapshot_id = ").param().sql(")),") - .bind::(snapshot_id) + .bind::(snapshot_id); } else { // Any zpool already have this volume's existing regions? builder.sql(" @@ -297,8 +300,8 @@ pub fn allocation_query( crucible_dataset.pool_id FROM crucible_dataset INNER JOIN old_regions ON (old_regions.dataset_id = crucible_dataset.id) - ),") - }; + ),"); + } // If `distinct_sleds` is selected, then take note of the sleds used by // existing allocations, and filter those out later. This step is required @@ -317,7 +320,7 @@ pub fn allocation_query( zpool.id = ANY(SELECT pool_id FROM existing_zpools) ),", ); - }; + } // Identifies zpools with enough space for region allocation, that are not // currently used by this Volume's existing regions. @@ -341,8 +344,10 @@ pub fn allocation_query( (zpool INNER JOIN sled ON (zpool.sled_id = sled.id)) ON (zpool.id = old_zpool_usage.pool_id) INNER JOIN physical_disk ON (zpool.physical_disk_id = physical_disk.id) + INNER JOIN + crucible_dataset ON (crucible_dataset.pool_id = zpool.id) WHERE ( - (old_zpool_usage.size_used + ").param().sql(" ) <= + (old_zpool_usage.size_used + ").param().sql(" + zpool.control_plane_storage_buffer) <= (SELECT total_size FROM omicron.public.inv_zpool WHERE inv_zpool.id = old_zpool_usage.pool_id ORDER BY inv_zpool.time_collected DESC LIMIT 1) @@ -351,6 +356,8 @@ pub fn allocation_query( AND physical_disk.disk_policy = 'in_service' AND physical_disk.disk_state = 'active' AND NOT(zpool.id = ANY(SELECT existing_zpools.pool_id FROM existing_zpools)) + AND (crucible_dataset.time_deleted is NULL) + AND (crucible_dataset.no_provision = false) " ).bind::(size_delta); @@ -375,7 +382,7 @@ pub fn allocation_query( crucible_dataset.id, crucible_dataset.pool_id FROM (crucible_dataset INNER JOIN candidate_zpools ON (crucible_dataset.pool_id = candidate_zpools.pool_id)) - WHERE (crucible_dataset.time_deleted IS NULL) + WHERE (crucible_dataset.time_deleted IS NULL) AND (crucible_dataset.no_provision = false) ORDER BY crucible_dataset.pool_id, md5((CAST(crucible_dataset.id as BYTEA) || ").param().sql(")) ),") .bind::(seed.clone()) diff --git a/nexus/db-queries/src/db/raw_query_builder.rs b/nexus/db-queries/src/db/raw_query_builder.rs index 75dc46fe4ff..d4f0c364bdd 100644 --- a/nexus/db-queries/src/db/raw_query_builder.rs +++ b/nexus/db-queries/src/db/raw_query_builder.rs @@ -135,7 +135,7 @@ impl QueryBuilder { Value: diesel::serialize::ToSql + Send + 'static, BindSt: Send + 'static, { - self.query = self.query.take().map(|q| q.bind(b)); + self.query = self.query.take().map(|q| q.bind::(b)); self } diff --git a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql index 51f56f89ac6..2106a598110 100644 --- a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql @@ -55,8 +55,9 @@ WITH INNER JOIN (zpool INNER JOIN sled ON zpool.sled_id = sled.id) ON zpool.id = old_zpool_usage.pool_id INNER JOIN physical_disk ON zpool.physical_disk_id = physical_disk.id + INNER JOIN crucible_dataset ON crucible_dataset.pool_id = zpool.id WHERE - (old_zpool_usage.size_used + $2) + (old_zpool_usage.size_used + $2 + zpool.control_plane_storage_buffer) <= ( SELECT total_size @@ -74,6 +75,8 @@ WITH AND 
physical_disk.disk_policy = 'in_service' AND physical_disk.disk_state = 'active' AND NOT (zpool.id = ANY (SELECT existing_zpools.pool_id FROM existing_zpools)) + AND (crucible_dataset.time_deleted IS NULL) + AND crucible_dataset.no_provision = false AND NOT (sled.id = ANY (SELECT existing_sleds.id FROM existing_sleds)) ORDER BY zpool.sled_id, md5(CAST(zpool.id AS BYTES) || $3) @@ -86,7 +89,7 @@ WITH crucible_dataset INNER JOIN candidate_zpools ON crucible_dataset.pool_id = candidate_zpools.pool_id WHERE - crucible_dataset.time_deleted IS NULL + (crucible_dataset.time_deleted IS NULL) AND crucible_dataset.no_provision = false ORDER BY crucible_dataset.pool_id, md5(CAST(crucible_dataset.id AS BYTES) || $4) ), @@ -290,7 +293,8 @@ WITH crucible_dataset.pool_id, crucible_dataset.ip, crucible_dataset.port, - crucible_dataset.size_used + crucible_dataset.size_used, + crucible_dataset.no_provision ) ( SELECT @@ -303,6 +307,7 @@ WITH crucible_dataset.ip, crucible_dataset.port, crucible_dataset.size_used, + crucible_dataset.no_provision, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -330,6 +335,7 @@ UNION updated_datasets.ip, updated_datasets.port, updated_datasets.size_used, + updated_datasets.no_provision, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql index 7e1ac153a3f..f1b9d73038b 100644 --- a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql @@ -46,8 +46,9 @@ WITH INNER JOIN (zpool INNER JOIN sled ON zpool.sled_id = sled.id) ON zpool.id = old_zpool_usage.pool_id INNER JOIN physical_disk ON zpool.physical_disk_id = physical_disk.id + INNER JOIN crucible_dataset ON crucible_dataset.pool_id = zpool.id WHERE - (old_zpool_usage.size_used + $2) + (old_zpool_usage.size_used + $2 + zpool.control_plane_storage_buffer) <= ( SELECT total_size @@ -65,6 +66,8 @@ WITH AND physical_disk.disk_policy = 'in_service' AND physical_disk.disk_state = 'active' AND NOT (zpool.id = ANY (SELECT existing_zpools.pool_id FROM existing_zpools)) + AND (crucible_dataset.time_deleted IS NULL) + AND crucible_dataset.no_provision = false ), candidate_datasets AS ( @@ -74,7 +77,7 @@ WITH crucible_dataset INNER JOIN candidate_zpools ON crucible_dataset.pool_id = candidate_zpools.pool_id WHERE - crucible_dataset.time_deleted IS NULL + (crucible_dataset.time_deleted IS NULL) AND crucible_dataset.no_provision = false ORDER BY crucible_dataset.pool_id, md5(CAST(crucible_dataset.id AS BYTES) || $3) ), @@ -278,7 +281,8 @@ WITH crucible_dataset.pool_id, crucible_dataset.ip, crucible_dataset.port, - crucible_dataset.size_used + crucible_dataset.size_used, + crucible_dataset.no_provision ) ( SELECT @@ -291,6 +295,7 @@ WITH crucible_dataset.ip, crucible_dataset.port, crucible_dataset.size_used, + crucible_dataset.no_provision, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -318,6 +323,7 @@ UNION updated_datasets.ip, updated_datasets.port, updated_datasets.size_used, + updated_datasets.no_provision, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql b/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql index 03b2b6cb708..697091df30c 100644 --- 
a/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql @@ -67,8 +67,9 @@ WITH INNER JOIN (zpool INNER JOIN sled ON zpool.sled_id = sled.id) ON zpool.id = old_zpool_usage.pool_id INNER JOIN physical_disk ON zpool.physical_disk_id = physical_disk.id + INNER JOIN crucible_dataset ON crucible_dataset.pool_id = zpool.id WHERE - (old_zpool_usage.size_used + $3) + (old_zpool_usage.size_used + $3 + zpool.control_plane_storage_buffer) <= ( SELECT total_size @@ -86,6 +87,8 @@ WITH AND physical_disk.disk_policy = 'in_service' AND physical_disk.disk_state = 'active' AND NOT (zpool.id = ANY (SELECT existing_zpools.pool_id FROM existing_zpools)) + AND (crucible_dataset.time_deleted IS NULL) + AND crucible_dataset.no_provision = false AND NOT (sled.id = ANY (SELECT existing_sleds.id FROM existing_sleds)) ORDER BY zpool.sled_id, md5(CAST(zpool.id AS BYTES) || $4) @@ -98,7 +101,7 @@ WITH crucible_dataset INNER JOIN candidate_zpools ON crucible_dataset.pool_id = candidate_zpools.pool_id WHERE - crucible_dataset.time_deleted IS NULL + (crucible_dataset.time_deleted IS NULL) AND crucible_dataset.no_provision = false ORDER BY crucible_dataset.pool_id, md5(CAST(crucible_dataset.id AS BYTES) || $5) ), @@ -302,7 +305,8 @@ WITH crucible_dataset.pool_id, crucible_dataset.ip, crucible_dataset.port, - crucible_dataset.size_used + crucible_dataset.size_used, + crucible_dataset.no_provision ) ( SELECT @@ -315,6 +319,7 @@ WITH crucible_dataset.ip, crucible_dataset.port, crucible_dataset.size_used, + crucible_dataset.no_provision, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -342,6 +347,7 @@ UNION updated_datasets.ip, updated_datasets.port, updated_datasets.size_used, + updated_datasets.no_provision, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql index b71578e0509..9fcfac921fb 100644 --- a/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql @@ -58,8 +58,9 @@ WITH INNER JOIN (zpool INNER JOIN sled ON zpool.sled_id = sled.id) ON zpool.id = old_zpool_usage.pool_id INNER JOIN physical_disk ON zpool.physical_disk_id = physical_disk.id + INNER JOIN crucible_dataset ON crucible_dataset.pool_id = zpool.id WHERE - (old_zpool_usage.size_used + $3) + (old_zpool_usage.size_used + $3 + zpool.control_plane_storage_buffer) <= ( SELECT total_size @@ -77,6 +78,8 @@ WITH AND physical_disk.disk_policy = 'in_service' AND physical_disk.disk_state = 'active' AND NOT (zpool.id = ANY (SELECT existing_zpools.pool_id FROM existing_zpools)) + AND (crucible_dataset.time_deleted IS NULL) + AND crucible_dataset.no_provision = false ), candidate_datasets AS ( @@ -86,7 +89,7 @@ WITH crucible_dataset INNER JOIN candidate_zpools ON crucible_dataset.pool_id = candidate_zpools.pool_id WHERE - crucible_dataset.time_deleted IS NULL + (crucible_dataset.time_deleted IS NULL) AND crucible_dataset.no_provision = false ORDER BY crucible_dataset.pool_id, md5(CAST(crucible_dataset.id AS BYTES) || $4) ), @@ -290,7 +293,8 @@ WITH crucible_dataset.pool_id, crucible_dataset.ip, crucible_dataset.port, - crucible_dataset.size_used + crucible_dataset.size_used, + crucible_dataset.no_provision ) ( SELECT @@ -303,6 +307,7 @@ WITH 
crucible_dataset.ip, crucible_dataset.port, crucible_dataset.size_used, + crucible_dataset.no_provision, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -330,6 +335,7 @@ UNION updated_datasets.ip, updated_datasets.port, updated_datasets.size_used, + updated_datasets.no_provision, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 737b7d5665b..7e128575496 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -1071,6 +1071,8 @@ table! { sled_id -> Uuid, physical_disk_id -> Uuid, + + control_plane_storage_buffer -> Int8, } } @@ -1093,6 +1095,8 @@ table! { port -> Int4, size_used -> Int8, + + no_provision -> Bool, } } diff --git a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs index a2409e0a194..2b11d9935fb 100644 --- a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs +++ b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs @@ -87,6 +87,7 @@ mod test { use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::DiskFilter; use nexus_types::identity::Asset; + use omicron_common::api::external::ByteCount; use omicron_common::api::external::DataPageParams; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::GenericUuid; @@ -132,7 +133,12 @@ mod test { let zpool = datastore .zpool_insert( opctx, - Zpool::new(Uuid::new_v4(), sled_id.into_untyped_uuid(), id), + Zpool::new( + Uuid::new_v4(), + sled_id.into_untyped_uuid(), + id, + ByteCount::from(0).into(), + ), ) .await .unwrap(); diff --git a/nexus/reconfigurator/rendezvous/src/crucible_dataset.rs b/nexus/reconfigurator/rendezvous/src/crucible_dataset.rs index 417079895af..0d4fd8a8382 100644 --- a/nexus/reconfigurator/rendezvous/src/crucible_dataset.rs +++ b/nexus/reconfigurator/rendezvous/src/crucible_dataset.rs @@ -136,6 +136,7 @@ mod tests { use nexus_db_queries::db::pub_test_utils::TestDatabase; use nexus_db_queries::db::queries::ALLOW_FULL_TABLE_SCAN_SQL; use nexus_types::inventory::ZpoolName; + use omicron_common::api::external::ByteCount; use omicron_common::disk::CompressionAlgorithm; use omicron_test_utils::dev; use omicron_uuid_kinds::GenericUuid; @@ -213,6 +214,7 @@ mod tests { zpool_id.into_untyped_uuid(), sled_id.into_untyped_uuid(), disk_id, + ByteCount::from(0).into(), ), ) .await diff --git a/nexus/src/app/background/tasks/blueprint_execution.rs b/nexus/src/app/background/tasks/blueprint_execution.rs index dcd1e6b4fca..fccba57a0bf 100644 --- a/nexus/src/app/background/tasks/blueprint_execution.rs +++ b/nexus/src/app/background/tasks/blueprint_execution.rs @@ -193,6 +193,7 @@ mod test { CockroachDbPreserveDowngrade, blueprint_zone_type, }; use nexus_types::external_api::views::SledState; + use omicron_common::api::external; use omicron_common::api::external::Generation; use omicron_common::zpool_name::ZpoolName; use omicron_uuid_kinds::BlueprintUuid; @@ -457,6 +458,7 @@ mod test { pool_id.into_untyped_uuid(), sled_id.into_untyped_uuid(), PhysicalDiskUuid::new_v4(), + external::ByteCount::from(0).into(), ); datastore .zpool_insert(&opctx, zpool) diff --git a/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs b/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs index c06f09c4ab2..7b96aaf3785 100644 --- a/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs +++ b/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs @@ 
-185,6 +185,7 @@ mod tests { use nexus_db_model::Region; use nexus_test_utils::SLED_AGENT_UUID; use nexus_test_utils_macros::nexus_test; + use omicron_common::api::external::ByteCount; use omicron_uuid_kinds::{ DatasetUuid, PhysicalDiskUuid, RegionUuid, SledUuid, VolumeUuid, }; @@ -225,7 +226,12 @@ mod tests { let zpool = datastore .zpool_insert( opctx, - Zpool::new(Uuid::new_v4(), sled_id.into_untyped_uuid(), id), + Zpool::new( + Uuid::new_v4(), + sled_id.into_untyped_uuid(), + id, + ByteCount::from(0).into(), + ), ) .await .unwrap(); diff --git a/nexus/src/app/background/tasks/physical_disk_adoption.rs b/nexus/src/app/background/tasks/physical_disk_adoption.rs index b7686a99555..059752df6ea 100644 --- a/nexus/src/app/background/tasks/physical_disk_adoption.rs +++ b/nexus/src/app/background/tasks/physical_disk_adoption.rs @@ -19,6 +19,7 @@ use nexus_db_model::Zpool; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use nexus_types::identity::Asset; +use omicron_common::api::external::ByteCount; use omicron_common::api::external::DataPageParams; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::GenericUuid; @@ -138,7 +139,9 @@ impl BackgroundTask for PhysicalDiskAdoption { let zpool = Zpool::new( Uuid::new_v4(), inv_disk.sled_id.into_untyped_uuid(), - disk.id() + disk.id(), + // See oxidecomputer/omicron#7875 for the 250G determination + ByteCount::from_gibibytes_u32(250).into(), ); let result = self.datastore.physical_disk_and_zpool_insert( diff --git a/nexus/src/app/background/tasks/support_bundle_collector.rs b/nexus/src/app/background/tasks/support_bundle_collector.rs index a17e6973b94..b0626228aab 100644 --- a/nexus/src/app/background/tasks/support_bundle_collector.rs +++ b/nexus/src/app/background/tasks/support_bundle_collector.rs @@ -839,6 +839,7 @@ mod test { use nexus_db_model::Zpool; use nexus_test_utils::SLED_AGENT_UUID; use nexus_test_utils_macros::nexus_test; + use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; use omicron_common::api::internal::shared::DatasetKind; use omicron_common::disk::DatasetConfig; @@ -930,7 +931,12 @@ mod test { let zpool = datastore .zpool_insert( opctx, - Zpool::new(Uuid::new_v4(), sled_id.into_untyped_uuid(), id), + Zpool::new( + Uuid::new_v4(), + sled_id.into_untyped_uuid(), + id, + ByteCount::from(0).into(), + ), ) .await .unwrap(); diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 6734b1a51ba..018341900a3 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -52,6 +52,7 @@ use nexus_types::silo::silo_dns_name; use omicron_common::address::{Ipv6Subnet, RACK_PREFIX, get_64_subnet}; use omicron_common::api::external::AddressLotKind; use omicron_common::api::external::BgpPeer; +use omicron_common::api::external::ByteCount; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::IdentityMetadataCreateParams; @@ -136,6 +137,8 @@ impl super::Nexus { pool.id, pool.sled_id, pool.physical_disk_id, + // See oxidecomputer/omicron#7875 for the 250G determination + ByteCount::from_gibibytes_u32(250).into(), ) }) .collect(); diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 4523ee893b6..bf3a3ef2956 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -19,6 +19,7 @@ use nexus_types::deployment::SledFilter; use nexus_types::external_api::views::PhysicalDiskPolicy; use nexus_types::external_api::views::SledPolicy; use 
nexus_types::external_api::views::SledProvisionPolicy; +use omicron_common::api::external::ByteCount; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; @@ -330,6 +331,9 @@ impl super::Nexus { request.id, request.sled_id, request.physical_disk_id, + // This function is only called from tests, so it does not need a + // real value here. + ByteCount::from_gibibytes_u32(0).into(), ); self.db_datastore.zpool_insert(&opctx, zpool).await?; Ok(()) diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index 29e3d7cd865..b38bd059007 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -35,6 +35,7 @@ use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils::wait_for_producer; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; +use nexus_types::identity::Asset; use nexus_types::silo::DEFAULT_SILO_ID; use omicron_common::api::external::Disk; use omicron_common::api::external::DiskState; @@ -2594,6 +2595,249 @@ async fn test_disk_expunge(cptestctx: &ControlPlaneTestContext) { assert_eq!(expunged_regions.len(), 3); } +#[nexus_test(extra_sled_agents = 3)] +async fn test_do_not_provision_on_dataset(cptestctx: &ControlPlaneTestContext) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create one zpool, each with one dataset, on all the sleds + let disk_test = DiskTestBuilder::new(&cptestctx) + .on_all_sleds() + .with_zpool_count(1) + .build() + .await; + + // For one of the datasets, mark it as not provisionable + let dataset = &disk_test.zpools().next().unwrap().datasets[0]; + + datastore + .mark_crucible_dataset_not_provisionable(&opctx, dataset.id) + .await + .unwrap(); + + // Create a disk + let client = &cptestctx.external_client; + let _project_id = create_project_and_pool(client).await; + + let disk = create_disk(&client, PROJECT_NAME, DISK_NAME).await; + + // Assert no region was allocated to the marked dataset + let disk_id = disk.identity.id; + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk_id) + .fetch() + .await + .unwrap_or_else(|_| panic!("test disk {:?} should exist", disk_id)); + + let allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id()).await.unwrap(); + + for (allocated_region_dataset, _) in allocated_regions { + assert_ne!(allocated_region_dataset.id(), dataset.id); + } +} + +#[nexus_test(extra_sled_agents = 2)] +async fn test_do_not_provision_on_dataset_not_enough( + cptestctx: &ControlPlaneTestContext, +) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create one zpool, each with one dataset, on all the sleds + let disk_test = DiskTestBuilder::new(&cptestctx) + .on_all_sleds() + .with_zpool_count(1) + .build() + .await; + + // For one of the datasets, mark it as not provisionable + let dataset = &disk_test.zpools().next().unwrap().datasets[0]; + + datastore + .mark_crucible_dataset_not_provisionable(&opctx, dataset.id) + .await + .unwrap(); + + // Because there's only 3 sled agents, each with one zpool with one dataset, + // this shouldn't be enough to create a disk. 
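+    // (Each disk needs three regions on distinct zpools; with one of the
+    // three datasets marked `no_provision`, only two candidates remain.)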
+ + let client = &cptestctx.external_client; + create_project_and_pool(client).await; + + let disks_url = get_disks_url(); + + let new_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: DISK_NAME.parse().unwrap(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: ByteCount::from_gibibytes_u32(1), + }; + + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&new_disk)) + .expect_status(Some(StatusCode::INSUFFICIENT_STORAGE)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); + + // Marking that dataset as provisionable should allow the disk to be + // created. + + datastore + .mark_crucible_dataset_provisionable(&opctx, dataset.id) + .await + .unwrap(); + + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&new_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); +} + +#[nexus_test(extra_sled_agents = 2)] +async fn test_zpool_control_plane_storage_buffer( + cptestctx: &ControlPlaneTestContext, +) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create one zpool, each with one dataset, on all the sleds + let disk_test = DiskTestBuilder::new(&cptestctx) + .on_all_sleds() + .with_zpool_count(1) + .build() + .await; + + // Assert default is still 16 GiB + assert_eq!(16, DiskTest::DEFAULT_ZPOOL_SIZE_GIB); + + let client = &cptestctx.external_client; + create_project_and_pool(client).await; + + let disks_url = get_disks_url(); + + // Creating a 8G disk will work (10G size used due to reservation overhead) + let new_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: "disk1".parse().unwrap(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: ByteCount::from_gibibytes_u32(8), + }; + + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&new_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); + + // Creating a 4G disk will also work (5G size used due to reservation + // overhead plus the previous 10G size used is less than 16G) + let new_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: "disk2".parse().unwrap(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: ByteCount::from_gibibytes_u32(4), + }; + + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&new_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); + + // Delete the 4G disk + let disk_url = get_disk_url("disk2"); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + // For any of the zpools, set the control plane storage buffer to 2G. 
This + // should prevent the disk's region allocation from succeeding (as the + // reserved sizes of 10G + 5G plus the storage buffer of 2G is 1G over the + // the backing pool's 16G). + + let zpool = &disk_test.zpools().next().unwrap(); + datastore + .zpool_set_control_plane_storage_buffer( + &opctx, + zpool.id, + ByteCount::from_gibibytes_u32(2).into(), + ) + .await + .unwrap(); + + // Now creating the 4G disk should fail + + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&new_disk)) + .expect_status(Some(StatusCode::INSUFFICIENT_STORAGE)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); + + // Setting the storage buffer to 1G should allow the disk creation to + // succeed. + + datastore + .zpool_set_control_plane_storage_buffer( + &opctx, + zpool.id, + ByteCount::from_gibibytes_u32(1).into(), + ) + .await + .unwrap(); + + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&new_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); +} + async fn disk_get(client: &ClientTestContext, disk_url: &str) -> Disk { NexusRequest::object_get(client, disk_url) .authn_as(AuthnMode::PrivilegedUser) diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index b71842b65ef..2c082e51921 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -541,7 +541,12 @@ CREATE TABLE IF NOT EXISTS omicron.public.zpool ( sled_id UUID NOT NULL, /* FK into the Physical Disk table */ - physical_disk_id UUID NOT NULL + physical_disk_id UUID NOT NULL, + + /* + * How many bytes to reserve for non-Crucible control plane storage + */ + control_plane_storage_buffer INT NOT NULL ); /* Create an index on the physical disk id */ @@ -604,7 +609,10 @@ CREATE TABLE IF NOT EXISTS omicron.public.crucible_dataset ( * dataset that contains the regions (which is larger than the the actual * region size). 
*/ - size_used INT NOT NULL + size_used INT NOT NULL, + + /* Do not consider this dataset during region allocation */ + no_provision BOOL NOT NULL ); /* Create an index on the size usage for any Crucible dataset */ @@ -5018,7 +5026,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '134.0.0', NULL) + (TRUE, NOW(), NOW(), '135.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/do-not-provision-flag-for-crucible-dataset/up01.sql b/schema/crdb/do-not-provision-flag-for-crucible-dataset/up01.sql new file mode 100644 index 00000000000..d940c9408bf --- /dev/null +++ b/schema/crdb/do-not-provision-flag-for-crucible-dataset/up01.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.crucible_dataset + ADD COLUMN IF NOT EXISTS no_provision BOOL NOT NULL DEFAULT false; diff --git a/schema/crdb/do-not-provision-flag-for-crucible-dataset/up02.sql b/schema/crdb/do-not-provision-flag-for-crucible-dataset/up02.sql new file mode 100644 index 00000000000..1d0c88d7a72 --- /dev/null +++ b/schema/crdb/do-not-provision-flag-for-crucible-dataset/up02.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.crucible_dataset + ALTER COLUMN no_provision DROP DEFAULT; diff --git a/schema/crdb/do-not-provision-flag-for-crucible-dataset/up03.sql b/schema/crdb/do-not-provision-flag-for-crucible-dataset/up03.sql new file mode 100644 index 00000000000..22f7499491e --- /dev/null +++ b/schema/crdb/do-not-provision-flag-for-crucible-dataset/up03.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.zpool + ADD COLUMN IF NOT EXISTS control_plane_storage_buffer INT NOT NULL DEFAULT 268435456000; diff --git a/schema/crdb/do-not-provision-flag-for-crucible-dataset/up04.sql b/schema/crdb/do-not-provision-flag-for-crucible-dataset/up04.sql new file mode 100644 index 00000000000..534c9dad819 --- /dev/null +++ b/schema/crdb/do-not-provision-flag-for-crucible-dataset/up04.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.zpool + ALTER COLUMN control_plane_storage_buffer DROP DEFAULT; From 77c0cf9b6d8bed11df7b7730fbdf6882634e0715 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Fri, 4 Apr 2025 13:48:19 +0000 Subject: [PATCH 02/17] separate omdb feature not necessary --- dev-tools/omdb/Cargo.toml | 2 +- nexus/db-queries/Cargo.toml | 1 - nexus/db-queries/src/db/datastore/crucible_dataset.rs | 2 -- nexus/db-queries/src/db/datastore/zpool.rs | 1 - 4 files changed, 1 insertion(+), 5 deletions(-) diff --git a/dev-tools/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml index 18a35be0956..e605bd58300 100644 --- a/dev-tools/omdb/Cargo.toml +++ b/dev-tools/omdb/Cargo.toml @@ -34,7 +34,7 @@ itertools.workspace = true nexus-client.workspace = true nexus-config.workspace = true nexus-db-model.workspace = true -nexus-db-queries = { workspace = true, features = ["omdb"] } +nexus-db-queries.workspace = true nexus-db-schema.workspace = true nexus-inventory.workspace = true nexus-reconfigurator-preparation.workspace = true diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index ac9c8de295e..cbff5591754 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -74,7 +74,6 @@ omicron-test-utils = { workspace = true, optional = true } [features] # Enable to export `TestDatabase` testing = ["omicron-test-utils"] -omdb = [] [dev-dependencies] assert_matches.workspace = true diff --git a/nexus/db-queries/src/db/datastore/crucible_dataset.rs b/nexus/db-queries/src/db/datastore/crucible_dataset.rs index 40204e54152..6bdd0f44496 100644 --- 
a/nexus/db-queries/src/db/datastore/crucible_dataset.rs +++ b/nexus/db-queries/src/db/datastore/crucible_dataset.rs @@ -246,7 +246,6 @@ impl DataStore { Ok(physical_disk.disk_policy == PhysicalDiskPolicy::InService) } - #[cfg(any(test, feature = "testing", feature = "omdb"))] pub async fn mark_crucible_dataset_not_provisionable( &self, opctx: &OpContext, @@ -265,7 +264,6 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } - #[cfg(any(test, feature = "testing", feature = "omdb"))] pub async fn mark_crucible_dataset_provisionable( &self, opctx: &OpContext, diff --git a/nexus/db-queries/src/db/datastore/zpool.rs b/nexus/db-queries/src/db/datastore/zpool.rs index 67325ed309e..457ffdbb8b4 100644 --- a/nexus/db-queries/src/db/datastore/zpool.rs +++ b/nexus/db-queries/src/db/datastore/zpool.rs @@ -312,7 +312,6 @@ impl DataStore { Ok(SledUuid::from_untyped_uuid(id)) } - #[cfg(any(test, feature = "testing", feature = "omdb"))] pub async fn zpool_set_control_plane_storage_buffer( &self, opctx: &OpContext, From 35cffce2e99e6c5af7d789a0480befdd2a40aff5 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Fri, 4 Apr 2025 13:49:24 +0000 Subject: [PATCH 03/17] remove my dumb --- nexus/db-queries/src/db/raw_query_builder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/db-queries/src/db/raw_query_builder.rs b/nexus/db-queries/src/db/raw_query_builder.rs index d4f0c364bdd..75dc46fe4ff 100644 --- a/nexus/db-queries/src/db/raw_query_builder.rs +++ b/nexus/db-queries/src/db/raw_query_builder.rs @@ -135,7 +135,7 @@ impl QueryBuilder { Value: diesel::serialize::ToSql + Send + 'static, BindSt: Send + 'static, { - self.query = self.query.take().map(|q| q.bind::(b)); + self.query = self.query.take().map(|q| q.bind(b)); self } From 46cc30b0e55c6952bb0ca0d520e3ac879b3db605 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Fri, 4 Apr 2025 14:14:20 +0000 Subject: [PATCH 04/17] check for time_deleted --- nexus/db-queries/src/db/datastore/crucible_dataset.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nexus/db-queries/src/db/datastore/crucible_dataset.rs b/nexus/db-queries/src/db/datastore/crucible_dataset.rs index 6bdd0f44496..38820c3f203 100644 --- a/nexus/db-queries/src/db/datastore/crucible_dataset.rs +++ b/nexus/db-queries/src/db/datastore/crucible_dataset.rs @@ -257,6 +257,7 @@ impl DataStore { diesel::update(dsl::crucible_dataset) .filter(dsl::id.eq(to_db_typed_uuid(dataset_id))) + .filter(dsl::time_deleted.is_null()) .set(dsl::no_provision.eq(true)) .execute_async(&*conn) .await @@ -275,6 +276,7 @@ impl DataStore { diesel::update(dsl::crucible_dataset) .filter(dsl::id.eq(to_db_typed_uuid(dataset_id))) + .filter(dsl::time_deleted.is_null()) .set(dsl::no_provision.eq(false)) .execute_async(&*conn) .await From cc38e26d7340fd4e965e2a799f3819729e547a13 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Fri, 4 Apr 2025 14:45:14 +0000 Subject: [PATCH 05/17] use ByteCount, and define a const for buffer --- dev-tools/omdb/src/bin/omdb/db.rs | 9 ++++++--- nexus/db-model/src/zpool.rs | 6 +++--- nexus/src/app/background/tasks/physical_disk_adoption.rs | 5 ++--- nexus/src/app/mod.rs | 9 +++++++++ nexus/src/app/rack.rs | 5 ++--- 5 files changed, 22 insertions(+), 12 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 36214cda2ef..69953aab783 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -1615,10 +1615,11 @@ async fn cmd_crucible_dataset_show_overprovisioned( 
continue; } - let control_plane_storage_buffer = zpools + let control_plane_storage_buffer: i64 = zpools .get(&crucible_dataset.pool_id) .unwrap() - .control_plane_storage_buffer(); + .control_plane_storage_buffer() + .into(); let total_dataset_size = crucible_dataset.size_used + control_plane_storage_buffer; @@ -7752,7 +7753,9 @@ async fn cmd_db_zpool_list( sled_id: p.sled_id, physical_disk_id: p.physical_disk_id.into_untyped_uuid(), total_size: *zpool_total_size.get(&zpool_id).unwrap(), - control_plane_storage_buffer: p.control_plane_storage_buffer(), + control_plane_storage_buffer: p + .control_plane_storage_buffer() + .into(), } }) .collect(); diff --git a/nexus/db-model/src/zpool.rs b/nexus/db-model/src/zpool.rs index 95414fb7f45..ff2e9a4ffc7 100644 --- a/nexus/db-model/src/zpool.rs +++ b/nexus/db-model/src/zpool.rs @@ -33,7 +33,7 @@ pub struct Zpool { /// How much storage to prevent Crucible from allocating for regions on a /// given pool. - control_plane_storage_buffer: i64, + control_plane_storage_buffer: ByteCount, } impl Zpool { @@ -49,7 +49,7 @@ impl Zpool { rcgen: Generation::new(), sled_id, physical_disk_id: physical_disk_id.into(), - control_plane_storage_buffer: control_plane_storage_buffer.into(), + control_plane_storage_buffer, } } @@ -57,7 +57,7 @@ impl Zpool { self.time_deleted } - pub fn control_plane_storage_buffer(&self) -> i64 { + pub fn control_plane_storage_buffer(&self) -> ByteCount { self.control_plane_storage_buffer } } diff --git a/nexus/src/app/background/tasks/physical_disk_adoption.rs b/nexus/src/app/background/tasks/physical_disk_adoption.rs index 059752df6ea..35c72437ebc 100644 --- a/nexus/src/app/background/tasks/physical_disk_adoption.rs +++ b/nexus/src/app/background/tasks/physical_disk_adoption.rs @@ -11,6 +11,7 @@ //! //! In the future, this may become more explicitly operator-controlled. +use crate::app::CONTROL_PLANE_STORAGE_BUFFER; use crate::app::background::BackgroundTask; use futures::FutureExt; use futures::future::BoxFuture; @@ -19,7 +20,6 @@ use nexus_db_model::Zpool; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use nexus_types::identity::Asset; -use omicron_common::api::external::ByteCount; use omicron_common::api::external::DataPageParams; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::GenericUuid; @@ -140,8 +140,7 @@ impl BackgroundTask for PhysicalDiskAdoption { Uuid::new_v4(), inv_disk.sled_id.into_untyped_uuid(), disk.id(), - // See oxidecomputer/omicron#7875 for the 250G determination - ByteCount::from_gibibytes_u32(250).into(), + CONTROL_PLANE_STORAGE_BUFFER.into(), ); let result = self.datastore.physical_disk_and_zpool_insert( diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 097ef27aff9..63e6628237a 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -27,6 +27,7 @@ use nexus_db_queries::db; use omicron_common::address::DENDRITE_PORT; use omicron_common::address::MGD_PORT; use omicron_common::address::MGS_PORT; +use omicron_common::api::external::ByteCount; use omicron_common::api::external::Error; use omicron_common::api::internal::shared::SwitchLocation; use omicron_uuid_kinds::OmicronZoneUuid; @@ -125,6 +126,14 @@ pub const MAX_DISK_SIZE_BYTES: u64 = 1023 * (1 << 30); // 1023 GiB /// This value is aribtrary pub const MAX_SSH_KEYS_PER_INSTANCE: u32 = 100; +/// The amount of disk space to reserve for non-Crucible / control plane +/// storage. This amount represents a buffer that the region allocation query +/// will not use for each U2. 
+/// +/// See oxidecomputer/omicron#7875 for the 250G determination. +pub const CONTROL_PLANE_STORAGE_BUFFER: ByteCount = + ByteCount::from_gibibytes_u32(250); + /// Manages an Oxide fleet -- the heart of the control plane pub struct Nexus { /// uuid for this nexus instance. diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 018341900a3..a509b1b5599 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -4,6 +4,7 @@ //! Rack management +use crate::app::CONTROL_PLANE_STORAGE_BUFFER; use crate::external_api::params; use crate::external_api::params::CertificateCreate; use crate::external_api::shared::ServiceUsingCertificate; @@ -52,7 +53,6 @@ use nexus_types::silo::silo_dns_name; use omicron_common::address::{Ipv6Subnet, RACK_PREFIX, get_64_subnet}; use omicron_common::api::external::AddressLotKind; use omicron_common::api::external::BgpPeer; -use omicron_common::api::external::ByteCount; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::IdentityMetadataCreateParams; @@ -137,8 +137,7 @@ impl super::Nexus { pool.id, pool.sled_id, pool.physical_disk_id, - // See oxidecomputer/omicron#7875 for the 250G determination - ByteCount::from_gibibytes_u32(250).into(), + CONTROL_PLANE_STORAGE_BUFFER.into(), ) }) .collect(); From 78abeac8a7e1c20a7f82954d524fb00be37363dd Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Mon, 7 Apr 2025 20:39:30 +0000 Subject: [PATCH 06/17] require destructive token for cmd_db_zpool_set_storage_buffer --- dev-tools/omdb/src/bin/omdb/db.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 69953aab783..7ec74dbe24e 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -1321,11 +1321,15 @@ impl DbArgs { }) => cmd_db_zpool_list(&opctx, &datastore).await, DbCommands::Pool(PoolArgs { command: PoolCommands::SetStorageBuffer(args) - }) => cmd_db_zpool_set_storage_buffer( - &opctx, - &datastore, - &args, - ).await, + }) => { + let token = omdb.check_allow_destructive()?; + cmd_db_zpool_set_storage_buffer( + &opctx, + &datastore, + &args, + token, + ).await + } } } }).await @@ -7774,6 +7778,7 @@ async fn cmd_db_zpool_set_storage_buffer( opctx: &OpContext, datastore: &DataStore, args: &SetStorageBufferArgs, + _token: DestructiveOperationToken, ) -> Result<(), anyhow::Error> { datastore .zpool_set_control_plane_storage_buffer( From ac3356637fa2d283a327d2aaa605a9ad16b3f91a Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Mon, 7 Apr 2025 20:42:35 +0000 Subject: [PATCH 07/17] list -i, plus adjust buffer for CI --- .github/buildomat/jobs/deploy.sh | 6 ++++++ dev-tools/omdb/src/bin/omdb/db.rs | 30 ++++++++++++++++++++++-------- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index eea26700057..453936b5f28 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -397,6 +397,12 @@ until zoneadm list | grep nexus; do done echo "Waited for nexus: ${retry}s" +# The bootstrap command creates a disk, so before that: adjust the control plane +# storage buffer to 0 as the virtual hardware only creates 20G pools + +pfexec zlogin oxz_switch omdb db pool list -i | \ + xargs -I{} -i pfexec zlogin oxz_switch omdb -w db pool set-storage-buffer "{}" 0 + export RUST_BACKTRACE=1 export E2E_TLS_CERT IPPOOL_START IPPOOL_END eval 
"$(./target/debug/bootstrap)" diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 7ec74dbe24e..d5bdac14ca7 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -994,12 +994,19 @@ struct PoolArgs { #[derive(Debug, Subcommand, Clone)] enum PoolCommands { /// List pools - List, + List(PoolListArgs), /// Set the control plane storage buffer for a pool SetStorageBuffer(SetStorageBufferArgs), } +#[derive(Debug, Args, Clone)] +struct PoolListArgs { + /// Only output zpool ids + #[clap(short, long)] + id_only: bool, +} + #[derive(Debug, Args, Clone)] struct SetStorageBufferArgs { /// The UUID of Pool @@ -1317,8 +1324,8 @@ impl DbArgs { command: OximeterCommands::ListProducers }) => cmd_db_oximeter_list_producers(&datastore, fetch_opts).await, DbCommands::Pool(PoolArgs { - command: PoolCommands::List - }) => cmd_db_zpool_list(&opctx, &datastore).await, + command: PoolCommands::List(args) + }) => cmd_db_zpool_list(&opctx, &datastore, &args).await, DbCommands::Pool(PoolArgs { command: PoolCommands::SetStorageBuffer(args) }) => { @@ -7715,6 +7722,7 @@ fn datetime_opt_rfc3339_concise(t: &Option>) -> String { async fn cmd_db_zpool_list( opctx: &OpContext, datastore: &DataStore, + args: &PoolListArgs, ) -> Result<(), anyhow::Error> { let zpools = datastore.zpool_list_all_external_batched(opctx).await?; @@ -7764,12 +7772,18 @@ async fn cmd_db_zpool_list( }) .collect(); - let table = tabled::Table::new(rows) - .with(tabled::settings::Style::psql()) - .with(tabled::settings::Padding::new(0, 1, 0, 0)) - .to_string(); + if args.id_only { + for row in rows { + println!("{}", row.id); + } + } else { + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::psql()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); - println!("{}", table); + println!("{}", table); + } Ok(()) } From 5de0a56a1315fbb02e43a4c16b7b6e55df2dd044 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Mon, 7 Apr 2025 20:45:16 +0000 Subject: [PATCH 08/17] add size_left column, refactor to get_crucible_dataset_rows --- dev-tools/omdb/src/bin/omdb/db.rs | 167 +++++++++++++++--------------- 1 file changed, 81 insertions(+), 86 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index d5bdac14ca7..4b609650def 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -1528,58 +1528,29 @@ async fn lookup_project( // Crucible datasets -async fn cmd_crucible_dataset_list( - opctx: &OpContext, - datastore: &DataStore, -) -> Result<(), anyhow::Error> { - let crucible_datasets = - datastore.crucible_dataset_list_all_batched(opctx).await?; - - #[derive(Tabled)] - #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] - struct CrucibleDatasetRow { - id: Uuid, - time_deleted: String, - pool_id: Uuid, - address: String, - size_used: i64, - no_provision: bool, - } - - let rows: Vec<_> = crucible_datasets - .into_iter() - .map(|d| CrucibleDatasetRow { - id: d.id().into_untyped_uuid(), - time_deleted: match d.time_deleted() { - Some(t) => t.to_string(), - None => String::from(""), - }, - pool_id: d.pool_id, - address: d.address().to_string(), - size_used: d.size_used, - no_provision: d.no_provision(), - }) - .collect(); - - let table = tabled::Table::new(rows) - .with(tabled::settings::Style::psql()) - .with(tabled::settings::Padding::new(0, 1, 0, 0)) - .to_string(); +#[derive(Tabled)] +#[tabled(rename_all = "SCREAMING_SNAKE_CASE")] +struct CrucibleDatasetRow { + // dataset fields + id: 
Uuid, + time_deleted: String, + pool_id: Uuid, + address: String, + size_used: i64, + no_provision: bool, - println!("{}", table); + // zpool fields + control_plane_storage_buffer: i64, + pool_total_size: i64, - Ok(()) + // computed fields + size_left: i128, } -async fn cmd_crucible_dataset_show_overprovisioned( +async fn get_crucible_dataset_rows( opctx: &OpContext, datastore: &DataStore, -) -> Result<(), anyhow::Error> { - // A Crucible dataset is overprovisioned if size_used (amount taken up by - // Crucible region reservations) plus the control plane storage buffer - // (note this is _not_ a ZFS reservation! it's currently just a per-pool - // value in the database) is larger than the backing pool's total size. - +) -> Result, anyhow::Error> { let crucible_datasets = datastore.crucible_dataset_list_all_batched(opctx).await?; @@ -1605,55 +1576,79 @@ async fn cmd_crucible_dataset_show_overprovisioned( .map(|(zpool, _)| (zpool.id().into_untyped_uuid(), zpool)) .collect(); - #[derive(Tabled)] - #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] - struct CrucibleDatasetOverprovisionedRow { - // dataset fields - id: Uuid, - size_used: i64, - no_provision: bool, - - // zpool fields - pool_id: Uuid, - control_plane_storage_buffer: i64, - pool_total_size: i64, - } - - let mut rows = vec![]; - - for crucible_dataset in crucible_datasets { - if crucible_dataset.time_deleted().is_some() { - continue; - } + let mut result: Vec = + Vec::with_capacity(crucible_datasets.len()); + for d in crucible_datasets { let control_plane_storage_buffer: i64 = zpools - .get(&crucible_dataset.pool_id) + .get(&d.pool_id) .unwrap() .control_plane_storage_buffer() .into(); - let total_dataset_size = - crucible_dataset.size_used + control_plane_storage_buffer; - - match zpool_total_size.get(&crucible_dataset.pool_id) { - Some(pool_total_size) => { - if total_dataset_size >= *pool_total_size { - rows.push(CrucibleDatasetOverprovisionedRow { - id: crucible_dataset.id().into_untyped_uuid(), - size_used: crucible_dataset.size_used, - no_provision: crucible_dataset.no_provision(), - - pool_id: crucible_dataset.pool_id, - control_plane_storage_buffer, - pool_total_size: *pool_total_size, - }); - } - } + let pool_total_size = *zpool_total_size.get(&d.pool_id).unwrap(); - None => {} - } + result.push(CrucibleDatasetRow { + // dataset fields + id: d.id().into_untyped_uuid(), + time_deleted: match d.time_deleted() { + Some(t) => t.to_string(), + None => String::from(""), + }, + pool_id: d.pool_id, + address: d.address().to_string(), + size_used: d.size_used, + no_provision: d.no_provision(), + + // zpool fields + control_plane_storage_buffer, + pool_total_size, + + // computed fields + size_left: i128::from(pool_total_size) + - i128::from(control_plane_storage_buffer) + - i128::from(d.size_used), + }); } + Ok(result) +} + +async fn cmd_crucible_dataset_list( + opctx: &OpContext, + datastore: &DataStore, +) -> Result<(), anyhow::Error> { + let rows: Vec<_> = get_crucible_dataset_rows(opctx, datastore).await?; + + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::psql()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + + Ok(()) +} + +async fn cmd_crucible_dataset_show_overprovisioned( + opctx: &OpContext, + datastore: &DataStore, +) -> Result<(), anyhow::Error> { + // A Crucible dataset is overprovisioned if size_used (amount taken up by + // Crucible region reservations) plus the control plane storage buffer + // (note this is _not_ a ZFS reservation! 
it's currently just a per-pool + // value in the database) is larger than the backing pool's total size. + + let rows: Vec<_> = get_crucible_dataset_rows(opctx, datastore).await?; + let rows: Vec<_> = rows + .into_iter() + .filter(|row| { + (i128::from(row.size_used) + + i128::from(row.control_plane_storage_buffer)) + >= i128::from(row.pool_total_size) + }) + .collect(); + let table = tabled::Table::new(rows) .with(tabled::settings::Style::psql()) .with(tabled::settings::Padding::new(0, 1, 0, 0)) From ce059ad83d67480f0be070911d2cd74f83dce5ec Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Mon, 7 Apr 2025 21:06:16 +0000 Subject: [PATCH 09/17] change pool to zpool --- dev-tools/omdb/src/bin/omdb/db.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 4b609650def..0a78de28d13 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -375,7 +375,7 @@ enum DbCommands { /// Print information about the oximeter collector. Oximeter(OximeterArgs), /// Commands for querying and interacting with pools - Pool(PoolArgs), + Zpool(ZpoolArgs), } #[derive(Debug, Args, Clone)] @@ -986,22 +986,22 @@ struct VmmListArgs { } #[derive(Debug, Args, Clone)] -struct PoolArgs { +struct ZpoolArgs { #[command(subcommand)] - command: PoolCommands, + command: ZpoolCommands, } #[derive(Debug, Subcommand, Clone)] -enum PoolCommands { +enum ZpoolCommands { /// List pools - List(PoolListArgs), + List(ZpoolListArgs), /// Set the control plane storage buffer for a pool SetStorageBuffer(SetStorageBufferArgs), } #[derive(Debug, Args, Clone)] -struct PoolListArgs { +struct ZpoolListArgs { /// Only output zpool ids #[clap(short, long)] id_only: bool, @@ -1323,11 +1323,11 @@ impl DbArgs { DbCommands::Oximeter(OximeterArgs { command: OximeterCommands::ListProducers }) => cmd_db_oximeter_list_producers(&datastore, fetch_opts).await, - DbCommands::Pool(PoolArgs { - command: PoolCommands::List(args) + DbCommands::Zpool(ZpoolArgs { + command: ZpoolCommands::List(args) }) => cmd_db_zpool_list(&opctx, &datastore, &args).await, - DbCommands::Pool(PoolArgs { - command: PoolCommands::SetStorageBuffer(args) + DbCommands::Zpool(ZpoolArgs { + command: ZpoolCommands::SetStorageBuffer(args) }) => { let token = omdb.check_allow_destructive()?; cmd_db_zpool_set_storage_buffer( @@ -7717,7 +7717,7 @@ fn datetime_opt_rfc3339_concise(t: &Option>) -> String { async fn cmd_db_zpool_list( opctx: &OpContext, datastore: &DataStore, - args: &PoolListArgs, + args: &ZpoolListArgs, ) -> Result<(), anyhow::Error> { let zpools = datastore.zpool_list_all_external_batched(opctx).await?; From 264465789de4f55a44b1021c52e5537b2b37cbbc Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Mon, 7 Apr 2025 21:15:51 +0000 Subject: [PATCH 10/17] log message when changing control plane storage buffer --- nexus/db-queries/src/db/datastore/zpool.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/nexus/db-queries/src/db/datastore/zpool.rs b/nexus/db-queries/src/db/datastore/zpool.rs index 457ffdbb8b4..6aa8245bc51 100644 --- a/nexus/db-queries/src/db/datastore/zpool.rs +++ b/nexus/db-queries/src/db/datastore/zpool.rs @@ -323,6 +323,12 @@ impl DataStore { opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; let conn = self.pool_connection_authorized(opctx).await?; + info!( + opctx.log, + "changing {id} control plane storage buffer to \ + {control_plane_storage_buffer}", + ); + diesel::update(dsl::zpool) 
.filter(dsl::id.eq(to_db_typed_uuid(id))) .set( From 0bbf9a76908799709c2d6feb4442e50cabd76a8d Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Mon, 7 Apr 2025 21:49:40 +0000 Subject: [PATCH 11/17] expand comment for control_plane_storage_buffer --- nexus/db-model/src/zpool.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/nexus/db-model/src/zpool.rs b/nexus/db-model/src/zpool.rs index ff2e9a4ffc7..879ad09c4c3 100644 --- a/nexus/db-model/src/zpool.rs +++ b/nexus/db-model/src/zpool.rs @@ -31,8 +31,18 @@ pub struct Zpool { // The physical disk to which this Zpool is attached. pub physical_disk_id: DbTypedUuid, - /// How much storage to prevent Crucible from allocating for regions on a - /// given pool. + /// Currently, a single dataset is created per pool, and this dataset (and + /// children of it) is used for all persistent data, both customer data (in + /// the form of Crucible regions) and non-customer data (zone root datasets, + /// delegated zone datasets, debug logs, core files, and more). To prevent + /// Crucible regions from taking all the dataset space, reserve space that + /// region allocation is not allowed to use. + /// + /// This value is consulted by the region allocation query, and can change + /// at runtime. A pool could become "overprovisioned" if this value + /// increases over the total storage minus how much storage Crucible regions + /// currently occupy, though this won't immediately cause any problems and + /// can be identified and fixed via omdb commands. control_plane_storage_buffer: ByteCount, } From bb680d918d17b6b9ac7a66b4e096f6e25cb44e6a Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 8 Apr 2025 00:48:02 +0000 Subject: [PATCH 12/17] expectorate --- dev-tools/omdb/tests/usage_errors.out | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index 4244d9bb027..dd1e1244380 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -139,7 +139,7 @@ Commands: processes vmms Alias to `omdb db vmm list` oximeter Print information about the oximeter collector - pool Commands for querying and interacting with pools + zpool Commands for querying and interacting with pools help Print this message or the help of the given subcommand(s) Options: @@ -195,7 +195,7 @@ Commands: processes vmms Alias to `omdb db vmm list` oximeter Print information about the oximeter collector - pool Commands for querying and interacting with pools + zpool Commands for querying and interacting with pools help Print this message or the help of the given subcommand(s) Options: From 5e0d84f3e299c14c195b095259cad28ece3ce1f7 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 8 Apr 2025 01:04:34 +0000 Subject: [PATCH 13/17] no more unwraps --- dev-tools/omdb/src/bin/omdb/db.rs | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 0a78de28d13..19e80849c02 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -1582,11 +1582,14 @@ async fn get_crucible_dataset_rows( for d in crucible_datasets { let control_plane_storage_buffer: i64 = zpools .get(&d.pool_id) - .unwrap() + .ok_or_else(|| anyhow::anyhow!("zpool {} not found!", d.pool_id))? 
.control_plane_storage_buffer() .into(); - let pool_total_size = *zpool_total_size.get(&d.pool_id).unwrap(); + let pool_total_size = + *zpool_total_size.get(&d.pool_id).ok_or_else(|| { + anyhow::anyhow!("zpool {} not part of inventory!", d.pool_id) + })?; result.push(CrucibleDatasetRow { // dataset fields @@ -7747,11 +7750,11 @@ async fn cmd_db_zpool_list( control_plane_storage_buffer: i64, } - let rows: Vec<_> = zpools + let rows: Vec = zpools .into_iter() .map(|(p, _)| { let zpool_id = p.id().into_untyped_uuid(); - ZpoolRow { + Ok(ZpoolRow { id: zpool_id, time_deleted: match p.time_deleted() { Some(t) => t.to_string(), @@ -7759,13 +7762,19 @@ async fn cmd_db_zpool_list( }, sled_id: p.sled_id, physical_disk_id: p.physical_disk_id.into_untyped_uuid(), - total_size: *zpool_total_size.get(&zpool_id).unwrap(), + total_size: *zpool_total_size.get(&zpool_id).ok_or_else( + || { + anyhow::anyhow!( + "zpool {zpool_id} not found in inventory!" + ) + }, + )?, control_plane_storage_buffer: p .control_plane_storage_buffer() .into(), - } + }) }) - .collect(); + .collect::, anyhow::Error>>()?; if args.id_only { for row in rows { From e295a79686dde86cb4a54d99ff16176b8e19ab8c Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 8 Apr 2025 13:15:31 +0000 Subject: [PATCH 14/17] try full path to omdb --- .github/buildomat/jobs/deploy.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index 453936b5f28..f2fbe2d41eb 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -400,8 +400,8 @@ echo "Waited for nexus: ${retry}s" # The bootstrap command creates a disk, so before that: adjust the control plane # storage buffer to 0 as the virtual hardware only creates 20G pools -pfexec zlogin oxz_switch omdb db pool list -i | \ - xargs -I{} -i pfexec zlogin oxz_switch omdb -w db pool set-storage-buffer "{}" 0 +pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb db pool list -i | \ + xargs -I{} -i pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb -w db pool set-storage-buffer "{}" 0 export RUST_BACKTRACE=1 export E2E_TLS_CERT IPPOOL_START IPPOOL_END From 24e569313ea81685bebea06d7ccc43688ba82fd1 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 8 Apr 2025 17:32:08 +0000 Subject: [PATCH 15/17] pool -> zpool --- .github/buildomat/jobs/deploy.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index f2fbe2d41eb..6f8a10ce52d 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -400,8 +400,8 @@ echo "Waited for nexus: ${retry}s" # The bootstrap command creates a disk, so before that: adjust the control plane # storage buffer to 0 as the virtual hardware only creates 20G pools -pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb db pool list -i | \ - xargs -I{} -i pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb -w db pool set-storage-buffer "{}" 0 +pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb db zpool list -i | \ + xargs -I{} -i pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb -w db zpool set-storage-buffer "{}" 0 export RUST_BACKTRACE=1 export E2E_TLS_CERT IPPOOL_START IPPOOL_END From 651ddb167a2fb96b51805b4ca2024fa617141c9a Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 8 Apr 2025 20:44:46 +0000 Subject: [PATCH 16/17] wait for handoff --- .github/buildomat/jobs/deploy.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git 
a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index 6f8a10ce52d..229f1084cb2 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -397,6 +397,20 @@ until zoneadm list | grep nexus; do done echo "Waited for nexus: ${retry}s" +# Wait for handoff, as zpools as inserted into the database during +# `rack_initialize`, and the next omdb command requires them to exist in the +# db. +retry=0 +until grep "Handoff to Nexus is complete" /var/svc/log/oxide-sled-agent:default.log; do + if [[ $retry -gt 300 ]]; then + echo "Failed to handoff to Nexus after 300 seconds" + exit 1 + fi + sleep 1 + retry=$((retry + 1)) +done +echo "Waited for handoff: ${retry}s" + # The bootstrap command creates a disk, so before that: adjust the control plane # storage buffer to 0 as the virtual hardware only creates 20G pools From 743a45c5da154594107647be0d53cd7e535040f8 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Wed, 9 Apr 2025 00:53:08 +0000 Subject: [PATCH 17/17] wait for all zpools to show up in db --- .github/buildomat/jobs/deploy.sh | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index 229f1084cb2..f0ed3556156 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -270,6 +270,9 @@ tar xf out/omicron-sled-agent.tar pkg/config-rss.toml pkg/config.toml sed -E -i~ "s/(m2|u2)(.*\.vdev)/\/scratch\/\1\2/g" pkg/config.toml diff -u pkg/config.toml{~,} || true +EXPECTED_ZPOOL_COUNT=$(grep -c -E 'u2.*\.vdev' pkg/config.toml) +echo "expected number of zpools is ${EXPECTED_ZPOOL_COUNT}" + SILO_NAME="$(sed -n 's/silo_name = "\(.*\)"/\1/p' pkg/config-rss.toml)" EXTERNAL_DNS_DOMAIN="$(sed -n 's/external_dns_zone_name = "\(.*\)"/\1/p' pkg/config-rss.toml)" @@ -411,11 +414,32 @@ until grep "Handoff to Nexus is complete" /var/svc/log/oxide-sled-agent:default. done echo "Waited for handoff: ${retry}s" +# Wait for the number of expected U2 zpools +retry=0 +ACTUAL_ZPOOL_COUNT=$(pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb db zpool list -i | wc -l) +until [[ "${ACTUAL_ZPOOL_COUNT}" -eq "${EXPECTED_ZPOOL_COUNT}" ]]; +do + pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb db zpool list + if [[ $retry -gt 300 ]]; then + echo "Failed to wait for ${EXPECTED_ZPOOL_COUNT} zpools after 300 seconds" + exit 1 + fi + sleep 1 + retry=$((retry + 1)) + ACTUAL_ZPOOL_COUNT=$(pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb db zpool list -i | wc -l) +done + # The bootstrap command creates a disk, so before that: adjust the control plane # storage buffer to 0 as the virtual hardware only creates 20G pools -pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb db zpool list -i | \ - xargs -I{} -i pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb -w db zpool set-storage-buffer "{}" 0 +pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb db zpool list + +for ZPOOL in $(pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb db zpool list -i); +do + pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb -w db zpool set-storage-buffer "${ZPOOL}" 0 +done + +pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb db zpool list export RUST_BACKTRACE=1 export E2E_TLS_CERT IPPOOL_START IPPOOL_END
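For context on why the CI job zeroes the buffer before the bootstrap command runs: the default control plane storage buffer is 250 GiB (268435456000 bytes, the default applied in up03.sql), while the virtual hardware in this job only creates roughly 20 GiB pools, so every Crucible dataset would immediately satisfy the size_used + control_plane_storage_buffer >= pool_total_size condition checked by cmd_crucible_dataset_show_overprovisioned. The following is a minimal, illustrative sketch of that arithmetic only; the constants are stand-ins chosen to mirror the CI environment described above, not code taken from these patches.

fn main() {
    // Hypothetical values mirroring the CI environment: a ~20 GiB virtual
    // pool, the 250 GiB default buffer, and no regions allocated yet.
    let pool_total_size: i64 = 20 * (1 << 30);
    let control_plane_storage_buffer: i64 = 250 * (1 << 30);
    let size_used: i64 = 0;

    // Same comparison the overprovisioned filter performs, widened to i128
    // so the sum cannot overflow.
    let overprovisioned = i128::from(size_used)
        + i128::from(control_plane_storage_buffer)
        >= i128::from(pool_total_size);

    // Prints `true` with these values, which is why deploy.sh sets the
    // buffer to 0 before any disk is created.
    println!("overprovisioned = {overprovisioned}");
}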