diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index eea26700057..f0ed3556156 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -270,6 +270,9 @@ tar xf out/omicron-sled-agent.tar pkg/config-rss.toml pkg/config.toml sed -E -i~ "s/(m2|u2)(.*\.vdev)/\/scratch\/\1\2/g" pkg/config.toml diff -u pkg/config.toml{~,} || true +EXPECTED_ZPOOL_COUNT=$(grep -c -E 'u2.*\.vdev' pkg/config.toml) +echo "expected number of zpools is ${EXPECTED_ZPOOL_COUNT}" + SILO_NAME="$(sed -n 's/silo_name = "\(.*\)"/\1/p' pkg/config-rss.toml)" EXTERNAL_DNS_DOMAIN="$(sed -n 's/external_dns_zone_name = "\(.*\)"/\1/p' pkg/config-rss.toml)" @@ -397,6 +400,47 @@ until zoneadm list | grep nexus; do done echo "Waited for nexus: ${retry}s" +# Wait for handoff, as zpools as inserted into the database during +# `rack_initialize`, and the next omdb command requires them to exist in the +# db. +retry=0 +until grep "Handoff to Nexus is complete" /var/svc/log/oxide-sled-agent:default.log; do + if [[ $retry -gt 300 ]]; then + echo "Failed to handoff to Nexus after 300 seconds" + exit 1 + fi + sleep 1 + retry=$((retry + 1)) +done +echo "Waited for handoff: ${retry}s" + +# Wait for the number of expected U2 zpools +retry=0 +ACTUAL_ZPOOL_COUNT=$(pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb db zpool list -i | wc -l) +until [[ "${ACTUAL_ZPOOL_COUNT}" -eq "${EXPECTED_ZPOOL_COUNT}" ]]; +do + pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb db zpool list + if [[ $retry -gt 300 ]]; then + echo "Failed to wait for ${EXPECTED_ZPOOL_COUNT} zpools after 300 seconds" + exit 1 + fi + sleep 1 + retry=$((retry + 1)) + ACTUAL_ZPOOL_COUNT=$(pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb db zpool list -i | wc -l) +done + +# The bootstrap command creates a disk, so before that: adjust the control plane +# storage buffer to 0 as the virtual hardware only creates 20G pools + +pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb db zpool list + +for ZPOOL in $(pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb db zpool list -i); +do + pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb -w db zpool set-storage-buffer "${ZPOOL}" 0 +done + +pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb db zpool list + export RUST_BACKTRACE=1 export E2E_TLS_CERT IPPOOL_START IPPOOL_END eval "$(./target/debug/bootstrap)" diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index e4a32c1b2e5..19e80849c02 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -144,6 +144,7 @@ use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::PropolisUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::VolumeUuid; +use omicron_uuid_kinds::ZpoolUuid; use sled_agent_client::VolumeConstructionRequest; use std::borrow::Cow; use std::cmp::Ordering; @@ -325,6 +326,8 @@ pub struct DbFetchOptions { /// Subcommands that query or update the database #[derive(Debug, Subcommand, Clone)] enum DbCommands { + /// Commands relevant to Crucible datasets + CrucibleDataset(CrucibleDatasetArgs), /// Print any Crucible resources that are located on expunged physical disks ReplacementsToDo, /// Print information about the rack @@ -371,6 +374,37 @@ enum DbCommands { Vmms(VmmListArgs), /// Print information about the oximeter collector. 
Oximeter(OximeterArgs), + /// Commands for querying and interacting with pools + Zpool(ZpoolArgs), +} + +#[derive(Debug, Args, Clone)] +struct CrucibleDatasetArgs { + #[command(subcommand)] + command: CrucibleDatasetCommands, +} + +#[derive(Debug, Subcommand, Clone)] +enum CrucibleDatasetCommands { + List, + + ShowOverprovisioned, + + MarkNonProvisionable(MarkNonProvisionableArgs), + + MarkProvisionable(MarkProvisionableArgs), +} + +#[derive(Debug, Args, Clone)] +struct MarkNonProvisionableArgs { + /// The UUID of the Crucible dataset + dataset_id: DatasetUuid, +} + +#[derive(Debug, Args, Clone)] +struct MarkProvisionableArgs { + /// The UUID of the Crucible dataset + dataset_id: DatasetUuid, } #[derive(Debug, Args, Clone)] @@ -609,8 +643,12 @@ enum RegionCommands { #[derive(Debug, Args, Clone)] struct RegionListArgs { /// Print region IDs only - #[arg(short)] + #[arg(long, short)] id_only: bool, + + /// List regions only in a certain dataset + #[arg(long, short)] + dataset_id: Option, } #[derive(Debug, Args, Clone)] @@ -947,6 +985,37 @@ struct VmmListArgs { states: Vec, } +#[derive(Debug, Args, Clone)] +struct ZpoolArgs { + #[command(subcommand)] + command: ZpoolCommands, +} + +#[derive(Debug, Subcommand, Clone)] +enum ZpoolCommands { + /// List pools + List(ZpoolListArgs), + + /// Set the control plane storage buffer for a pool + SetStorageBuffer(SetStorageBufferArgs), +} + +#[derive(Debug, Args, Clone)] +struct ZpoolListArgs { + /// Only output zpool ids + #[clap(short, long)] + id_only: bool, +} + +#[derive(Debug, Args, Clone)] +struct SetStorageBufferArgs { + /// The UUID of Pool + id: Uuid, + + /// How many bytes to set the buffer to + storage_buffer: i64, +} + impl DbArgs { /// Run a `omdb db` subcommand. /// @@ -961,6 +1030,34 @@ impl DbArgs { self.db_url_opts.with_datastore(omdb, log, |opctx, datastore| { async move { match &self.command { + DbCommands::CrucibleDataset(CrucibleDatasetArgs { + command: CrucibleDatasetCommands::List, + }) => { + cmd_crucible_dataset_list(&opctx, &datastore).await + } + DbCommands::CrucibleDataset(CrucibleDatasetArgs { + command: CrucibleDatasetCommands::ShowOverprovisioned, + }) => { + cmd_crucible_dataset_show_overprovisioned( + &opctx, &datastore, + ).await + } + DbCommands::CrucibleDataset(CrucibleDatasetArgs { + command: CrucibleDatasetCommands::MarkNonProvisionable(args), + }) => { + let token = omdb.check_allow_destructive()?; + cmd_crucible_dataset_mark_non_provisionable( + &opctx, &datastore, args, token, + ).await + } + DbCommands::CrucibleDataset(CrucibleDatasetArgs { + command: CrucibleDatasetCommands::MarkProvisionable(args), + }) => { + let token = omdb.check_allow_destructive()?; + cmd_crucible_dataset_mark_provisionable( + &opctx, &datastore, args, token, + ).await + } DbCommands::ReplacementsToDo => { replacements_to_do(&opctx, &datastore).await } @@ -1226,6 +1323,20 @@ impl DbArgs { DbCommands::Oximeter(OximeterArgs { command: OximeterCommands::ListProducers }) => cmd_db_oximeter_list_producers(&datastore, fetch_opts).await, + DbCommands::Zpool(ZpoolArgs { + command: ZpoolCommands::List(args) + }) => cmd_db_zpool_list(&opctx, &datastore, &args).await, + DbCommands::Zpool(ZpoolArgs { + command: ZpoolCommands::SetStorageBuffer(args) + }) => { + let token = omdb.check_allow_destructive()?; + cmd_db_zpool_set_storage_buffer( + &opctx, + &datastore, + &args, + token, + ).await + } } } }).await @@ -1415,6 +1526,172 @@ async fn lookup_project( .with_context(|| format!("loading project {project_id}")) } +// Crucible datasets + 
+#[derive(Tabled)] +#[tabled(rename_all = "SCREAMING_SNAKE_CASE")] +struct CrucibleDatasetRow { + // dataset fields + id: Uuid, + time_deleted: String, + pool_id: Uuid, + address: String, + size_used: i64, + no_provision: bool, + + // zpool fields + control_plane_storage_buffer: i64, + pool_total_size: i64, + + // computed fields + size_left: i128, +} + +async fn get_crucible_dataset_rows( + opctx: &OpContext, + datastore: &DataStore, +) -> Result, anyhow::Error> { + let crucible_datasets = + datastore.crucible_dataset_list_all_batched(opctx).await?; + + let Some(latest_collection) = + datastore.inventory_get_latest_collection(opctx).await? + else { + bail!("no latest inventory found!"); + }; + + let mut zpool_total_size: HashMap = HashMap::new(); + + for (_, sled_agent) in latest_collection.sled_agents { + for zpool in sled_agent.zpools { + zpool_total_size + .insert(zpool.id.into_untyped_uuid(), zpool.total_size.into()); + } + } + + let zpools: HashMap = datastore + .zpool_list_all_external_batched(opctx) + .await? + .into_iter() + .map(|(zpool, _)| (zpool.id().into_untyped_uuid(), zpool)) + .collect(); + + let mut result: Vec = + Vec::with_capacity(crucible_datasets.len()); + + for d in crucible_datasets { + let control_plane_storage_buffer: i64 = zpools + .get(&d.pool_id) + .ok_or_else(|| anyhow::anyhow!("zpool {} not found!", d.pool_id))? + .control_plane_storage_buffer() + .into(); + + let pool_total_size = + *zpool_total_size.get(&d.pool_id).ok_or_else(|| { + anyhow::anyhow!("zpool {} not part of inventory!", d.pool_id) + })?; + + result.push(CrucibleDatasetRow { + // dataset fields + id: d.id().into_untyped_uuid(), + time_deleted: match d.time_deleted() { + Some(t) => t.to_string(), + None => String::from(""), + }, + pool_id: d.pool_id, + address: d.address().to_string(), + size_used: d.size_used, + no_provision: d.no_provision(), + + // zpool fields + control_plane_storage_buffer, + pool_total_size, + + // computed fields + size_left: i128::from(pool_total_size) + - i128::from(control_plane_storage_buffer) + - i128::from(d.size_used), + }); + } + + Ok(result) +} + +async fn cmd_crucible_dataset_list( + opctx: &OpContext, + datastore: &DataStore, +) -> Result<(), anyhow::Error> { + let rows: Vec<_> = get_crucible_dataset_rows(opctx, datastore).await?; + + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::psql()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + + Ok(()) +} + +async fn cmd_crucible_dataset_show_overprovisioned( + opctx: &OpContext, + datastore: &DataStore, +) -> Result<(), anyhow::Error> { + // A Crucible dataset is overprovisioned if size_used (amount taken up by + // Crucible region reservations) plus the control plane storage buffer + // (note this is _not_ a ZFS reservation! it's currently just a per-pool + // value in the database) is larger than the backing pool's total size. 
+ + let rows: Vec<_> = get_crucible_dataset_rows(opctx, datastore).await?; + let rows: Vec<_> = rows + .into_iter() + .filter(|row| { + (i128::from(row.size_used) + + i128::from(row.control_plane_storage_buffer)) + >= i128::from(row.pool_total_size) + }) + .collect(); + + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::psql()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + + Ok(()) +} + +async fn cmd_crucible_dataset_mark_non_provisionable( + opctx: &OpContext, + datastore: &DataStore, + args: &MarkNonProvisionableArgs, + _destruction_token: DestructiveOperationToken, +) -> Result<(), anyhow::Error> { + datastore + .mark_crucible_dataset_not_provisionable(opctx, args.dataset_id) + .await?; + + println!("marked {:?} as non-provisionable", args.dataset_id); + + Ok(()) +} + +async fn cmd_crucible_dataset_mark_provisionable( + opctx: &OpContext, + datastore: &DataStore, + args: &MarkProvisionableArgs, + _destruction_token: DestructiveOperationToken, +) -> Result<(), anyhow::Error> { + datastore + .mark_crucible_dataset_provisionable(opctx, args.dataset_id) + .await?; + + println!("marked {:?} as provisionable", args.dataset_id); + + Ok(()) +} + // Disks #[derive(Tabled)] @@ -2943,14 +3220,20 @@ async fn cmd_db_region_list( ) -> Result<(), anyhow::Error> { use nexus_db_schema::schema::region::dsl; - let regions: Vec = paginated( + let mut query = paginated( dsl::region, dsl::id, &first_page::(fetch_opts.fetch_limit), - ) - .select(Region::as_select()) - .load_async(&*datastore.pool_connection_for_tests().await?) - .await?; + ); + + if let Some(dataset_id) = args.dataset_id { + query = query.filter(dsl::dataset_id.eq(to_db_typed_uuid(dataset_id))); + } + + let regions: Vec = query + .select(Region::as_select()) + .load_async(&*datastore.pool_connection_for_tests().await?) + .await?; check_limit(®ions, fetch_opts.fetch_limit, || { String::from("listing regions") @@ -7433,3 +7716,100 @@ fn datetime_opt_rfc3339_concise(t: &Option>) -> String { t.map(|t| t.to_rfc3339_opts(chrono::format::SecondsFormat::Millis, true)) .unwrap_or_else(|| "-".to_string()) } + +async fn cmd_db_zpool_list( + opctx: &OpContext, + datastore: &DataStore, + args: &ZpoolListArgs, +) -> Result<(), anyhow::Error> { + let zpools = datastore.zpool_list_all_external_batched(opctx).await?; + + let Some(latest_collection) = + datastore.inventory_get_latest_collection(opctx).await? + else { + bail!("no latest inventory found!"); + }; + + let mut zpool_total_size: HashMap = HashMap::new(); + + for (_, sled_agent) in latest_collection.sled_agents { + for zpool in sled_agent.zpools { + zpool_total_size + .insert(zpool.id.into_untyped_uuid(), zpool.total_size.into()); + } + } + + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct ZpoolRow { + id: Uuid, + time_deleted: String, + sled_id: Uuid, + physical_disk_id: Uuid, + total_size: i64, + control_plane_storage_buffer: i64, + } + + let rows: Vec = zpools + .into_iter() + .map(|(p, _)| { + let zpool_id = p.id().into_untyped_uuid(); + Ok(ZpoolRow { + id: zpool_id, + time_deleted: match p.time_deleted() { + Some(t) => t.to_string(), + None => String::from(""), + }, + sled_id: p.sled_id, + physical_disk_id: p.physical_disk_id.into_untyped_uuid(), + total_size: *zpool_total_size.get(&zpool_id).ok_or_else( + || { + anyhow::anyhow!( + "zpool {zpool_id} not found in inventory!" 
+ ) + }, + )?, + control_plane_storage_buffer: p + .control_plane_storage_buffer() + .into(), + }) + }) + .collect::, anyhow::Error>>()?; + + if args.id_only { + for row in rows { + println!("{}", row.id); + } + } else { + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::psql()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + } + + Ok(()) +} + +async fn cmd_db_zpool_set_storage_buffer( + opctx: &OpContext, + datastore: &DataStore, + args: &SetStorageBufferArgs, + _token: DestructiveOperationToken, +) -> Result<(), anyhow::Error> { + datastore + .zpool_set_control_plane_storage_buffer( + opctx, + ZpoolUuid::from_untyped_uuid(args.id), + args.storage_buffer, + ) + .await?; + + println!( + "set pool {} control plane storage buffer bytes to {}", + args.id, args.storage_buffer, + ); + + Ok(()) +} diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index 43cefccf0a4..dd1e1244380 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -113,6 +113,7 @@ Query the control plane database (CockroachDB) Usage: omdb db [OPTIONS] Commands: + crucible-dataset Commands relevant to Crucible datasets replacements-to-do Print any Crucible resources that are located on expunged physical disks rack Print information about the rack @@ -138,6 +139,7 @@ Commands: processes vmms Alias to `omdb db vmm list` oximeter Print information about the oximeter collector + zpool Commands for querying and interacting with pools help Print this message or the help of the given subcommand(s) Options: @@ -167,6 +169,7 @@ Query the control plane database (CockroachDB) Usage: omdb db [OPTIONS] Commands: + crucible-dataset Commands relevant to Crucible datasets replacements-to-do Print any Crucible resources that are located on expunged physical disks rack Print information about the rack @@ -192,6 +195,7 @@ Commands: processes vmms Alias to `omdb db vmm list` oximeter Print information about the oximeter collector + zpool Commands for querying and interacting with pools help Print this message or the help of the given subcommand(s) Options: diff --git a/nexus/db-model/src/crucible_dataset.rs b/nexus/db-model/src/crucible_dataset.rs index a7d11571a61..c4ceb38cc97 100644 --- a/nexus/db-model/src/crucible_dataset.rs +++ b/nexus/db-model/src/crucible_dataset.rs @@ -41,6 +41,10 @@ pub struct CrucibleDataset { port: SqlU16, pub size_used: i64, + + /// Do not consider this dataset as a candidate during region allocation + #[serde(default)] + no_provision: bool, } impl CrucibleDataset { @@ -57,9 +61,14 @@ impl CrucibleDataset { ip: addr.ip().into(), port: addr.port().into(), size_used: 0, + no_provision: false, } } + pub fn time_deleted(&self) -> Option> { + self.time_deleted + } + pub fn address(&self) -> SocketAddrV6 { self.address_with_port(self.port.into()) } @@ -67,6 +76,10 @@ impl CrucibleDataset { pub fn address_with_port(&self, port: u16) -> SocketAddrV6 { SocketAddrV6::new(Ipv6Addr::from(self.ip), port, 0, 0) } + + pub fn no_provision(&self) -> bool { + self.no_provision + } } // Datasets contain regions diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 0dcabd7f889..c8ffa31502c 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. 
Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(135, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(136, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(136, "do-not-provision-flag-for-crucible-dataset"), KnownVersion::new(135, "blueprint-zone-image-source"), KnownVersion::new(134, "crucible-agent-reservation-overhead"), KnownVersion::new(133, "delete-defunct-reservations"), diff --git a/nexus/db-model/src/zpool.rs b/nexus/db-model/src/zpool.rs index ace723754f2..879ad09c4c3 100644 --- a/nexus/db-model/src/zpool.rs +++ b/nexus/db-model/src/zpool.rs @@ -3,6 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use super::{CrucibleDataset, Generation}; +use crate::ByteCount; use crate::collection::DatastoreCollectionConfig; use crate::typed_uuid::DbTypedUuid; use chrono::{DateTime, Utc}; @@ -29,6 +30,20 @@ pub struct Zpool { // The physical disk to which this Zpool is attached. pub physical_disk_id: DbTypedUuid, + + /// Currently, a single dataset is created per pool, and this dataset (and + /// children of it) is used for all persistent data, both customer data (in + /// the form of Crucible regions) and non-customer data (zone root datasets, + /// delegated zone datasets, debug logs, core files, and more). To prevent + /// Crucible regions from taking all the dataset space, reserve space that + /// region allocation is not allowed to use. + /// + /// This value is consulted by the region allocation query, and can change + /// at runtime. A pool could become "overprovisioned" if this value + /// increases over the total storage minus how much storage Crucible regions + /// currently occupy, though this won't immediately cause any problems and + /// can be identified and fixed via omdb commands. 
+ control_plane_storage_buffer: ByteCount, } impl Zpool { @@ -36,6 +51,7 @@ impl Zpool { id: Uuid, sled_id: Uuid, physical_disk_id: PhysicalDiskUuid, + control_plane_storage_buffer: ByteCount, ) -> Self { Self { identity: ZpoolIdentity::new(id), @@ -43,8 +59,17 @@ impl Zpool { rcgen: Generation::new(), sled_id, physical_disk_id: physical_disk_id.into(), + control_plane_storage_buffer, } } + + pub fn time_deleted(&self) -> Option> { + self.time_deleted + } + + pub fn control_plane_storage_buffer(&self) -> ByteCount { + self.control_plane_storage_buffer + } } impl DatastoreCollectionConfig for Zpool { diff --git a/nexus/db-queries/src/db/datastore/crucible_dataset.rs b/nexus/db-queries/src/db/datastore/crucible_dataset.rs index 10a47400111..38820c3f203 100644 --- a/nexus/db-queries/src/db/datastore/crucible_dataset.rs +++ b/nexus/db-queries/src/db/datastore/crucible_dataset.rs @@ -245,6 +245,44 @@ impl DataStore { Ok(physical_disk.disk_policy == PhysicalDiskPolicy::InService) } + + pub async fn mark_crucible_dataset_not_provisionable( + &self, + opctx: &OpContext, + dataset_id: DatasetUuid, + ) -> Result<(), Error> { + let conn = self.pool_connection_authorized(opctx).await?; + + use nexus_db_schema::schema::crucible_dataset::dsl; + + diesel::update(dsl::crucible_dataset) + .filter(dsl::id.eq(to_db_typed_uuid(dataset_id))) + .filter(dsl::time_deleted.is_null()) + .set(dsl::no_provision.eq(true)) + .execute_async(&*conn) + .await + .map(|_| ()) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + pub async fn mark_crucible_dataset_provisionable( + &self, + opctx: &OpContext, + dataset_id: DatasetUuid, + ) -> Result<(), Error> { + let conn = self.pool_connection_authorized(opctx).await?; + + use nexus_db_schema::schema::crucible_dataset::dsl; + + diesel::update(dsl::crucible_dataset) + .filter(dsl::id.eq(to_db_typed_uuid(dataset_id))) + .filter(dsl::time_deleted.is_null()) + .set(dsl::no_provision.eq(false)) + .execute_async(&*conn) + .await + .map(|_| ()) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } } #[cfg(test)] @@ -255,6 +293,7 @@ mod test { use nexus_db_model::SledBaseboard; use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; + use omicron_common::api::external::ByteCount; use omicron_test_utils::dev; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::PhysicalDiskUuid; @@ -293,6 +332,7 @@ mod test { *zpool_id.as_untyped_uuid(), *sled_id.as_untyped_uuid(), PhysicalDiskUuid::new_v4(), + ByteCount::from(0).into(), ); datastore .zpool_insert(opctx, zpool) diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 4ea4c765fba..b630b1265a7 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -764,8 +764,12 @@ mod test { physical_disk_id: PhysicalDiskUuid, ) -> Uuid { let zpool_id = Uuid::new_v4(); - let zpool = - Zpool::new(zpool_id, sled_id.into_untyped_uuid(), physical_disk_id); + let zpool = Zpool::new( + zpool_id, + sled_id.into_untyped_uuid(), + physical_disk_id, + ByteCount::from(0).into(), + ); datastore.zpool_insert(opctx, zpool).await.unwrap(); zpool_id } diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index 3230ae35522..ccfa6c46133 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -766,7 +766,12 @@ mod test { sled_id, ); - let zpool = Zpool::new(Uuid::new_v4(), sled_id, 
disk.id()); + let zpool = Zpool::new( + Uuid::new_v4(), + sled_id, + disk.id(), + ByteCount::from(0).into(), + ); (disk, zpool) } diff --git a/nexus/db-queries/src/db/datastore/support_bundle.rs b/nexus/db-queries/src/db/datastore/support_bundle.rs index ef619403a18..9cefefdae1f 100644 --- a/nexus/db-queries/src/db/datastore/support_bundle.rs +++ b/nexus/db-queries/src/db/datastore/support_bundle.rs @@ -478,6 +478,7 @@ mod test { use nexus_reconfigurator_planning::example::SimRngState; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneType; + use omicron_common::api::external::ByteCount; use omicron_common::api::external::LookupType; use omicron_common::api::internal::shared::DatasetKind::Debug as DebugDatasetKind; use omicron_test_utils::dev; @@ -584,6 +585,7 @@ mod test { *pool.pool.as_untyped_uuid(), *self.sled.as_untyped_uuid(), PhysicalDiskUuid::new_v4(), + ByteCount::from(0).into(), ); datastore .zpool_insert(opctx, zpool) diff --git a/nexus/db-queries/src/db/datastore/zpool.rs b/nexus/db-queries/src/db/datastore/zpool.rs index 1a44e478f27..6aa8245bc51 100644 --- a/nexus/db-queries/src/db/datastore/zpool.rs +++ b/nexus/db-queries/src/db/datastore/zpool.rs @@ -27,6 +27,7 @@ use chrono::Utc; use diesel::prelude::*; use diesel::upsert::excluded; use nexus_db_model::PhysicalDiskKind; +use nexus_db_model::to_db_typed_uuid; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; @@ -310,4 +311,33 @@ impl DataStore { Ok(SledUuid::from_untyped_uuid(id)) } + + pub async fn zpool_set_control_plane_storage_buffer( + &self, + opctx: &OpContext, + id: ZpoolUuid, + control_plane_storage_buffer: i64, + ) -> Result<(), Error> { + use nexus_db_schema::schema::zpool::dsl; + + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + let conn = self.pool_connection_authorized(opctx).await?; + + info!( + opctx.log, + "changing {id} control plane storage buffer to \ + {control_plane_storage_buffer}", + ); + + diesel::update(dsl::zpool) + .filter(dsl::id.eq(to_db_typed_uuid(id))) + .set( + dsl::control_plane_storage_buffer + .eq(control_plane_storage_buffer), + ) + .execute_async(&*conn) + .await + .map(|_| ()) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } } diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index 268af89b58e..c28813da998 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -273,7 +273,10 @@ pub fn allocation_query( SELECT crucible_dataset.pool_id, sum(crucible_dataset.size_used) AS size_used - FROM crucible_dataset WHERE ((crucible_dataset.size_used IS NOT NULL) AND (crucible_dataset.time_deleted IS NULL)) GROUP BY crucible_dataset.pool_id),"); + FROM crucible_dataset + WHERE + ((crucible_dataset.size_used IS NOT NULL) AND (crucible_dataset.time_deleted IS NULL)) + GROUP BY crucible_dataset.pool_id),"); if let Some(snapshot_id) = snapshot_id { // Any zpool already have this volume's existing regions, or host the @@ -288,7 +291,7 @@ pub fn allocation_query( select crucible_dataset.pool_id from crucible_dataset inner join region_snapshot on (region_snapshot.dataset_id = crucible_dataset.id) where region_snapshot.snapshot_id = ").param().sql(")),") - .bind::(snapshot_id) + .bind::(snapshot_id); } else { // Any zpool already have this volume's existing regions? 
builder.sql(" @@ -297,8 +300,8 @@ pub fn allocation_query( crucible_dataset.pool_id FROM crucible_dataset INNER JOIN old_regions ON (old_regions.dataset_id = crucible_dataset.id) - ),") - }; + ),"); + } // If `distinct_sleds` is selected, then take note of the sleds used by // existing allocations, and filter those out later. This step is required @@ -317,7 +320,7 @@ pub fn allocation_query( zpool.id = ANY(SELECT pool_id FROM existing_zpools) ),", ); - }; + } // Identifies zpools with enough space for region allocation, that are not // currently used by this Volume's existing regions. @@ -341,8 +344,10 @@ pub fn allocation_query( (zpool INNER JOIN sled ON (zpool.sled_id = sled.id)) ON (zpool.id = old_zpool_usage.pool_id) INNER JOIN physical_disk ON (zpool.physical_disk_id = physical_disk.id) + INNER JOIN + crucible_dataset ON (crucible_dataset.pool_id = zpool.id) WHERE ( - (old_zpool_usage.size_used + ").param().sql(" ) <= + (old_zpool_usage.size_used + ").param().sql(" + zpool.control_plane_storage_buffer) <= (SELECT total_size FROM omicron.public.inv_zpool WHERE inv_zpool.id = old_zpool_usage.pool_id ORDER BY inv_zpool.time_collected DESC LIMIT 1) @@ -351,6 +356,8 @@ pub fn allocation_query( AND physical_disk.disk_policy = 'in_service' AND physical_disk.disk_state = 'active' AND NOT(zpool.id = ANY(SELECT existing_zpools.pool_id FROM existing_zpools)) + AND (crucible_dataset.time_deleted is NULL) + AND (crucible_dataset.no_provision = false) " ).bind::(size_delta); @@ -375,7 +382,7 @@ pub fn allocation_query( crucible_dataset.id, crucible_dataset.pool_id FROM (crucible_dataset INNER JOIN candidate_zpools ON (crucible_dataset.pool_id = candidate_zpools.pool_id)) - WHERE (crucible_dataset.time_deleted IS NULL) + WHERE (crucible_dataset.time_deleted IS NULL) AND (crucible_dataset.no_provision = false) ORDER BY crucible_dataset.pool_id, md5((CAST(crucible_dataset.id as BYTEA) || ").param().sql(")) ),") .bind::(seed.clone()) diff --git a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql index 51f56f89ac6..2106a598110 100644 --- a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql @@ -55,8 +55,9 @@ WITH INNER JOIN (zpool INNER JOIN sled ON zpool.sled_id = sled.id) ON zpool.id = old_zpool_usage.pool_id INNER JOIN physical_disk ON zpool.physical_disk_id = physical_disk.id + INNER JOIN crucible_dataset ON crucible_dataset.pool_id = zpool.id WHERE - (old_zpool_usage.size_used + $2) + (old_zpool_usage.size_used + $2 + zpool.control_plane_storage_buffer) <= ( SELECT total_size @@ -74,6 +75,8 @@ WITH AND physical_disk.disk_policy = 'in_service' AND physical_disk.disk_state = 'active' AND NOT (zpool.id = ANY (SELECT existing_zpools.pool_id FROM existing_zpools)) + AND (crucible_dataset.time_deleted IS NULL) + AND crucible_dataset.no_provision = false AND NOT (sled.id = ANY (SELECT existing_sleds.id FROM existing_sleds)) ORDER BY zpool.sled_id, md5(CAST(zpool.id AS BYTES) || $3) @@ -86,7 +89,7 @@ WITH crucible_dataset INNER JOIN candidate_zpools ON crucible_dataset.pool_id = candidate_zpools.pool_id WHERE - crucible_dataset.time_deleted IS NULL + (crucible_dataset.time_deleted IS NULL) AND crucible_dataset.no_provision = false ORDER BY crucible_dataset.pool_id, md5(CAST(crucible_dataset.id AS BYTES) || $4) ), @@ -290,7 +293,8 @@ WITH crucible_dataset.pool_id, crucible_dataset.ip, crucible_dataset.port, - crucible_dataset.size_used + 
crucible_dataset.size_used, + crucible_dataset.no_provision ) ( SELECT @@ -303,6 +307,7 @@ WITH crucible_dataset.ip, crucible_dataset.port, crucible_dataset.size_used, + crucible_dataset.no_provision, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -330,6 +335,7 @@ UNION updated_datasets.ip, updated_datasets.port, updated_datasets.size_used, + updated_datasets.no_provision, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql index 7e1ac153a3f..f1b9d73038b 100644 --- a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql @@ -46,8 +46,9 @@ WITH INNER JOIN (zpool INNER JOIN sled ON zpool.sled_id = sled.id) ON zpool.id = old_zpool_usage.pool_id INNER JOIN physical_disk ON zpool.physical_disk_id = physical_disk.id + INNER JOIN crucible_dataset ON crucible_dataset.pool_id = zpool.id WHERE - (old_zpool_usage.size_used + $2) + (old_zpool_usage.size_used + $2 + zpool.control_plane_storage_buffer) <= ( SELECT total_size @@ -65,6 +66,8 @@ WITH AND physical_disk.disk_policy = 'in_service' AND physical_disk.disk_state = 'active' AND NOT (zpool.id = ANY (SELECT existing_zpools.pool_id FROM existing_zpools)) + AND (crucible_dataset.time_deleted IS NULL) + AND crucible_dataset.no_provision = false ), candidate_datasets AS ( @@ -74,7 +77,7 @@ WITH crucible_dataset INNER JOIN candidate_zpools ON crucible_dataset.pool_id = candidate_zpools.pool_id WHERE - crucible_dataset.time_deleted IS NULL + (crucible_dataset.time_deleted IS NULL) AND crucible_dataset.no_provision = false ORDER BY crucible_dataset.pool_id, md5(CAST(crucible_dataset.id AS BYTES) || $3) ), @@ -278,7 +281,8 @@ WITH crucible_dataset.pool_id, crucible_dataset.ip, crucible_dataset.port, - crucible_dataset.size_used + crucible_dataset.size_used, + crucible_dataset.no_provision ) ( SELECT @@ -291,6 +295,7 @@ WITH crucible_dataset.ip, crucible_dataset.port, crucible_dataset.size_used, + crucible_dataset.no_provision, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -318,6 +323,7 @@ UNION updated_datasets.ip, updated_datasets.port, updated_datasets.size_used, + updated_datasets.no_provision, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql b/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql index 03b2b6cb708..697091df30c 100644 --- a/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql @@ -67,8 +67,9 @@ WITH INNER JOIN (zpool INNER JOIN sled ON zpool.sled_id = sled.id) ON zpool.id = old_zpool_usage.pool_id INNER JOIN physical_disk ON zpool.physical_disk_id = physical_disk.id + INNER JOIN crucible_dataset ON crucible_dataset.pool_id = zpool.id WHERE - (old_zpool_usage.size_used + $3) + (old_zpool_usage.size_used + $3 + zpool.control_plane_storage_buffer) <= ( SELECT total_size @@ -86,6 +87,8 @@ WITH AND physical_disk.disk_policy = 'in_service' AND physical_disk.disk_state = 'active' AND NOT (zpool.id = ANY (SELECT existing_zpools.pool_id FROM existing_zpools)) + AND (crucible_dataset.time_deleted IS NULL) + AND crucible_dataset.no_provision = false AND NOT (sled.id = ANY (SELECT 
existing_sleds.id FROM existing_sleds)) ORDER BY zpool.sled_id, md5(CAST(zpool.id AS BYTES) || $4) @@ -98,7 +101,7 @@ WITH crucible_dataset INNER JOIN candidate_zpools ON crucible_dataset.pool_id = candidate_zpools.pool_id WHERE - crucible_dataset.time_deleted IS NULL + (crucible_dataset.time_deleted IS NULL) AND crucible_dataset.no_provision = false ORDER BY crucible_dataset.pool_id, md5(CAST(crucible_dataset.id AS BYTES) || $5) ), @@ -302,7 +305,8 @@ WITH crucible_dataset.pool_id, crucible_dataset.ip, crucible_dataset.port, - crucible_dataset.size_used + crucible_dataset.size_used, + crucible_dataset.no_provision ) ( SELECT @@ -315,6 +319,7 @@ WITH crucible_dataset.ip, crucible_dataset.port, crucible_dataset.size_used, + crucible_dataset.no_provision, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -342,6 +347,7 @@ UNION updated_datasets.ip, updated_datasets.port, updated_datasets.size_used, + updated_datasets.no_provision, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql index b71578e0509..9fcfac921fb 100644 --- a/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql @@ -58,8 +58,9 @@ WITH INNER JOIN (zpool INNER JOIN sled ON zpool.sled_id = sled.id) ON zpool.id = old_zpool_usage.pool_id INNER JOIN physical_disk ON zpool.physical_disk_id = physical_disk.id + INNER JOIN crucible_dataset ON crucible_dataset.pool_id = zpool.id WHERE - (old_zpool_usage.size_used + $3) + (old_zpool_usage.size_used + $3 + zpool.control_plane_storage_buffer) <= ( SELECT total_size @@ -77,6 +78,8 @@ WITH AND physical_disk.disk_policy = 'in_service' AND physical_disk.disk_state = 'active' AND NOT (zpool.id = ANY (SELECT existing_zpools.pool_id FROM existing_zpools)) + AND (crucible_dataset.time_deleted IS NULL) + AND crucible_dataset.no_provision = false ), candidate_datasets AS ( @@ -86,7 +89,7 @@ WITH crucible_dataset INNER JOIN candidate_zpools ON crucible_dataset.pool_id = candidate_zpools.pool_id WHERE - crucible_dataset.time_deleted IS NULL + (crucible_dataset.time_deleted IS NULL) AND crucible_dataset.no_provision = false ORDER BY crucible_dataset.pool_id, md5(CAST(crucible_dataset.id AS BYTES) || $4) ), @@ -290,7 +293,8 @@ WITH crucible_dataset.pool_id, crucible_dataset.ip, crucible_dataset.port, - crucible_dataset.size_used + crucible_dataset.size_used, + crucible_dataset.no_provision ) ( SELECT @@ -303,6 +307,7 @@ WITH crucible_dataset.ip, crucible_dataset.port, crucible_dataset.size_used, + crucible_dataset.no_provision, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -330,6 +335,7 @@ UNION updated_datasets.ip, updated_datasets.port, updated_datasets.size_used, + updated_datasets.no_provision, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 8d1a175911b..0e52fcce852 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -1071,6 +1071,8 @@ table! { sled_id -> Uuid, physical_disk_id -> Uuid, + + control_plane_storage_buffer -> Int8, } } @@ -1093,6 +1095,8 @@ table! 
{ port -> Int4, size_used -> Int8, + + no_provision -> Bool, } } diff --git a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs index a2409e0a194..2b11d9935fb 100644 --- a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs +++ b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs @@ -87,6 +87,7 @@ mod test { use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::DiskFilter; use nexus_types::identity::Asset; + use omicron_common::api::external::ByteCount; use omicron_common::api::external::DataPageParams; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::GenericUuid; @@ -132,7 +133,12 @@ mod test { let zpool = datastore .zpool_insert( opctx, - Zpool::new(Uuid::new_v4(), sled_id.into_untyped_uuid(), id), + Zpool::new( + Uuid::new_v4(), + sled_id.into_untyped_uuid(), + id, + ByteCount::from(0).into(), + ), ) .await .unwrap(); diff --git a/nexus/reconfigurator/rendezvous/src/crucible_dataset.rs b/nexus/reconfigurator/rendezvous/src/crucible_dataset.rs index 417079895af..0d4fd8a8382 100644 --- a/nexus/reconfigurator/rendezvous/src/crucible_dataset.rs +++ b/nexus/reconfigurator/rendezvous/src/crucible_dataset.rs @@ -136,6 +136,7 @@ mod tests { use nexus_db_queries::db::pub_test_utils::TestDatabase; use nexus_db_queries::db::queries::ALLOW_FULL_TABLE_SCAN_SQL; use nexus_types::inventory::ZpoolName; + use omicron_common::api::external::ByteCount; use omicron_common::disk::CompressionAlgorithm; use omicron_test_utils::dev; use omicron_uuid_kinds::GenericUuid; @@ -213,6 +214,7 @@ mod tests { zpool_id.into_untyped_uuid(), sled_id.into_untyped_uuid(), disk_id, + ByteCount::from(0).into(), ), ) .await diff --git a/nexus/src/app/background/tasks/blueprint_execution.rs b/nexus/src/app/background/tasks/blueprint_execution.rs index dcd1e6b4fca..fccba57a0bf 100644 --- a/nexus/src/app/background/tasks/blueprint_execution.rs +++ b/nexus/src/app/background/tasks/blueprint_execution.rs @@ -193,6 +193,7 @@ mod test { CockroachDbPreserveDowngrade, blueprint_zone_type, }; use nexus_types::external_api::views::SledState; + use omicron_common::api::external; use omicron_common::api::external::Generation; use omicron_common::zpool_name::ZpoolName; use omicron_uuid_kinds::BlueprintUuid; @@ -457,6 +458,7 @@ mod test { pool_id.into_untyped_uuid(), sled_id.into_untyped_uuid(), PhysicalDiskUuid::new_v4(), + external::ByteCount::from(0).into(), ); datastore .zpool_insert(&opctx, zpool) diff --git a/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs b/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs index c06f09c4ab2..7b96aaf3785 100644 --- a/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs +++ b/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs @@ -185,6 +185,7 @@ mod tests { use nexus_db_model::Region; use nexus_test_utils::SLED_AGENT_UUID; use nexus_test_utils_macros::nexus_test; + use omicron_common::api::external::ByteCount; use omicron_uuid_kinds::{ DatasetUuid, PhysicalDiskUuid, RegionUuid, SledUuid, VolumeUuid, }; @@ -225,7 +226,12 @@ mod tests { let zpool = datastore .zpool_insert( opctx, - Zpool::new(Uuid::new_v4(), sled_id.into_untyped_uuid(), id), + Zpool::new( + Uuid::new_v4(), + sled_id.into_untyped_uuid(), + id, + ByteCount::from(0).into(), + ), ) .await .unwrap(); diff --git a/nexus/src/app/background/tasks/physical_disk_adoption.rs b/nexus/src/app/background/tasks/physical_disk_adoption.rs index b7686a99555..35c72437ebc 100644 --- 
a/nexus/src/app/background/tasks/physical_disk_adoption.rs +++ b/nexus/src/app/background/tasks/physical_disk_adoption.rs @@ -11,6 +11,7 @@ //! //! In the future, this may become more explicitly operator-controlled. +use crate::app::CONTROL_PLANE_STORAGE_BUFFER; use crate::app::background::BackgroundTask; use futures::FutureExt; use futures::future::BoxFuture; @@ -138,7 +139,8 @@ impl BackgroundTask for PhysicalDiskAdoption { let zpool = Zpool::new( Uuid::new_v4(), inv_disk.sled_id.into_untyped_uuid(), - disk.id() + disk.id(), + CONTROL_PLANE_STORAGE_BUFFER.into(), ); let result = self.datastore.physical_disk_and_zpool_insert( diff --git a/nexus/src/app/background/tasks/support_bundle_collector.rs b/nexus/src/app/background/tasks/support_bundle_collector.rs index 87f469d68bc..bcefd4398c6 100644 --- a/nexus/src/app/background/tasks/support_bundle_collector.rs +++ b/nexus/src/app/background/tasks/support_bundle_collector.rs @@ -839,6 +839,7 @@ mod test { use nexus_db_model::Zpool; use nexus_test_utils::SLED_AGENT_UUID; use nexus_test_utils_macros::nexus_test; + use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; use omicron_common::api::internal::shared::DatasetKind; use omicron_common::disk::DatasetConfig; @@ -930,7 +931,12 @@ mod test { let zpool = datastore .zpool_insert( opctx, - Zpool::new(Uuid::new_v4(), sled_id.into_untyped_uuid(), id), + Zpool::new( + Uuid::new_v4(), + sled_id.into_untyped_uuid(), + id, + ByteCount::from(0).into(), + ), ) .await .unwrap(); diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 097ef27aff9..63e6628237a 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -27,6 +27,7 @@ use nexus_db_queries::db; use omicron_common::address::DENDRITE_PORT; use omicron_common::address::MGD_PORT; use omicron_common::address::MGS_PORT; +use omicron_common::api::external::ByteCount; use omicron_common::api::external::Error; use omicron_common::api::internal::shared::SwitchLocation; use omicron_uuid_kinds::OmicronZoneUuid; @@ -125,6 +126,14 @@ pub const MAX_DISK_SIZE_BYTES: u64 = 1023 * (1 << 30); // 1023 GiB /// This value is aribtrary pub const MAX_SSH_KEYS_PER_INSTANCE: u32 = 100; +/// The amount of disk space to reserve for non-Crucible / control plane +/// storage. This amount represents a buffer that the region allocation query +/// will not use for each U2. +/// +/// See oxidecomputer/omicron#7875 for the 250G determination. +pub const CONTROL_PLANE_STORAGE_BUFFER: ByteCount = + ByteCount::from_gibibytes_u32(250); + /// Manages an Oxide fleet -- the heart of the control plane pub struct Nexus { /// uuid for this nexus instance. diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 6734b1a51ba..a509b1b5599 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -4,6 +4,7 @@ //! 
Rack management +use crate::app::CONTROL_PLANE_STORAGE_BUFFER; use crate::external_api::params; use crate::external_api::params::CertificateCreate; use crate::external_api::shared::ServiceUsingCertificate; @@ -136,6 +137,7 @@ impl super::Nexus { pool.id, pool.sled_id, pool.physical_disk_id, + CONTROL_PLANE_STORAGE_BUFFER.into(), ) }) .collect(); diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 4523ee893b6..bf3a3ef2956 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -19,6 +19,7 @@ use nexus_types::deployment::SledFilter; use nexus_types::external_api::views::PhysicalDiskPolicy; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; +use omicron_common::api::external::ByteCount; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; @@ -330,6 +331,9 @@ impl super::Nexus { request.id, request.sled_id, request.physical_disk_id, + // This function is only called from tests, so it does not need a + // real value here. + ByteCount::from_gibibytes_u32(0).into(), ); self.db_datastore.zpool_insert(&opctx, zpool).await?; Ok(()) diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index 29e3d7cd865..b38bd059007 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -35,6 +35,7 @@ use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils::wait_for_producer; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; +use nexus_types::identity::Asset; use nexus_types::silo::DEFAULT_SILO_ID; use omicron_common::api::external::Disk; use omicron_common::api::external::DiskState; @@ -2594,6 +2595,249 @@ async fn test_disk_expunge(cptestctx: &ControlPlaneTestContext) { assert_eq!(expunged_regions.len(), 3); } +#[nexus_test(extra_sled_agents = 3)] +async fn test_do_not_provision_on_dataset(cptestctx: &ControlPlaneTestContext) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create one zpool, each with one dataset, on all the sleds + let disk_test = DiskTestBuilder::new(&cptestctx) + .on_all_sleds() + .with_zpool_count(1) + .build() + .await; + + // For one of the datasets, mark it as not provisionable + let dataset = &disk_test.zpools().next().unwrap().datasets[0]; + + datastore + .mark_crucible_dataset_not_provisionable(&opctx, dataset.id) + .await + .unwrap(); + + // Create a disk + let client = &cptestctx.external_client; + let _project_id = create_project_and_pool(client).await; + + let disk = create_disk(&client, PROJECT_NAME, DISK_NAME).await; + + // Assert no region was allocated to the marked dataset + let disk_id = disk.identity.id; + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk_id) + .fetch() + .await + .unwrap_or_else(|_| panic!("test disk {:?} should exist", disk_id)); + + let allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id()).await.unwrap(); + + for (allocated_region_dataset, _) in allocated_regions { + assert_ne!(allocated_region_dataset.id(), dataset.id); + } +} + +#[nexus_test(extra_sled_agents = 2)] +async fn test_do_not_provision_on_dataset_not_enough( + cptestctx: &ControlPlaneTestContext, +) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx 
= + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create one zpool, each with one dataset, on all the sleds + let disk_test = DiskTestBuilder::new(&cptestctx) + .on_all_sleds() + .with_zpool_count(1) + .build() + .await; + + // For one of the datasets, mark it as not provisionable + let dataset = &disk_test.zpools().next().unwrap().datasets[0]; + + datastore + .mark_crucible_dataset_not_provisionable(&opctx, dataset.id) + .await + .unwrap(); + + // Because there's only 3 sled agents, each with one zpool with one dataset, + // this shouldn't be enough to create a disk. + + let client = &cptestctx.external_client; + create_project_and_pool(client).await; + + let disks_url = get_disks_url(); + + let new_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: DISK_NAME.parse().unwrap(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: ByteCount::from_gibibytes_u32(1), + }; + + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&new_disk)) + .expect_status(Some(StatusCode::INSUFFICIENT_STORAGE)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); + + // Marking that dataset as provisionable should allow the disk to be + // created. + + datastore + .mark_crucible_dataset_provisionable(&opctx, dataset.id) + .await + .unwrap(); + + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&new_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); +} + +#[nexus_test(extra_sled_agents = 2)] +async fn test_zpool_control_plane_storage_buffer( + cptestctx: &ControlPlaneTestContext, +) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create one zpool, each with one dataset, on all the sleds + let disk_test = DiskTestBuilder::new(&cptestctx) + .on_all_sleds() + .with_zpool_count(1) + .build() + .await; + + // Assert default is still 16 GiB + assert_eq!(16, DiskTest::DEFAULT_ZPOOL_SIZE_GIB); + + let client = &cptestctx.external_client; + create_project_and_pool(client).await; + + let disks_url = get_disks_url(); + + // Creating a 8G disk will work (10G size used due to reservation overhead) + let new_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: "disk1".parse().unwrap(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: ByteCount::from_gibibytes_u32(8), + }; + + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&new_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); + + // Creating a 4G disk will also work (5G size used due to reservation + // overhead plus the previous 10G size used is less than 16G) + let new_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: "disk2".parse().unwrap(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: ByteCount::from_gibibytes_u32(4), + }; + + NexusRequest::new( + 
RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&new_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); + + // Delete the 4G disk + let disk_url = get_disk_url("disk2"); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + // For any of the zpools, set the control plane storage buffer to 2G. This + // should prevent the disk's region allocation from succeeding (as the + // reserved sizes of 10G + 5G plus the storage buffer of 2G is 1G over the + // the backing pool's 16G). + + let zpool = &disk_test.zpools().next().unwrap(); + datastore + .zpool_set_control_plane_storage_buffer( + &opctx, + zpool.id, + ByteCount::from_gibibytes_u32(2).into(), + ) + .await + .unwrap(); + + // Now creating the 4G disk should fail + + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&new_disk)) + .expect_status(Some(StatusCode::INSUFFICIENT_STORAGE)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); + + // Setting the storage buffer to 1G should allow the disk creation to + // succeed. + + datastore + .zpool_set_control_plane_storage_buffer( + &opctx, + zpool.id, + ByteCount::from_gibibytes_u32(1).into(), + ) + .await + .unwrap(); + + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&new_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); +} + async fn disk_get(client: &ClientTestContext, disk_url: &str) -> Disk { NexusRequest::object_get(client, disk_url) .authn_as(AuthnMode::PrivilegedUser) diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 6be3c48bdc7..e5d2299ac0a 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -541,7 +541,12 @@ CREATE TABLE IF NOT EXISTS omicron.public.zpool ( sled_id UUID NOT NULL, /* FK into the Physical Disk table */ - physical_disk_id UUID NOT NULL + physical_disk_id UUID NOT NULL, + + /* + * How many bytes to reserve for non-Crucible control plane storage + */ + control_plane_storage_buffer INT NOT NULL ); /* Create an index on the physical disk id */ @@ -604,7 +609,10 @@ CREATE TABLE IF NOT EXISTS omicron.public.crucible_dataset ( * dataset that contains the regions (which is larger than the the actual * region size). 
*/ - size_used INT NOT NULL + size_used INT NOT NULL, + + /* Do not consider this dataset during region allocation */ + no_provision BOOL NOT NULL ); /* Create an index on the size usage for any Crucible dataset */ @@ -5040,7 +5048,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '135.0.0', NULL) + (TRUE, NOW(), NOW(), '136.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/do-not-provision-flag-for-crucible-dataset/up01.sql b/schema/crdb/do-not-provision-flag-for-crucible-dataset/up01.sql new file mode 100644 index 00000000000..d940c9408bf --- /dev/null +++ b/schema/crdb/do-not-provision-flag-for-crucible-dataset/up01.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.crucible_dataset + ADD COLUMN IF NOT EXISTS no_provision BOOL NOT NULL DEFAULT false; diff --git a/schema/crdb/do-not-provision-flag-for-crucible-dataset/up02.sql b/schema/crdb/do-not-provision-flag-for-crucible-dataset/up02.sql new file mode 100644 index 00000000000..1d0c88d7a72 --- /dev/null +++ b/schema/crdb/do-not-provision-flag-for-crucible-dataset/up02.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.crucible_dataset + ALTER COLUMN no_provision DROP DEFAULT; diff --git a/schema/crdb/do-not-provision-flag-for-crucible-dataset/up03.sql b/schema/crdb/do-not-provision-flag-for-crucible-dataset/up03.sql new file mode 100644 index 00000000000..22f7499491e --- /dev/null +++ b/schema/crdb/do-not-provision-flag-for-crucible-dataset/up03.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.zpool + ADD COLUMN IF NOT EXISTS control_plane_storage_buffer INT NOT NULL DEFAULT 268435456000; diff --git a/schema/crdb/do-not-provision-flag-for-crucible-dataset/up04.sql b/schema/crdb/do-not-provision-flag-for-crucible-dataset/up04.sql new file mode 100644 index 00000000000..534c9dad819 --- /dev/null +++ b/schema/crdb/do-not-provision-flag-for-crucible-dataset/up04.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.zpool + ALTER COLUMN control_plane_storage_buffer DROP DEFAULT;
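
For reference, a sketch of how an operator might exercise the omdb surface added above, mirroring the calls made in deploy.sh (which wraps them in `pfexec zlogin oxz_switch /opt/oxide/omdb/bin/omdb`). The kebab-case spellings of the crucible-dataset subcommands are derived from the clap definitions rather than quoted output, the UUIDs and byte counts are placeholders, and the destructive subcommands assume omdb's `-w` flag as used in deploy.sh.

# List zpools with their control plane storage buffer, or just their IDs
omdb db zpool list
omdb db zpool list --id-only

# Reserve 250 GiB (the CONTROL_PLANE_STORAGE_BUFFER default) on one pool
omdb -w db zpool set-storage-buffer 11111111-2222-3333-4444-555555555555 268435456000

# Inspect Crucible datasets and flag one out of, then back into, region allocation
omdb db crucible-dataset list
omdb db crucible-dataset show-overprovisioned
omdb -w db crucible-dataset mark-non-provisionable aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
omdb -w db crucible-dataset mark-provisionable aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee

# Regions can now be listed for a single dataset
omdb db region list --dataset-id aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee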