From 8c7db6d346801ee45842ea4258b882824d6d0b7d Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 13 Jan 2025 08:35:29 -0500 Subject: [PATCH 1/4] [reconfigurator] Add rendezvous_debug_dataset table Also adds datastore methods and a library crate to populate it. --- Cargo.lock | 23 ++ Cargo.toml | 3 + nexus/db-model/src/lib.rs | 2 + .../db-model/src/rendezvous_debug_dataset.rs | 66 ++++ nexus/db-model/src/schema.rs | 10 + nexus/db-model/src/schema_versions.rs | 3 +- nexus/db-queries/src/db/datastore/mod.rs | 1 + .../db/datastore/rendezvous_debug_dataset.rs | 263 ++++++++++++++++ nexus/reconfigurator/rendezvous/Cargo.toml | 36 +++ nexus/reconfigurator/rendezvous/build.rs | 10 + .../proptest-regressions/debug_dataset.txt | 7 + .../rendezvous/src/debug_dataset.rs | 297 ++++++++++++++++++ nexus/reconfigurator/rendezvous/src/lib.rs | 41 +++ schema/crdb/dbinit.sql | 52 ++- schema/crdb/rendezvous-debug-dataset/up1.sql | 7 + schema/crdb/rendezvous-debug-dataset/up2.sql | 3 + 16 files changed, 822 insertions(+), 2 deletions(-) create mode 100644 nexus/db-model/src/rendezvous_debug_dataset.rs create mode 100644 nexus/db-queries/src/db/datastore/rendezvous_debug_dataset.rs create mode 100644 nexus/reconfigurator/rendezvous/Cargo.toml create mode 100644 nexus/reconfigurator/rendezvous/build.rs create mode 100644 nexus/reconfigurator/rendezvous/proptest-regressions/debug_dataset.txt create mode 100644 nexus/reconfigurator/rendezvous/src/debug_dataset.rs create mode 100644 nexus/reconfigurator/rendezvous/src/lib.rs create mode 100644 schema/crdb/rendezvous-debug-dataset/up1.sql create mode 100644 schema/crdb/rendezvous-debug-dataset/up2.sql diff --git a/Cargo.lock b/Cargo.lock index 6cb2c83583e..766c2e19302 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6183,6 +6183,29 @@ dependencies = [ "slog-error-chain", ] +[[package]] +name = "nexus-reconfigurator-rendezvous" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-bb8-diesel", + "diesel", + "nexus-db-model", + "nexus-db-queries", + "nexus-types", + "omicron-common", + "omicron-rpaths", + "omicron-test-utils", + "omicron-uuid-kinds", + "omicron-workspace-hack", + "pq-sys", + "proptest", + "slog", + "test-strategy", + "tokio", + "uuid", +] + [[package]] name = "nexus-reconfigurator-simulation" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 8ea13cce5ba..01674756889 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -83,6 +83,7 @@ members = [ "nexus/reconfigurator/execution", "nexus/reconfigurator/planning", "nexus/reconfigurator/preparation", + "nexus/reconfigurator/rendezvous", "nexus/reconfigurator/simulation", "nexus/saga-recovery", "nexus/test-interface", @@ -218,6 +219,7 @@ default-members = [ "nexus/reconfigurator/execution", "nexus/reconfigurator/planning", "nexus/reconfigurator/preparation", + "nexus/reconfigurator/rendezvous", "nexus/reconfigurator/simulation", "nexus/saga-recovery", "nexus/test-interface", @@ -475,6 +477,7 @@ nexus-reconfigurator-blippy = { path = "nexus/reconfigurator/blippy" } nexus-reconfigurator-execution = { path = "nexus/reconfigurator/execution" } nexus-reconfigurator-planning = { path = "nexus/reconfigurator/planning" } nexus-reconfigurator-preparation = { path = "nexus/reconfigurator/preparation" } +nexus-reconfigurator-rendezvous = { path = "nexus/reconfigurator/rendezvous" } nexus-reconfigurator-simulation = { path = "nexus/reconfigurator/simulation" } nexus-saga-recovery = { path = "nexus/saga-recovery" } nexus-sled-agent-shared = { path = "nexus-sled-agent-shared" } diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index b6aea15c892..99214a54c6f 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -58,6 +58,7 @@ mod physical_disk_state; mod probe; mod producer_endpoint; mod project; +mod rendezvous_debug_dataset; mod semver_version; mod switch_interface; mod switch_port; @@ -186,6 +187,7 @@ pub use region_replacement_step::*; pub use region_snapshot::*; pub use region_snapshot_replacement::*; pub use region_snapshot_replacement_step::*; +pub use rendezvous_debug_dataset::*; pub use role_assignment::*; pub use role_builtin::*; pub use saga_types::*; diff --git a/nexus/db-model/src/rendezvous_debug_dataset.rs b/nexus/db-model/src/rendezvous_debug_dataset.rs new file mode 100644 index 00000000000..918e63d58dc --- /dev/null +++ b/nexus/db-model/src/rendezvous_debug_dataset.rs @@ -0,0 +1,66 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::schema::rendezvous_debug_dataset; +use crate::typed_uuid::DbTypedUuid; +use chrono::{DateTime, Utc}; +use omicron_uuid_kinds::BlueprintKind; +use omicron_uuid_kinds::BlueprintUuid; +use omicron_uuid_kinds::DatasetKind; +use omicron_uuid_kinds::DatasetUuid; +use omicron_uuid_kinds::ZpoolKind; +use omicron_uuid_kinds::ZpoolUuid; +use serde::{Deserialize, Serialize}; + +/// Database representation of a Debug Dataset available for use. +#[derive( + Queryable, + Insertable, + Debug, + Clone, + Selectable, + Deserialize, + Serialize, + PartialEq, +)] +#[diesel(table_name = rendezvous_debug_dataset)] +pub struct RendezvousDebugDataset { + id: DbTypedUuid, + time_created: DateTime, + time_tombstoned: Option>, + pool_id: DbTypedUuid, + blueprint_id_when_recorded: DbTypedUuid, +} + +impl RendezvousDebugDataset { + pub fn new( + id: DatasetUuid, + pool_id: ZpoolUuid, + blueprint_id: BlueprintUuid, + ) -> Self { + Self { + id: id.into(), + time_created: Utc::now(), + time_tombstoned: None, + pool_id: pool_id.into(), + blueprint_id_when_recorded: blueprint_id.into(), + } + } + + pub fn id(&self) -> DatasetUuid { + self.id.into() + } + + pub fn pool_id(&self) -> ZpoolUuid { + self.pool_id.into() + } + + pub fn blueprint_id_when_recorded(&self) -> BlueprintUuid { + self.blueprint_id_when_recorded.into() + } + + pub fn is_tombstoned(&self) -> bool { + self.time_tombstoned.is_some() + } +} diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index a8e1141db6e..971d5942cae 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1059,6 +1059,16 @@ table! { allow_tables_to_appear_in_same_query!(zpool, dataset); +table! { + rendezvous_debug_dataset (id) { + id -> Uuid, + time_created -> Timestamptz, + time_tombstoned -> Nullable, + pool_id -> Uuid, + blueprint_id_when_recorded -> Uuid, + } +} + table! { region (id) { id -> Uuid, diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 4542283aac1..ec7d113b966 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(118, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(119, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(119, "rendezvous-debug-dataset"), KnownVersion::new(118, "support-bundles"), KnownVersion::new(117, "add-completing-and-new-region-volume"), KnownVersion::new(116, "bp-physical-disk-disposition"), diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index db3eab61750..ecd341a0f40 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -84,6 +84,7 @@ mod region; mod region_replacement; mod region_snapshot; pub mod region_snapshot_replacement; +mod rendezvous_debug_dataset; mod role; mod saga; mod silo; diff --git a/nexus/db-queries/src/db/datastore/rendezvous_debug_dataset.rs b/nexus/db-queries/src/db/datastore/rendezvous_debug_dataset.rs new file mode 100644 index 00000000000..85ebac8e957 --- /dev/null +++ b/nexus/db-queries/src/db/datastore/rendezvous_debug_dataset.rs @@ -0,0 +1,263 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! [`DataStore`] methods on [`DebugDataset`]s. + +use super::DataStore; +use super::SQL_BATCH_SIZE; +use crate::authz; +use crate::context::OpContext; +use crate::db; +use crate::db::error::public_error_from_diesel; +use crate::db::error::ErrorHandler; +use crate::db::pagination::paginated; +use crate::db::pagination::Paginator; +use async_bb8_diesel::AsyncRunQueryDsl; +use chrono::Utc; +use diesel::prelude::*; +use nexus_db_model::to_db_typed_uuid; +use nexus_db_model::RendezvousDebugDataset; +use omicron_common::api::external::CreateResult; +use omicron_common::api::external::DataPageParams; +use omicron_common::api::external::Error; +use omicron_common::api::external::ListResultVec; +use omicron_uuid_kinds::DatasetUuid; +use omicron_uuid_kinds::GenericUuid; + +impl DataStore { + /// List one page of debug datasets + /// + /// This fetches all debug datasets, including those that have been + /// tombstoned. + async fn debug_dataset_list_all_page( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, DatasetUuid>, + ) -> ListResultVec { + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + use db::schema::rendezvous_debug_dataset::dsl; + + paginated( + dsl::rendezvous_debug_dataset, + dsl::id, + &pagparams.map_name(|id| id.as_untyped_uuid()), + ) + .select(RendezvousDebugDataset::as_select()) + .load_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// List all debug datasets, making as many queries as needed to get them + /// all + /// + /// This fetches all debug datasets, including those that have been + /// tombstoned. + /// + /// This should generally not be used in API handlers or other + /// latency-sensitive contexts, but it can make sense in saga actions or + /// background tasks. + pub async fn debug_dataset_list_all_batched( + &self, + opctx: &OpContext, + ) -> ListResultVec { + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + opctx.check_complex_operations_allowed()?; + + let mut all_datasets = Vec::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = self + .debug_dataset_list_all_page(opctx, &p.current_pagparams()) + .await?; + paginator = + p.found_batch(&batch, &|d: &RendezvousDebugDataset| d.id()); + all_datasets.extend(batch); + } + + Ok(all_datasets) + } + + /// Insert a new debug dataset record + /// + /// If this dataset row already exists, does nothing. In particular: + /// + /// * The zpool ID associated with a dataset cannot change + /// * If the dataset has been tombstoned, this will not "untombstone" it + pub async fn debug_dataset_insert_if_not_exists( + &self, + opctx: &OpContext, + dataset: RendezvousDebugDataset, + ) -> CreateResult> { + opctx.authorize(authz::Action::CreateChild, &authz::FLEET).await?; + + use db::schema::rendezvous_debug_dataset::dsl; + + diesel::insert_into(dsl::rendezvous_debug_dataset) + .values(dataset) + .on_conflict(dsl::id) + .do_nothing() + .returning(RendezvousDebugDataset::as_select()) + .get_result_async(&*self.pool_connection_authorized(opctx).await?) + .await + .optional() + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// Mark a debug dataset as tombstoned + /// + /// If the dataset has already been tombstoned, does nothing. + /// + /// # Returns + /// + /// Returns `true` if the dataset was tombstoned; `false` if the dataset was + /// already tombstoned or does not exist. + pub async fn debug_dataset_tombstone( + &self, + opctx: &OpContext, + dataset_id: DatasetUuid, + ) -> Result { + opctx.authorize(authz::Action::Delete, &authz::FLEET).await?; + + use db::schema::rendezvous_debug_dataset::dsl; + + diesel::update(dsl::rendezvous_debug_dataset) + .filter(dsl::id.eq(to_db_typed_uuid(dataset_id))) + .filter(dsl::time_tombstoned.is_null()) + .set(dsl::time_tombstoned.eq(Utc::now())) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map(|rows_modified| match rows_modified { + 0 => false, + 1 => true, + n => unreachable!( + "update restricted to 1 row return {n} rows modified" + ), + }) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::db::pub_test_utils::TestDatabase; + use omicron_test_utils::dev; + use omicron_uuid_kinds::BlueprintUuid; + use omicron_uuid_kinds::ZpoolUuid; + + #[tokio::test] + async fn insert_if_not_exists() { + let logctx = dev::test_setup_log("insert_if_not_exists"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + // There should be no datasets initially. + assert_eq!( + datastore.dataset_list_all_batched(opctx, None).await.unwrap(), + [] + ); + + // Inserting a new dataset should succeed. + let dataset1 = datastore + .debug_dataset_insert_if_not_exists( + opctx, + RendezvousDebugDataset::new( + DatasetUuid::new_v4(), + ZpoolUuid::new_v4(), + BlueprintUuid::new_v4(), + ), + ) + .await + .expect("query succeeded") + .expect("inserted dataset"); + let expected_datasets = vec![dataset1.clone()]; + assert_eq!( + datastore.debug_dataset_list_all_batched(opctx).await.unwrap(), + expected_datasets, + ); + + // Attempting to insert another dataset with the same ID should succeed + // without updating the existing record. We'll check this by passing a + // pool / blueprint ID but confirming we get back exactly the same + // record as we did previously. + let insert_again_result = datastore + .debug_dataset_insert_if_not_exists( + opctx, + RendezvousDebugDataset::new( + dataset1.id(), + ZpoolUuid::new_v4(), + BlueprintUuid::new_v4(), + ), + ) + .await + .expect("query succeeded"); + assert_eq!(insert_again_result, None); + assert_eq!( + datastore.debug_dataset_list_all_batched(opctx).await.unwrap(), + expected_datasets, + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn tombstone() { + let logctx = dev::test_setup_log("tombstone"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + // There should be no datasets initially. + assert_eq!( + datastore.dataset_list_all_batched(opctx, None).await.unwrap(), + [] + ); + + let dataset = RendezvousDebugDataset::new( + DatasetUuid::new_v4(), + ZpoolUuid::new_v4(), + BlueprintUuid::new_v4(), + ); + + // Tombstoning a nonexistent dataset should do nothing. + let tombstoned = datastore + .debug_dataset_tombstone(opctx, dataset.id()) + .await + .expect("query succeeded"); + assert!(!tombstoned); + + // Insert the dataset and confirm we see it. + let dataset = datastore + .debug_dataset_insert_if_not_exists(opctx, dataset) + .await + .expect("query succeeded") + .expect("inserted dataset"); + assert_eq!( + datastore.debug_dataset_list_all_batched(opctx).await.unwrap(), + [dataset.clone()], + ); + + // Tombstoning it should now succeed, and we should see the tombstoned + // version when listing all datasets. + let tombstoned = datastore + .debug_dataset_tombstone(opctx, dataset.id()) + .await + .expect("query succeeded"); + assert!(tombstoned); + let all_datasets = + datastore.debug_dataset_list_all_batched(opctx).await.unwrap(); + assert_eq!(all_datasets.len(), 1); + assert_eq!(all_datasets[0].id(), dataset.id()); + assert_eq!(all_datasets[0].pool_id(), dataset.pool_id()); + assert_eq!( + all_datasets[0].blueprint_id_when_recorded(), + dataset.blueprint_id_when_recorded() + ); + assert!(all_datasets[0].is_tombstoned()); + + db.terminate().await; + logctx.cleanup_successful(); + } +} diff --git a/nexus/reconfigurator/rendezvous/Cargo.toml b/nexus/reconfigurator/rendezvous/Cargo.toml new file mode 100644 index 00000000000..bda567da8fe --- /dev/null +++ b/nexus/reconfigurator/rendezvous/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "nexus-reconfigurator-rendezvous" +version = "0.1.0" +edition = "2021" + +[lints] +workspace = true + +[build-dependencies] +omicron-rpaths.workspace = true + +[dependencies] +anyhow.workspace = true +nexus-db-queries.workspace = true +nexus-types.workspace = true +omicron-common.workspace = true +omicron-uuid-kinds.workspace = true +slog.workspace = true + +# See omicron-rpaths for more about the "pq-sys" dependency. This is needed +# because we use the database in the test suite, though it doesn't appear to +# work to put the pq-sys dependency only in dev-dependencies. +pq-sys = "*" + +omicron-workspace-hack.workspace = true + +[dev-dependencies] +async-bb8-diesel.workspace = true +diesel.workspace = true +nexus-db-model.workspace = true +nexus-db-queries = { workspace = true, features = ["testing"] } +omicron-test-utils.workspace = true +proptest.workspace = true +test-strategy.workspace = true +tokio.workspace = true +uuid.workspace = true diff --git a/nexus/reconfigurator/rendezvous/build.rs b/nexus/reconfigurator/rendezvous/build.rs new file mode 100644 index 00000000000..1ba9acd41c9 --- /dev/null +++ b/nexus/reconfigurator/rendezvous/build.rs @@ -0,0 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// See omicron-rpaths for documentation. +// NOTE: This file MUST be kept in sync with the other build.rs files in this +// repository. +fn main() { + omicron_rpaths::configure_default_omicron_rpaths(); +} diff --git a/nexus/reconfigurator/rendezvous/proptest-regressions/debug_dataset.txt b/nexus/reconfigurator/rendezvous/proptest-regressions/debug_dataset.txt new file mode 100644 index 00000000000..dcf95820f48 --- /dev/null +++ b/nexus/reconfigurator/rendezvous/proptest-regressions/debug_dataset.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 703bd7e17f7c4601cde5ae0bcccb28c53c78d92ed0046987e3e94387731281f1 # shrinks to input = _ProptestReconciliationArgs { prep: {} } diff --git a/nexus/reconfigurator/rendezvous/src/debug_dataset.rs b/nexus/reconfigurator/rendezvous/src/debug_dataset.rs new file mode 100644 index 00000000000..8d8edf60066 --- /dev/null +++ b/nexus/reconfigurator/rendezvous/src/debug_dataset.rs @@ -0,0 +1,297 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Recording debug datasets in their rendezvous table + +use anyhow::Context; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::model::RendezvousDebugDataset; +use nexus_db_queries::db::DataStore; +use nexus_types::deployment::BlueprintDatasetConfig; +use nexus_types::deployment::BlueprintDatasetDisposition; +use omicron_common::api::internal::shared::DatasetKind; +use omicron_uuid_kinds::BlueprintUuid; +use omicron_uuid_kinds::DatasetUuid; +use slog::info; +use std::collections::BTreeSet; + +pub(crate) async fn reconcile_debug_datasets( + opctx: &OpContext, + datastore: &DataStore, + blueprint_id: BlueprintUuid, + blueprint_datasets: impl Iterator, + inventory_datasets: &BTreeSet, +) -> anyhow::Result<()> { + // We expect basically all executions of this task to do nothing: we're + // activated periodically, and only do work when a dataset has been + // newly-added or newly-expunged. For the newly-added case, we can first + // fetch all the existing datasets and then avoid trying to spuriously + // insert them again. + // + // This is a minor performance optimization. If we removed this fetch, the + // code below would still be correct, but it would issue a bunch of + // do-nothing inserts for already-existing datasets. + let existing_datasets = datastore + .debug_dataset_list_all_batched(opctx) + .await + .context("failed to list all debug datasets")? + .into_iter() + .map(|d| d.id()) + .collect::>(); + + // We want to insert any in-service datasets (according to the blueprint) + // that are also present in `inventory_datasets` but that are not already + // present in the database (described by `existing_datasets`). + let datasets_to_insert = inventory_datasets + .difference(&existing_datasets) + .collect::>(); + + for dataset in blueprint_datasets.filter(|d| d.kind == DatasetKind::Debug) { + match dataset.disposition { + BlueprintDatasetDisposition::InService => { + if datasets_to_insert.contains(&dataset.id) { + let db_dataset = RendezvousDebugDataset::new( + dataset.id, + dataset.pool.id(), + blueprint_id, + ); + datastore + .debug_dataset_insert_if_not_exists(opctx, db_dataset) + .await + .with_context(|| { + format!("failed to insert dataset {}", dataset.id) + })?; + } + } + BlueprintDatasetDisposition::Expunged => { + // We don't have a way to short-circuit tombstoning, assuming we + // don't want to query the db for "all tombstoned datasets" + // first. + // + // We could do that or we could add a + // `debug_dataset_tombstone_batched` that takes the entire set + // of expunged datasets? Or we could just no-op tombstone every + // expunged dataset on every execution like this. + if datastore + .debug_dataset_tombstone(opctx, dataset.id) + .await + .with_context(|| { + format!("failed to tombstone dataset {}", dataset.id) + })? + { + info!( + opctx.log, "tombstoned expunged dataset"; + "dataset_id" => %dataset.id, + ); + } + } + } + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use async_bb8_diesel::AsyncRunQueryDsl; + use async_bb8_diesel::AsyncSimpleConnection; + use nexus_db_queries::db::pub_test_utils::TestDatabase; + use nexus_db_queries::db::queries::ALLOW_FULL_TABLE_SCAN_SQL; + use nexus_types::inventory::ZpoolName; + use omicron_common::disk::CompressionAlgorithm; + use omicron_test_utils::dev; + use omicron_uuid_kinds::{GenericUuid, TypedUuid, TypedUuidKind}; + use proptest::prelude::*; + use proptest::proptest; + use std::collections::BTreeMap; + use test_strategy::Arbitrary; + use uuid::Uuid; + + // Helpers to describe how a dataset should be prepared for the proptest + // below. + #[derive(Debug, Clone, Copy, PartialEq, Eq, Arbitrary)] + enum ArbitraryDisposition { + InService, + Expunged, + } + + impl From for BlueprintDatasetDisposition { + fn from(value: ArbitraryDisposition) -> Self { + match value { + ArbitraryDisposition::InService => Self::InService, + ArbitraryDisposition::Expunged => Self::Expunged, + } + } + } + + #[derive(Debug, Clone, Copy, Arbitrary)] + struct DatasetPrep { + disposition: ArbitraryDisposition, + in_inventory: bool, + in_database: bool, + } + + fn u32_to_id(n: u32) -> TypedUuid { + let untyped = Uuid::from_u128(u128::from(n)); + TypedUuid::from_untyped_uuid(untyped) + } + + async fn proptest_do_prep( + opctx: &OpContext, + datastore: &DataStore, + blueprint_id: BlueprintUuid, + prep: &BTreeMap, + ) -> (Vec, BTreeSet) { + let mut datasets = Vec::with_capacity(prep.len()); + let mut inventory = BTreeSet::new(); + + // Clean up from any previous proptest cases + { + use nexus_db_model::schema::rendezvous_debug_dataset::dsl; + let conn = datastore.pool_connection_for_tests().await.unwrap(); + datastore + .transaction_non_retry_wrapper("proptest_prep_cleanup") + .transaction(&conn, |conn| async move { + conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await?; + diesel::delete(dsl::rendezvous_debug_dataset) + .execute_async(&conn) + .await?; + Ok::<_, diesel::result::Error>(()) + }) + .await + .unwrap(); + } + + for (&id, prep) in prep { + let d = BlueprintDatasetConfig { + disposition: prep.disposition.into(), + id: u32_to_id(id), + pool: ZpoolName::new_external(u32_to_id(id)), + kind: DatasetKind::Debug, + address: None, + quota: None, + reservation: None, + compression: CompressionAlgorithm::Off, + }; + + if prep.in_inventory { + inventory.insert(d.id); + } + + if prep.in_database { + datastore + .debug_dataset_insert_if_not_exists( + opctx, + RendezvousDebugDataset::new( + d.id, + d.pool.id(), + blueprint_id, + ), + ) + .await + .expect("query succeeded") + .expect("inserted dataset"); + } + + datasets.push(d); + } + + (datasets, inventory) + } + + #[test] + fn proptest_reconciliation() { + // We create our own runtime so we can interleave expensive async code + // (setting up a datastore) with a proptest that itself runs some async + // code (querying the datastore). + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_time() + .start_paused(true) + .enable_io() + .build() + .expect("tokio Runtime built successfully"); + + let logctx = dev::test_setup_log("tombstone"); + let db = + runtime.block_on(TestDatabase::new_with_datastore(&logctx.log)); + let (opctx, datastore) = (db.opctx(), db.datastore()); + + proptest!(ProptestConfig::with_cases(64), + |(prep in proptest::collection::btree_map( + any::(), + any::(), + 0..20, + ))| { + let blueprint_id = BlueprintUuid::new_v4(); + + let datastore_datasets = runtime.block_on(async { + let (blueprint_datasets, inventory_datasets) = proptest_do_prep( + opctx, + datastore, + blueprint_id, + &prep, + ).await; + + reconcile_debug_datasets( + opctx, + datastore, + blueprint_id, + blueprint_datasets.iter(), + &inventory_datasets, + ) + .await + .expect("reconciled debug dataset"); + + datastore + .debug_dataset_list_all_batched(opctx) + .await + .unwrap() + .into_iter() + .map(|d| (d.id(), d)) + .collect::>() + }); + + for (id, prep) in prep { + let id: DatasetUuid = u32_to_id(id); + + // If the dataset wasn't in the database already, we should not + // have inserted it if either: + // + // * it wasn't present in inventory + // * it was expunged (tombstoning an already-hard-deleted + // dataset is a no-op) + if !prep.in_database && (!prep.in_inventory + || prep.disposition == ArbitraryDisposition::Expunged + ){ + assert!( + !datastore_datasets.contains_key(&id), + "unexpected dataset present in database: \ + {id}, {prep:?}" + ); + continue; + } + + // Otherwise, we should have either inserted or attempted to + // update the record. Get it from the datastore and confirm that + // its tombstoned bit is correct based on the blueprint + // disposition. + let db = datastore_datasets + .get(&id) + .expect("missing entry in database"); + match prep.disposition { + ArbitraryDisposition::InService => { + assert!(!db.is_tombstoned()); + } + ArbitraryDisposition::Expunged => { + assert!(db.is_tombstoned()) + } + } + } + }); + + runtime.block_on(db.terminate()); + logctx.cleanup_successful(); + } +} diff --git a/nexus/reconfigurator/rendezvous/src/lib.rs b/nexus/reconfigurator/rendezvous/src/lib.rs new file mode 100644 index 00000000000..a9e63349fc1 --- /dev/null +++ b/nexus/reconfigurator/rendezvous/src/lib.rs @@ -0,0 +1,41 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Reconcilation of blueprints (the intended state of the system) and inventory +//! (collected state of the system) into rendezvous tables. +//! +//! Rendezvous tables reflect resources that are in service and available for +//! other parts of Nexus to use. See RFD 541 for more background. + +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintDatasetFilter; +use nexus_types::inventory::Collection; + +mod debug_dataset; + +pub async fn reconcile_blueprint_rendezvous_tables( + opctx: &OpContext, + datastore: &DataStore, + blueprint: &Blueprint, + inventory: &Collection, +) -> anyhow::Result<()> { + debug_dataset::reconcile_debug_datasets( + opctx, + datastore, + blueprint.id, + blueprint + .all_omicron_datasets(BlueprintDatasetFilter::All) + .map(|(_sled_id, dataset)| dataset), + &inventory + .sled_agents + .values() + .flat_map(|sled| sled.datasets.iter().flat_map(|d| d.id)) + .collect(), + ) + .await?; + + Ok(()) +} diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index ce6764bd17c..0a5c15ddaf2 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -4031,6 +4031,56 @@ CREATE TABLE IF NOT EXISTS omicron.public.cockroachdb_zone_id_to_node_id ( PRIMARY KEY (omicron_zone_id, crdb_node_id) ); +/* + * List of debug datasets available for use (e.g., by support bundles). + * + * This is a Reconfigurator rendezvous table: it reflects resources that + * Reconfigurator has ensured exist. It is always possible that a resource + * chosen from this table could be deleted after it's selected, but any + * non-deleted row in this table is guaranteed to have been created. + */ +CREATE TABLE IF NOT EXISTS omicron.public.rendezvous_debug_dataset ( + /* ID of the dataset in a blueprint */ + id UUID PRIMARY KEY, + + /* Time this dataset was added to the table */ + time_created TIMESTAMPTZ NOT NULL, + + /* + * If not NULL, indicates this dataset has been expunged in a blueprint. + * Multiple Nexus instances operate concurrently, and it's possible any + * given Nexus is operating on an old blueprint. We need to avoid a Nexus + * operating on an old blueprint from inserting a dataset that has already + * been expunged and removed from this table by a later blueprint, so + * instead of hard deleting, we tombstone rows via this column. + * + * Hard deletion of tombstoned datasets will require some care with respect + * to the problem above. For now we keep tombstoned datasets around forever. + */ + time_tombstoned TIMESTAMPTZ, + + /* ID of the zpool on which this dataset is placed */ + pool_id UUID NOT NULL, + + /* + * ID of the target blueprint from which this row was inserted. + * + * Rows are added to this table when the Reconfigurator reconciliation + * background task sees (a) an in-service debug dataset in the current + * target blueprint and (b) a report from an inventory collection that that + * dataset has been created on a sled. Recording the inventory collection is + * not particularly useful, as they age off quickly, but we do record the + * blueprint ID that was the target at the point at which this row was + * added. + */ + blueprint_id_when_recorded UUID NOT NULL +); + +/* Add an index which lets us find usable debug datasets */ +CREATE INDEX IF NOT EXISTS lookup_usable_rendezvous_debug_dataset + ON omicron.public.rendezvous_debug_dataset (id) + WHERE time_tombstoned IS NULL; + /*******************************************************************/ /* @@ -4757,7 +4807,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '118.0.0', NULL) + (TRUE, NOW(), NOW(), '119.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/rendezvous-debug-dataset/up1.sql b/schema/crdb/rendezvous-debug-dataset/up1.sql new file mode 100644 index 00000000000..7591d8312b7 --- /dev/null +++ b/schema/crdb/rendezvous-debug-dataset/up1.sql @@ -0,0 +1,7 @@ +CREATE TABLE IF NOT EXISTS omicron.public.rendezvous_debug_dataset ( + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_tombstoned TIMESTAMPTZ, + pool_id UUID NOT NULL, + blueprint_id_when_recorded UUID NOT NULL +); diff --git a/schema/crdb/rendezvous-debug-dataset/up2.sql b/schema/crdb/rendezvous-debug-dataset/up2.sql new file mode 100644 index 00000000000..89cfc6ef5c5 --- /dev/null +++ b/schema/crdb/rendezvous-debug-dataset/up2.sql @@ -0,0 +1,3 @@ +CREATE INDEX IF NOT EXISTS lookup_usable_rendezvous_debug_dataset + ON omicron.public.rendezvous_debug_dataset (id) + WHERE time_tombstoned IS NULL; From 541a22e5d49a2cec94fe0b5caf8154905affdb44 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 14 Jan 2025 14:27:53 -0500 Subject: [PATCH 2/4] rustdoc fix --- nexus/db-queries/src/db/datastore/rendezvous_debug_dataset.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/db-queries/src/db/datastore/rendezvous_debug_dataset.rs b/nexus/db-queries/src/db/datastore/rendezvous_debug_dataset.rs index 85ebac8e957..b3476ca3717 100644 --- a/nexus/db-queries/src/db/datastore/rendezvous_debug_dataset.rs +++ b/nexus/db-queries/src/db/datastore/rendezvous_debug_dataset.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! [`DataStore`] methods on [`DebugDataset`]s. +//! [`DataStore`] methods on [`RendezvousDebugDataset`]s. use super::DataStore; use super::SQL_BATCH_SIZE; From 8372bb69b0e21b6c6446282e7c9d902976c30086 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 15 Jan 2025 10:50:01 -0500 Subject: [PATCH 3/4] add blueprint_id_when_tombstoned column --- .../db-model/src/rendezvous_debug_dataset.rs | 21 ++++++++-- nexus/db-model/src/schema.rs | 3 +- .../db/datastore/rendezvous_debug_dataset.rs | 32 ++++++++++++---- schema/crdb/dbinit.sql | 38 ++++++++++++++----- schema/crdb/rendezvous-debug-dataset/up1.sql | 10 ++++- 5 files changed, 82 insertions(+), 22 deletions(-) diff --git a/nexus/db-model/src/rendezvous_debug_dataset.rs b/nexus/db-model/src/rendezvous_debug_dataset.rs index 918e63d58dc..a80b3230597 100644 --- a/nexus/db-model/src/rendezvous_debug_dataset.rs +++ b/nexus/db-model/src/rendezvous_debug_dataset.rs @@ -30,7 +30,8 @@ pub struct RendezvousDebugDataset { time_created: DateTime, time_tombstoned: Option>, pool_id: DbTypedUuid, - blueprint_id_when_recorded: DbTypedUuid, + blueprint_id_when_created: DbTypedUuid, + blueprint_id_when_tombstoned: Option>, } impl RendezvousDebugDataset { @@ -44,7 +45,8 @@ impl RendezvousDebugDataset { time_created: Utc::now(), time_tombstoned: None, pool_id: pool_id.into(), - blueprint_id_when_recorded: blueprint_id.into(), + blueprint_id_when_created: blueprint_id.into(), + blueprint_id_when_tombstoned: None, } } @@ -56,11 +58,22 @@ impl RendezvousDebugDataset { self.pool_id.into() } - pub fn blueprint_id_when_recorded(&self) -> BlueprintUuid { - self.blueprint_id_when_recorded.into() + pub fn blueprint_id_when_created(&self) -> BlueprintUuid { + self.blueprint_id_when_created.into() + } + + pub fn blueprint_id_when_tombstoned(&self) -> Option { + self.blueprint_id_when_tombstoned.map(From::from) } pub fn is_tombstoned(&self) -> bool { + // A CHECK constraint in the schema guarantees both the `*_tombstoned` + // fields are set or not set; document that check here with an + // assertion. + debug_assert_eq!( + self.time_tombstoned.is_some(), + self.blueprint_id_when_tombstoned.is_some() + ); self.time_tombstoned.is_some() } } diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 971d5942cae..a1ae3119e46 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1065,7 +1065,8 @@ table! { time_created -> Timestamptz, time_tombstoned -> Nullable, pool_id -> Uuid, - blueprint_id_when_recorded -> Uuid, + blueprint_id_when_created -> Uuid, + blueprint_id_when_tombstoned -> Nullable, } } diff --git a/nexus/db-queries/src/db/datastore/rendezvous_debug_dataset.rs b/nexus/db-queries/src/db/datastore/rendezvous_debug_dataset.rs index b3476ca3717..ff75b2480a8 100644 --- a/nexus/db-queries/src/db/datastore/rendezvous_debug_dataset.rs +++ b/nexus/db-queries/src/db/datastore/rendezvous_debug_dataset.rs @@ -22,13 +22,14 @@ use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; +use omicron_uuid_kinds::BlueprintUuid; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::GenericUuid; impl DataStore { /// List one page of debug datasets /// - /// This fetches all debug datasets, including those that have been + /// This fetches all debug datasets, including those that have been /// tombstoned. async fn debug_dataset_list_all_page( &self, @@ -117,6 +118,7 @@ impl DataStore { &self, opctx: &OpContext, dataset_id: DatasetUuid, + blueprint_id: BlueprintUuid, ) -> Result { opctx.authorize(authz::Action::Delete, &authz::FLEET).await?; @@ -125,7 +127,11 @@ impl DataStore { diesel::update(dsl::rendezvous_debug_dataset) .filter(dsl::id.eq(to_db_typed_uuid(dataset_id))) .filter(dsl::time_tombstoned.is_null()) - .set(dsl::time_tombstoned.eq(Utc::now())) + .set(( + dsl::time_tombstoned.eq(Utc::now()), + dsl::blueprint_id_when_tombstoned + .eq(to_db_typed_uuid(blueprint_id)), + )) .execute_async(&*self.pool_connection_authorized(opctx).await?) .await .map(|rows_modified| match rows_modified { @@ -144,7 +150,6 @@ mod tests { use super::*; use crate::db::pub_test_utils::TestDatabase; use omicron_test_utils::dev; - use omicron_uuid_kinds::BlueprintUuid; use omicron_uuid_kinds::ZpoolUuid; #[tokio::test] @@ -222,8 +227,13 @@ mod tests { ); // Tombstoning a nonexistent dataset should do nothing. + let tombstone_blueprint_id = BlueprintUuid::new_v4(); let tombstoned = datastore - .debug_dataset_tombstone(opctx, dataset.id()) + .debug_dataset_tombstone( + opctx, + dataset.id(), + tombstone_blueprint_id, + ) .await .expect("query succeeded"); assert!(!tombstoned); @@ -242,7 +252,11 @@ mod tests { // Tombstoning it should now succeed, and we should see the tombstoned // version when listing all datasets. let tombstoned = datastore - .debug_dataset_tombstone(opctx, dataset.id()) + .debug_dataset_tombstone( + opctx, + dataset.id(), + tombstone_blueprint_id, + ) .await .expect("query succeeded"); assert!(tombstoned); @@ -252,8 +266,12 @@ mod tests { assert_eq!(all_datasets[0].id(), dataset.id()); assert_eq!(all_datasets[0].pool_id(), dataset.pool_id()); assert_eq!( - all_datasets[0].blueprint_id_when_recorded(), - dataset.blueprint_id_when_recorded() + all_datasets[0].blueprint_id_when_created(), + dataset.blueprint_id_when_created() + ); + assert_eq!( + all_datasets[0].blueprint_id_when_tombstoned(), + Some(tombstone_blueprint_id) ); assert!(all_datasets[0].is_tombstoned()); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 0a5c15ddaf2..d944f99a24c 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -4063,17 +4063,37 @@ CREATE TABLE IF NOT EXISTS omicron.public.rendezvous_debug_dataset ( pool_id UUID NOT NULL, /* - * ID of the target blueprint from which this row was inserted. + * ID of the target blueprint the Reconfigurator reconciliation RPW was + * acting on when this row was created. * - * Rows are added to this table when the Reconfigurator reconciliation - * background task sees (a) an in-service debug dataset in the current - * target blueprint and (b) a report from an inventory collection that that - * dataset has been created on a sled. Recording the inventory collection is - * not particularly useful, as they age off quickly, but we do record the - * blueprint ID that was the target at the point at which this row was - * added. + * In practice, this will often be the same blueprint ID in which this + * dataset was added, but it's not guaranteed to be (it could be any + * descendent blueprint in which this dataset is still in service). */ - blueprint_id_when_recorded UUID NOT NULL + blueprint_id_when_created UUID NOT NULL, + + /* + * ID of the target blueprint the Reconfigurator reconciliation RPW was + * acting on when this row was tombstoned. + * + * In practice, this will often be the same blueprint ID in which this + * dataset was expunged, but it's not guaranteed to be (it could be any + * descendent blueprint in which this dataset is expunged and not yet + * pruned). + */ + blueprint_id_when_tombstoned UUID, + + /* + * Either both `*_tombstoned` columns should be set (if this row has been + * tombstoned) or neither should (if it has not). + */ + CONSTRAINT tombstoned_consistency CHECK ( + (time_tombstoned IS NULL + AND blueprint_id_when_tombstoned IS NULL) + OR + (time_tombstoned IS NOT NULL + AND blueprint_id_when_tombstoned IS NOT NULL) + ) ); /* Add an index which lets us find usable debug datasets */ diff --git a/schema/crdb/rendezvous-debug-dataset/up1.sql b/schema/crdb/rendezvous-debug-dataset/up1.sql index 7591d8312b7..b2f2d4fc598 100644 --- a/schema/crdb/rendezvous-debug-dataset/up1.sql +++ b/schema/crdb/rendezvous-debug-dataset/up1.sql @@ -3,5 +3,13 @@ CREATE TABLE IF NOT EXISTS omicron.public.rendezvous_debug_dataset ( time_created TIMESTAMPTZ NOT NULL, time_tombstoned TIMESTAMPTZ, pool_id UUID NOT NULL, - blueprint_id_when_recorded UUID NOT NULL + blueprint_id_when_created UUID NOT NULL, + blueprint_id_when_tombstoned UUID, + CONSTRAINT tombstoned_consistency CHECK ( + (time_tombstoned IS NULL + AND blueprint_id_when_tombstoned IS NULL) + OR + (time_tombstoned IS NOT NULL + AND blueprint_id_when_tombstoned IS NOT NULL) + ) ); From 08315cff423effd44ac69bf7ad15bc87a0fc27cc Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 15 Jan 2025 11:02:49 -0500 Subject: [PATCH 4/4] clean up RPW implementation --- .../rendezvous/src/debug_dataset.rs | 88 +++++++++++-------- 1 file changed, 52 insertions(+), 36 deletions(-) diff --git a/nexus/reconfigurator/rendezvous/src/debug_dataset.rs b/nexus/reconfigurator/rendezvous/src/debug_dataset.rs index 8d8edf60066..6c8e1fa0376 100644 --- a/nexus/reconfigurator/rendezvous/src/debug_dataset.rs +++ b/nexus/reconfigurator/rendezvous/src/debug_dataset.rs @@ -14,6 +14,7 @@ use omicron_common::api::internal::shared::DatasetKind; use omicron_uuid_kinds::BlueprintUuid; use omicron_uuid_kinds::DatasetUuid; use slog::info; +use std::collections::BTreeMap; use std::collections::BTreeSet; pub(crate) async fn reconcile_debug_datasets( @@ -25,32 +26,29 @@ pub(crate) async fn reconcile_debug_datasets( ) -> anyhow::Result<()> { // We expect basically all executions of this task to do nothing: we're // activated periodically, and only do work when a dataset has been - // newly-added or newly-expunged. For the newly-added case, we can first - // fetch all the existing datasets and then avoid trying to spuriously - // insert them again. + // newly-added or newly-expunged. // - // This is a minor performance optimization. If we removed this fetch, the - // code below would still be correct, but it would issue a bunch of - // do-nothing inserts for already-existing datasets. - let existing_datasets = datastore + // This is a performance optimization. If we removed this fetch, the code + // below would still be correct, but it would issue a bunch of do-nothing + // queries for every individual dataset in `blueprint_datasets`. + let existing_db_datasets = datastore .debug_dataset_list_all_batched(opctx) .await .context("failed to list all debug datasets")? .into_iter() - .map(|d| d.id()) - .collect::>(); - - // We want to insert any in-service datasets (according to the blueprint) - // that are also present in `inventory_datasets` but that are not already - // present in the database (described by `existing_datasets`). - let datasets_to_insert = inventory_datasets - .difference(&existing_datasets) - .collect::>(); + .map(|d| (d.id(), d)) + .collect::>(); for dataset in blueprint_datasets.filter(|d| d.kind == DatasetKind::Debug) { match dataset.disposition { BlueprintDatasetDisposition::InService => { - if datasets_to_insert.contains(&dataset.id) { + // Only attempt to insert this dataset if it has shown up in + // inventory (required for correctness) and isn't already + // present in the db (performance optimization only). Inserting + // an already-present row is a no-op, so it's safe to skip. + if inventory_datasets.contains(&dataset.id) + && !existing_db_datasets.contains_key(&dataset.id) + { let db_dataset = RendezvousDebugDataset::new( dataset.id, dataset.pool.id(), @@ -65,25 +63,44 @@ pub(crate) async fn reconcile_debug_datasets( } } BlueprintDatasetDisposition::Expunged => { - // We don't have a way to short-circuit tombstoning, assuming we - // don't want to query the db for "all tombstoned datasets" - // first. + // Only attempt to tombstone this dataset if it isn't already + // marked as tombstoned in the database. // - // We could do that or we could add a - // `debug_dataset_tombstone_batched` that takes the entire set - // of expunged datasets? Or we could just no-op tombstone every - // expunged dataset on every execution like this. - if datastore - .debug_dataset_tombstone(opctx, dataset.id) - .await - .with_context(|| { - format!("failed to tombstone dataset {}", dataset.id) - })? - { - info!( - opctx.log, "tombstoned expunged dataset"; - "dataset_id" => %dataset.id, - ); + // The `.unwrap_or(false)` means we'll attempt to tombstone a + // row even if it wasn't present in the db at all when we + // queried it above. This is _probably_ unnecessary (tombstoning + // a nonexistent row is a no-op), but I'm not positive there + // isn't a case where we might be operating on a blueprint + // simultaneously to some other Nexus operating on an older + // blueprint/inventory where it might insert this row after we + // queried above and before we tombstone below. It seems safer + // to issue a probably-do-nothing query than to _not_ issue a + // probably-do-nothing-but-might-do-something-if-I'm-wrong + // query. + let already_tombstoned = existing_db_datasets + .get(&dataset.id) + .map(|d| d.is_tombstoned()) + .unwrap_or(false); + if !already_tombstoned { + if datastore + .debug_dataset_tombstone( + opctx, + dataset.id, + blueprint_id, + ) + .await + .with_context(|| { + format!( + "failed to tombstone dataset {}", + dataset.id + ) + })? + { + info!( + opctx.log, "tombstoned expunged dataset"; + "dataset_id" => %dataset.id, + ); + } } } } @@ -105,7 +122,6 @@ mod tests { use omicron_uuid_kinds::{GenericUuid, TypedUuid, TypedUuidKind}; use proptest::prelude::*; use proptest::proptest; - use std::collections::BTreeMap; use test_strategy::Arbitrary; use uuid::Uuid;