From cfe893d07ee8903dcbb27829874164fcec3c38a6 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Wed, 7 Sep 2022 09:27:46 -0400 Subject: [PATCH 1/8] Cleanup Crucible resources with volume delete saga Add a volume delete saga, and call that whenever a volume needs to be deleted. Crucible resources now have accounting for their use, and when the number of references reaches zero then cleanup occurs. Adds a table to store created region snapshots as part of this accounting, and make a record of when a Crucible snapshot is taken of a region. Region snapshots are uniquely identified by (dataset_id, region_id, snapshot_id), and additionally contain a snapshot address for identification as part of a volume construction request. All DELETE calls to Crucible Agent(s) are now only performed by the volume delete saga. Add tests for cases where if Nexus is properly accounting for snapshots. It should not delete the disk's corresponding region when the disk is deleted because a snapshot exists, or if multiple disks reference a running snapshot. A small change to the simulated Crucible agent is also included in this commit - logic was moved out of the HTTP endpoint function into the Crucible struct. Logic doesn't belong in HTTP endpoint functions :) Closes https://github.com/oxidecomputer/omicron/issues/1632 --- common/src/sql/dbinit.sql | 52 +- nexus/db-model/src/lib.rs | 2 + nexus/db-model/src/region_snapshot.rs | 36 + nexus/db-model/src/schema.rs | 19 +- nexus/db-model/src/volume.rs | 5 +- nexus/src/app/disk.rs | 3 + nexus/src/app/mod.rs | 1 + nexus/src/app/sagas/disk_create.rs | 82 +- nexus/src/app/sagas/disk_delete.rs | 63 +- nexus/src/app/sagas/mod.rs | 9 + nexus/src/app/sagas/snapshot_create.rs | 147 +-- nexus/src/app/sagas/volume_delete.rs | 443 +++++++ nexus/src/app/volume.rs | 40 + nexus/src/db/datastore/mod.rs | 3 + nexus/src/db/datastore/region.rs | 10 +- nexus/src/db/datastore/region_snapshot.rs | 63 + nexus/src/db/datastore/snapshot.rs | 45 +- nexus/src/db/datastore/volume.rs | 453 ++++++- nexus/test-utils/src/resource_helpers.rs | 17 + nexus/tests/integration_tests/mod.rs | 1 + .../integration_tests/volume_management.rs | 1147 +++++++++++++++++ .../src/sim/http_entrypoints_storage.rs | 12 +- sled-agent/src/sim/storage.rs | 51 +- 23 files changed, 2354 insertions(+), 350 deletions(-) create mode 100644 nexus/db-model/src/region_snapshot.rs create mode 100644 nexus/src/app/sagas/volume_delete.rs create mode 100644 nexus/src/app/volume.rs create mode 100644 nexus/src/db/datastore/region_snapshot.rs create mode 100644 nexus/tests/integration_tests/volume_management.rs diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index 01dc6bca472..3f0c601040a 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -223,6 +223,45 @@ CREATE INDEX on omicron.public.Region ( dataset_id ); +/* + * A snapshot of a region, within a dataset. + */ +CREATE TABLE omicron.public.region_snapshot ( + dataset_id UUID NOT NULL, + region_id UUID NOT NULL, + + /* Associated higher level virtual snapshot */ + snapshot_id UUID NOT NULL, + + /* + * Target string, for identification as part of + * volume construction request(s) + */ + snapshot_addr TEXT NOT NULL, + + /* How many volumes reference this? 
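When this count reaches zero, the volume delete saga is free to remove the running read-only downstairs and the snapshot itself.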
*/ + volume_references INT8 NOT NULL, + + PRIMARY KEY (dataset_id, region_id, snapshot_id) +); + +/* Index for use during join with region table */ +CREATE INDEX on omicron.public.region_snapshot ( + dataset_id, region_id +); + +/* + * Index on volume_references and snapshot_addr for crucible + * resource accounting lookup + */ +CREATE INDEX on omicron.public.region_snapshot ( + volume_references +); + +CREATE INDEX on omicron.public.region_snapshot ( + snapshot_addr +); + /* * A volume within Crucible */ @@ -241,7 +280,18 @@ CREATE TABLE omicron.public.volume ( * consumed by some Upstairs code to perform the volume creation. The Rust * type of this column should be Crucible::VolumeConstructionRequest. */ - data TEXT NOT NULL + data TEXT NOT NULL, + + /* + * A JSON document describing what resources to clean up when deleting this + * volume. + */ + resources_to_clean_up TEXT +); + +/* Quickly find deleted volumes */ +CREATE INDEX on omicron.public.volume ( + time_deleted ); /* diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 95c79b6c863..95311d79453 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -41,6 +41,7 @@ mod producer_endpoint; mod project; mod rack; mod region; +mod region_snapshot; mod role_assignment; mod role_builtin; pub mod saga_types; @@ -106,6 +107,7 @@ pub use producer_endpoint::*; pub use project::*; pub use rack::*; pub use region::*; +pub use region_snapshot::*; pub use role_assignment::*; pub use role_builtin::*; pub use service::*; diff --git a/nexus/db-model/src/region_snapshot.rs b/nexus/db-model/src/region_snapshot.rs new file mode 100644 index 00000000000..fcd2823e375 --- /dev/null +++ b/nexus/db-model/src/region_snapshot.rs @@ -0,0 +1,36 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::schema::region_snapshot; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// Database representation of a Region's snapshot. +/// +/// A region snapshot represents a snapshot of a region, taken during the higher +/// level virtual disk snapshot operation. +#[derive( + Queryable, + Insertable, + Debug, + Clone, + Selectable, + Serialize, + Deserialize, + PartialEq, +)] +#[diesel(table_name = region_snapshot)] +pub struct RegionSnapshot { + // unique identifier of this region snapshot + pub dataset_id: Uuid, + pub region_id: Uuid, + pub snapshot_id: Uuid, + + // used for identifying volumes that reference this + pub snapshot_addr: String, + + // how many volumes reference this? + pub volume_references: i64, +} + diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 73d91b11a51..54ec6fb3519 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -442,6 +442,16 @@ table! { } } +table! { + region_snapshot (dataset_id, region_id, snapshot_id) { + dataset_id -> Uuid, + region_id -> Uuid, + snapshot_id -> Uuid, + snapshot_addr -> Text, + volume_references -> Int8, + } +} + table! { volume (id) { id -> Uuid, @@ -451,7 +461,12 @@ table! { rcgen -> Int8, data -> Text, - /* TODO: some sort of refcount? */ + + /* + * During volume deletion, a serialized list of Crucible resources to + * clean up will be written here, along with setting time_deleted. 
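+         * The value is the serialized CrucibleResources struct recorded when
+         * the volume is soft deleted.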
+ */ + resources_to_clean_up -> Nullable, } } @@ -623,6 +638,7 @@ allow_tables_to_appear_in_same_query!( project, rack, region, + region_snapshot, saga, saga_node_event, silo, @@ -631,6 +647,7 @@ allow_tables_to_appear_in_same_query!( service, sled, router_route, + volume, vpc, vpc_subnet, vpc_router, diff --git a/nexus/db-model/src/volume.rs b/nexus/db-model/src/volume.rs index b53815c10e3..3fbb4378416 100644 --- a/nexus/db-model/src/volume.rs +++ b/nexus/db-model/src/volume.rs @@ -24,11 +24,13 @@ use uuid::Uuid; pub struct Volume { #[diesel(embed)] identity: VolumeIdentity, - time_deleted: Option>, + pub time_deleted: Option>, rcgen: Generation, data: String, + + pub resources_to_clean_up: Option, } impl Volume { @@ -38,6 +40,7 @@ impl Volume { time_deleted: None, rcgen: Generation::new(), data, + resources_to_clean_up: None, } } diff --git a/nexus/src/app/disk.rs b/nexus/src/app/disk.rs index 7c327546cd8..e26955f12fe 100644 --- a/nexus/src/app/disk.rs +++ b/nexus/src/app/disk.rs @@ -510,6 +510,9 @@ impl super::Nexus { .project_delete_snapshot(opctx, &authz_snapshot, &db_snapshot) .await?; + // Kick off volume deletion saga + self.volume_delete(db_snapshot.volume_id).await?; + Ok(()) } } diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 1ad0a3f34de..782bd3fbece 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -39,6 +39,7 @@ mod silo; mod sled; pub mod test_interfaces; mod update; +mod volume; mod vpc; mod vpc_router; mod vpc_subnet; diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index 34c8ac445f7..ab4ef3887af 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -4,7 +4,7 @@ use super::{ ActionRegistry, NexusActionContext, NexusSaga, SagaInitError, - ACTION_GENERATE_ID, + ACTION_GENERATE_ID, MAX_CONCURRENT_REGION_REQUESTS, }; use crate::app::sagas::NexusAction; use crate::context::OpContext; @@ -51,15 +51,13 @@ lazy_static! { sdc_create_disk_record, sdc_create_disk_record_undo ); - static ref REGIONS_ALLOC: NexusAction = ActionFunc::new_action( + static ref REGIONS_ALLOC: NexusAction = new_action_noop_undo( "disk-create.regions-alloc", sdc_alloc_regions, - sdc_alloc_regions_undo ); - static ref REGIONS_ENSURE: NexusAction = ActionFunc::new_action( + static ref REGIONS_ENSURE: NexusAction = new_action_noop_undo( "disk-create.regions-ensure", sdc_regions_ensure, - sdc_regions_ensure_undo ); static ref CREATE_VOLUME_RECORD: NexusAction = ActionFunc::new_action( "disk-create.create-volume-record", @@ -231,6 +229,7 @@ async fn sdc_alloc_regions( let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; let volume_id = sagactx.lookup::("volume_id")?; + // Ensure the disk is backed by appropriate regions. // // This allocates regions in the database, but the disk state is still @@ -251,16 +250,7 @@ async fn sdc_alloc_regions( Ok(datasets_and_regions) } -async fn sdc_alloc_regions_undo( - sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { - let osagactx = sagactx.user_data(); - - let volume_id = sagactx.lookup::("volume_id")?; - osagactx.datastore().regions_hard_delete(volume_id).await?; - Ok(()) -} - +/// Call out to Crucible agent and perform region creation. async fn ensure_region_in_dataset( log: &Logger, dataset: &db::model::Dataset, @@ -317,10 +307,7 @@ async fn ensure_region_in_dataset( Ok(region.into_inner()) } -// Arbitrary limit on concurrency, for operations issued -// on multiple regions within a disk at the same time. 
-const MAX_CONCURRENT_REGION_REQUESTS: usize = 3; - +/// Call out to Crucible agent and perform region creation. async fn sdc_regions_ensure( sagactx: NexusActionContext, ) -> Result { @@ -379,7 +366,7 @@ async fn sdc_regions_ensure( let block_size = datasets_and_regions[0].1.block_size; - // If requested, back disk by image + // If a disk source was requested, set the read-only parent of this disk. let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; let log = osagactx.log(); @@ -497,7 +484,7 @@ async fn sdc_regions_ensure( ); } - // Store volume details in db + // Create volume construction request for this disk let mut rng = StdRng::from_entropy(); let volume_construction_request = VolumeConstructionRequest::Volume { id: disk_id, @@ -554,55 +541,6 @@ async fn sdc_regions_ensure( Ok(volume_data) } -pub(super) async fn delete_regions( - datasets_and_regions: Vec<(db::model::Dataset, db::model::Region)>, -) -> Result<(), Error> { - let request_count = datasets_and_regions.len(); - futures::stream::iter(datasets_and_regions) - .map(|(dataset, region)| async move { - let url = format!("http://{}", dataset.address()); - let client = CrucibleAgentClient::new(&url); - let id = RegionId(region.id().to_string()); - client.region_delete(&id).await.map_err(|e| match e { - crucible_agent_client::Error::ErrorResponse(rv) => { - match rv.status() { - http::StatusCode::SERVICE_UNAVAILABLE => { - Error::unavail(&rv.message) - } - status if status.is_client_error() => { - Error::invalid_request(&rv.message) - } - _ => Error::internal_error(&rv.message), - } - } - _ => Error::internal_error( - "unexpected failure during `region_delete`", - ), - }) - }) - // Execute the allocation requests concurrently. - .buffer_unordered(std::cmp::min( - request_count, - MAX_CONCURRENT_REGION_REQUESTS, - )) - .collect::>>() - .await - .into_iter() - .collect::, _>>()?; - Ok(()) -} - -async fn sdc_regions_ensure_undo( - sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { - let datasets_and_regions = sagactx - .lookup::>( - "datasets_and_regions", - )?; - delete_regions(datasets_and_regions).await?; - Ok(()) -} - async fn sdc_create_volume_record( sagactx: NexusActionContext, ) -> Result { @@ -628,7 +566,7 @@ async fn sdc_create_volume_record_undo( let osagactx = sagactx.user_data(); let volume_id = sagactx.lookup::("volume_id")?; - osagactx.datastore().volume_delete(volume_id).await?; + osagactx.nexus().volume_delete(volume_id).await?; Ok(()) } @@ -647,6 +585,7 @@ async fn sdc_finalize_disk_record( .lookup_for(authz::Action::Modify) .await .map_err(ActionError::action_failed)?; + // TODO-security Review whether this can ever fail an authz check. We don't // want this to ever fail the authz check here -- if it did, we would have // wanted to catch that a lot sooner. It wouldn't make sense for it to fail @@ -665,6 +604,7 @@ async fn sdc_finalize_disk_record( ) .await .map_err(ActionError::action_failed)?; + Ok(()) } diff --git a/nexus/src/app/sagas/disk_delete.rs b/nexus/src/app/sagas/disk_delete.rs index 4ab26222b87..10182be3cb4 100644 --- a/nexus/src/app/sagas/disk_delete.rs +++ b/nexus/src/app/sagas/disk_delete.rs @@ -2,7 +2,6 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use super::disk_create::delete_regions; use super::ActionRegistry; use super::NexusActionContext; use super::NexusSaga; @@ -33,23 +32,6 @@ lazy_static! { // underlying regions. 
sdd_delete_disk_record ); - static ref DELETE_REGIONS: NexusAction = new_action_noop_undo( - "disk-delete.delete-regions", - // TODO(https://github.com/oxidecomputer/omicron/issues/612): - // We need a way to deal with this operation failing, aside from - // propagating the error to the user. - // - // What if the Sled goes offline? Nexus must ultimately be - // responsible for reconciling this scenario. - // - // The current behavior causes the disk deletion saga to - // fail, but still marks the disk as destroyed. - sdd_delete_regions - ); - static ref DELETE_REGION_RECORDS: NexusAction = new_action_noop_undo( - "disk-delete.delete-region-records", - sdd_delete_region_records - ); static ref DELETE_VOLUME_RECORD: NexusAction = new_action_noop_undo( "disk-delete.delete-volume-record", sdd_delete_volume_record @@ -66,8 +48,6 @@ impl NexusSaga for SagaDiskDelete { fn register_actions(registry: &mut ActionRegistry) { registry.register(Arc::clone(&*DELETE_DISK_RECORD)); - registry.register(Arc::clone(&*DELETE_REGIONS)); - registry.register(Arc::clone(&*DELETE_REGION_RECORDS)); registry.register(Arc::clone(&*DELETE_VOLUME_RECORD)); } @@ -81,17 +61,7 @@ impl NexusSaga for SagaDiskDelete { DELETE_DISK_RECORD.as_ref(), )); builder.append(Node::action( - "no_result1", - "DeleteRegions", - DELETE_REGIONS.as_ref(), - )); - builder.append(Node::action( - "no_result2", - "DeleteRegionRecords", - DELETE_REGION_RECORDS.as_ref(), - )); - builder.append(Node::action( - "no_result3", + "no_result", "DeleteVolumeRecord", DELETE_VOLUME_RECORD.as_ref(), )); @@ -115,42 +85,13 @@ async fn sdd_delete_disk_record( Ok(volume_id) } -async fn sdd_delete_regions( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let osagactx = sagactx.user_data(); - let volume_id = sagactx.lookup::("volume_id")?; - let datasets_and_regions = osagactx - .datastore() - .get_allocated_regions(volume_id) - .await - .map_err(ActionError::action_failed)?; - delete_regions(datasets_and_regions) - .await - .map_err(ActionError::action_failed)?; - Ok(()) -} - -async fn sdd_delete_region_records( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let osagactx = sagactx.user_data(); - let volume_id = sagactx.lookup::("volume_id")?; - osagactx - .datastore() - .regions_hard_delete(volume_id) - .await - .map_err(ActionError::action_failed)?; - Ok(()) -} - async fn sdd_delete_volume_record( sagactx: NexusActionContext, ) -> Result<(), ActionError> { let osagactx = sagactx.user_data(); let volume_id = sagactx.lookup::("volume_id")?; osagactx - .datastore() + .nexus() .volume_delete(volume_id) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index 35dcae5b5bd..cf842e95dda 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -24,6 +24,7 @@ pub mod disk_delete; pub mod instance_create; pub mod instance_migrate; pub mod snapshot_create; +pub mod volume_delete; #[derive(Debug)] pub struct NexusSagaType; @@ -97,6 +98,9 @@ fn make_action_registry() -> ActionRegistry { ::register_actions( &mut registry, ); + ::register_actions( + &mut registry, + ); registry } @@ -106,3 +110,8 @@ pub(super) async fn saga_generate_uuid( ) -> Result { Ok(Uuid::new_v4()) } + +// Arbitrary limit on concurrency, for operations issued on multiple regions +// within a disk at the same time. 
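+// Used by both the disk create and volume delete sagas when issuing requests
+// to Crucible agents.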
+const MAX_CONCURRENT_REGION_REQUESTS: usize = 3; + diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 3d9fe37f3a2..57c884090cb 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -123,28 +123,16 @@ pub struct Params { // snapshot create saga: actions -/// A no-op action used because if saga nodes fail their undo action isn't run! -async fn ssc_noop(_sagactx: NexusActionContext) -> Result<(), ActionError> { - Ok(()) -} - lazy_static! { static ref CREATE_SNAPSHOT_RECORD: NexusAction = ActionFunc::new_action( "snapshot-create.create-snapshot-record", ssc_create_snapshot_record, ssc_create_snapshot_record_undo, ); - static ref SEND_SNAPSHOT_REQUEST: NexusAction = ActionFunc::new_action( + static ref SEND_SNAPSHOT_REQUEST: NexusAction = new_action_noop_undo( "snapshot-create.send-snapshot-request", ssc_send_snapshot_request, - ssc_send_snapshot_request_undo, ); - static ref NOOP_FOR_START_RUNNING_SNAPSHOT: NexusAction = - ActionFunc::new_action( - "snapshot-create.noop-for-start-running-snapshot", - ssc_noop, - ssc_start_running_snapshot_undo, - ); static ref START_RUNNING_SNAPSHOT: NexusAction = new_action_noop_undo( "snapshot-create.start-running-snapshot", ssc_start_running_snapshot, @@ -171,7 +159,6 @@ impl NexusSaga for SagaSnapshotCreate { fn register_actions(registry: &mut ActionRegistry) { registry.register(Arc::clone(&*CREATE_SNAPSHOT_RECORD)); registry.register(Arc::clone(&*SEND_SNAPSHOT_REQUEST)); - registry.register(Arc::clone(&*NOOP_FOR_START_RUNNING_SNAPSHOT)); registry.register(Arc::clone(&*START_RUNNING_SNAPSHOT)); registry.register(Arc::clone(&*CREATE_VOLUME_RECORD)); registry.register(Arc::clone(&*FINALIZE_SNAPSHOT_RECORD)); @@ -208,27 +195,7 @@ impl NexusSaga for SagaSnapshotCreate { SEND_SNAPSHOT_REQUEST.as_ref(), )); - // The following saga action iterates over the datasets and regions for a - // disk and make requests for each tuple, and this violates the saga's - // mental model where actions should do one thing at a time and be - // idempotent + atomic. If only a few of the requests succeed, the saga will - // leave things in a partial state because the undo function of a node is - // not run when the action fails. - // - // Use a noop action and an undo, followed by an action + no undo, to work - // around this: - // - // - [noop, undo function] - // - [action function, noop] - // - // With this, if the action function fails, the undo function will run. 
- // Validate with crucible agent and start snapshot downstairs - builder.append(Node::action( - "noop_for_replace_sockets_map", - "NoopForStartRunningSnapshot", - NOOP_FOR_START_RUNNING_SNAPSHOT.as_ref(), - )); builder.append(Node::action( "replace_sockets_map", "StartRunningSnapshot", @@ -442,50 +409,6 @@ async fn ssc_send_snapshot_request( Ok(()) } -async fn ssc_send_snapshot_request_undo( - sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { - let log = sagactx.user_data().log(); - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let opctx = OpContext::for_saga_action(&sagactx, ¶ms.serialized_authn); - - let snapshot_id = sagactx.lookup::("snapshot_id")?; - - let (.., disk) = LookupPath::new(&opctx, &osagactx.datastore()) - .project_id(params.project_id) - .disk_name(¶ms.create_params.disk.clone().into()) - .fetch() - .await - .map_err(ActionError::action_failed)?; - - // Delete any snapshots created by this saga - let datasets_and_regions = osagactx - .datastore() - .get_allocated_regions(disk.volume_id) - .await - .map_err(ActionError::action_failed)?; - - for (dataset, region) in datasets_and_regions { - // Create a Crucible agent client - let url = format!("http://{}", dataset.address()); - let client = CrucibleAgentClient::new(&url); - - // Delete snapshot, it was created by this saga - info!(log, "deleting snapshot {} {} {}", url, region.id(), snapshot_id); - client - .region_delete_snapshot( - &RegionId(region.id().to_string()), - &snapshot_id.to_string(), - ) - .await - .map_err(|e| e.to_string()) - .map_err(ActionError::action_failed)?; - } - - Ok(()) -} - async fn ssc_start_running_snapshot( sagactx: NexusActionContext, ) -> Result, ActionError> { @@ -562,60 +485,24 @@ async fn ssc_start_running_snapshot( dataset.address_with_port(crucible_running_snapshot.port_number) ); info!(log, "map {} to {}", region_addr, snapshot_addr); - map.insert(region_addr, snapshot_addr); - } - - Ok(map) -} - -async fn ssc_start_running_snapshot_undo( - sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { - let log = sagactx.user_data().log(); - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let opctx = OpContext::for_saga_action(&sagactx, ¶ms.serialized_authn); - - let snapshot_id = sagactx.lookup::("snapshot_id")?; - - let (.., disk) = LookupPath::new(&opctx, &osagactx.datastore()) - .project_id(params.project_id) - .disk_name(¶ms.create_params.disk.clone().into()) - .fetch() - .await - .map_err(ActionError::action_failed)?; - - // Delete any running snapshots created by this saga - let datasets_and_regions = osagactx - .datastore() - .get_allocated_regions(disk.volume_id) - .await - .map_err(ActionError::action_failed)?; - - for (dataset, region) in datasets_and_regions { - // Create a Crucible agent client - let url = format!("http://{}", dataset.address()); - let client = CrucibleAgentClient::new(&url); - - // Delete running snapshot - info!( - log, - "deleting running snapshot {} {} {}", - url, - region.id(), - snapshot_id - ); - client - .region_delete_running_snapshot( - &RegionId(region.id().to_string()), - &snapshot_id.to_string(), - ) + map.insert(region_addr, snapshot_addr.clone()); + + // Once snapshot has been validated, and running snapshot has been + // started, add an entry in the region_snapshot table to correspond to + // these Crucible resources. 
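+        // volume_references starts at zero here; volume_create will increment
+        // it when the snapshot volume that points at this target is inserted.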
+ osagactx.datastore() + .region_snapshot_create(db::model::RegionSnapshot { + dataset_id: dataset.id(), + region_id: region.id(), + snapshot_id, + snapshot_addr, + volume_references: 0, // to be filled later + }) .await - .map_err(|e| e.to_string()) .map_err(ActionError::action_failed)?; } - Ok(()) + Ok(map) } async fn ssc_create_volume_record( @@ -679,6 +566,7 @@ async fn ssc_create_volume_record( info!(log, "snapshot volume construction request {}", volume_data); let volume = db::model::Volume::new(volume_id, volume_data); + // Insert volume record into the DB let volume_created = osagactx .datastore() .volume_create(volume) @@ -696,8 +584,9 @@ async fn ssc_create_volume_record_undo( let log = sagactx.user_data().log(); let osagactx = sagactx.user_data(); let volume_id = sagactx.lookup::("volume_id")?; + info!(log, "deleting volume {}", volume_id); - osagactx.datastore().volume_delete(volume_id).await?; + osagactx.nexus().volume_delete(volume_id).await?; Ok(()) } diff --git a/nexus/src/app/sagas/volume_delete.rs b/nexus/src/app/sagas/volume_delete.rs new file mode 100644 index 00000000000..8073a98a15c --- /dev/null +++ b/nexus/src/app/sagas/volume_delete.rs @@ -0,0 +1,443 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Nexus is responsible for telling Crucible Agent(s) when to clean up +//! resources - those Agents do not have any idea of what volumes are +//! constructed, currently active, etc. Plus, volumes can (and will) change +//! during their lifetime. Operations like growing a disk, removing a read-only +//! parent after a scrub has completed, or re-encrypting a disk will all change +//! the volume that backs a disk. +//! +//! Nexus has to account for all the Crucible resources it is using, and count +//! how many volumes are using those resources. Only when that count drops to +//! zero is it valid to clean up the appropriate Crucible resource. +//! +//! Complicating things is the fact that ZFS datasets cannot be deleted if there +//! are snapshots of that dataset. Nexus' resource accounting must take this +//! dependency into account. Note that ZFS snapshots can layer, but any snapshot +//! can be deleted without the requirement of (for example) deleting the +//! snapshots in a certain order. +//! +//! One problem to solve is doing this idempotently. Volumes reference Crucible +//! resources, and when they are inserted or deleted the accounting needs to +//! change. Saga nodes must be idempotent in order to work correctly. + +use super::ActionRegistry; +use super::NexusActionContext; +use super::NexusSaga; +use super::MAX_CONCURRENT_REGION_REQUESTS; +use crate::app::sagas::NexusAction; +use crate::db; +use crate::db::datastore::CrucibleResources; +use lazy_static::lazy_static; +use serde::Deserialize; +use serde::Serialize; +use std::sync::Arc; +use steno::new_action_noop_undo; +use steno::ActionError; +use steno::Node; +use uuid::Uuid; +use omicron_common::api::external::Error; +use futures::StreamExt; +use crucible_agent_client::{ + types::RegionId, + Client as CrucibleAgentClient, +}; +use nexus_types::identity::Asset; + +// volume delete saga: input parameters + +#[derive(Debug, Deserialize, Serialize)] +pub struct Params { + pub volume_id: Uuid, +} + +// volume delete saga: actions + +lazy_static! 
{ + // TODO(https://github.com/oxidecomputer/omicron/issues/612): + // + // We need a way to deal with this operation failing, aside from + // propagating the error to the user. + // + // What if the Sled goes offline? Nexus must ultimately be + // responsible for reconciling this scenario. + + static ref DECREASE_CRUCIBLE_RESOURCE_COUNT: NexusAction = new_action_noop_undo( + "volume-delete.decrease-resource-count", + svd_decrease_crucible_resource_count, + ); + + static ref DELETE_CRUCIBLE_REGIONS: NexusAction = new_action_noop_undo( + "volume-delete.delete-crucible-regions", + svd_delete_crucible_regions, + ); + + static ref DELETE_CRUCIBLE_SNAPSHOTS: NexusAction = new_action_noop_undo( + "volume-delete.delete-crucible-snapshots", + svd_delete_crucible_snapshots, + ); + + static ref DELETE_FREED_CRUCIBLE_REGIONS: NexusAction = new_action_noop_undo( + "volume-delete.delete-freed-crucible-regions", + svd_delete_freed_crucible_regions, + ); + + static ref HARD_DELETE_VOLUME_RECORD: NexusAction = new_action_noop_undo( + "volume-delete.hard-delete-volume-record", + svd_hard_delete_volume_record, + ); +} + +// volume delete saga: definition + +#[derive(Debug)] +pub struct SagaVolumeDelete; +impl NexusSaga for SagaVolumeDelete { + const NAME: &'static str = "volume-delete"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + registry.register(Arc::clone(&*DECREASE_CRUCIBLE_RESOURCE_COUNT)); + registry.register(Arc::clone(&*DELETE_CRUCIBLE_REGIONS)); + registry.register(Arc::clone(&*DELETE_CRUCIBLE_SNAPSHOTS)); + registry.register(Arc::clone(&*DELETE_FREED_CRUCIBLE_REGIONS)); + registry.register(Arc::clone(&*HARD_DELETE_VOLUME_RECORD)); + } + + fn make_saga_dag( + _params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + builder.append(Node::action( + "crucible_resources_to_delete", + "DecreaseCrucibleResources", + DECREASE_CRUCIBLE_RESOURCE_COUNT.as_ref(), + )); + + builder.append_parallel(vec![ + // clean up top level regions for volume + Node::action( + "no_result_1", + "DeleteCrucibleRegions", + DELETE_CRUCIBLE_REGIONS.as_ref(), + ), + // clean up snapshots no longer referenced by any volume + Node::action( + "no_result_2", + "DeleteCrucibleSnapshots", + DELETE_CRUCIBLE_SNAPSHOTS.as_ref(), + ) + ]); + + // clean up regions that were freed by deleting snapshots + builder.append(Node::action( + "no_result_3", + "DeleteFreedCrucibleRegions", + DELETE_FREED_CRUCIBLE_REGIONS.as_ref(), + )); + + builder.append(Node::action( + "final_no_result", + "HardDeleteVolumeRecord", + HARD_DELETE_VOLUME_RECORD.as_ref(), + )); + + Ok(builder.build()?) + } +} + +// volume delete saga: action implementations + +/// Decrease Crucible resource accounting for this volume, and return Crucible +/// resources to delete. +async fn svd_decrease_crucible_resource_count( + sagactx: NexusActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let crucible_resources = osagactx + .datastore() + .decrease_crucible_resource_count_and_soft_delete_volume(params.volume_id) + .await + .map_err(ActionError::action_failed)?; + + Ok(crucible_resources) +} + +/// Clean up regions associated with this volume. 
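+/// Regions that still back referenced snapshots are not in this set; they are
+/// cleaned up later by svd_delete_freed_crucible_regions.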
+async fn svd_delete_crucible_regions( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + + let crucible_resources_to_delete = + sagactx.lookup::("crucible_resources_to_delete")?; + + // Send DELETE calls to the corresponding Crucible agents + delete_crucible_regions( + crucible_resources_to_delete.datasets_and_regions.clone() + ) + .await + .map_err(ActionError::action_failed)?; + + // Remove DB records + let region_ids_to_delete = + crucible_resources_to_delete.datasets_and_regions + .iter() + .map(|(_, r)| r.id()) + .collect(); + + osagactx.datastore() + .regions_hard_delete(region_ids_to_delete) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +/// Clean up snapshots freed up for deletion by deleting this volume. +/// +/// This Volume may have referenced read-only downstairs (and their snapshots), +/// and deleting it will remove the references - this may free up those +/// resources for deletion, which this Saga node does. +async fn svd_delete_crucible_snapshots( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + + let crucible_resources_to_delete = + sagactx.lookup::("crucible_resources_to_delete")?; + + // Send DELETE calls to the corresponding Crucible agents + delete_crucible_snapshots( + crucible_resources_to_delete.datasets_and_snapshots.clone() + ) + .await + .map_err(ActionError::action_failed)?; + + // Remove DB records + for (_, region_snapshot) in &crucible_resources_to_delete.datasets_and_snapshots { + osagactx.datastore().region_snapshot_remove( + region_snapshot.dataset_id, + region_snapshot.region_id, + region_snapshot.snapshot_id, + ) + .await + .map_err(ActionError::action_failed)?; + } + + Ok(()) +} + +/// Deleting region snapshots in a previous saga node may have freed up regions +/// that were deleted in the DB but couldn't be deleted by the Crucible Agent +/// because a snapshot existed. Look for those here, and delete them. These will +/// be a different volume id (i.e. for a previously deleted disk) than the one +/// in this saga's params struct. +/// +/// Note: each delete of a snapshot could trigger another delete of a region, if +/// that region's use has gone to zero. A snapshot delete will never trigger +/// another snapshot delete. +async fn svd_delete_freed_crucible_regions( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + + // Find regions freed up for deletion by a previous saga node deleting the + // region snapshots. 
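+    // This scans every soft-deleted volume, not just the one being deleted by
+    // this saga.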
+ let freed_datasets_regions_and_volumes = osagactx.datastore() + .find_deleted_volume_regions() + .await + .map_err(ActionError::action_failed)?; + + // Send DELETE calls to the corresponding Crucible agents + delete_crucible_regions( + freed_datasets_regions_and_volumes + .iter() + .map(|(d, r, _)| (d.clone(), r.clone())) + .collect() + ) + .await + .map_err(ActionError::action_failed)?; + + // Remove region DB records + osagactx.datastore().regions_hard_delete( + freed_datasets_regions_and_volumes + .iter() + .map(|(_, r, _)| r.id()) + .collect() + ) + .await + .map_err(ActionError::action_failed)?; + + // Remove volume DB records + for (_, _, volume) in &freed_datasets_regions_and_volumes { + osagactx.datastore().volume_hard_delete( + volume.id() + ) + .await + .map_err(ActionError::action_failed)?; + } + + Ok(()) +} + +/// Hard delete the volume record +async fn svd_hard_delete_volume_record( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + // Do not hard delete the volume record if there are lingering regions + // associated with them. This occurs when a region snapshot hasn't been + // deleted, which means we can't delete the region. Later on, deleting the + // region snapshot will free up the region(s) to be deleted (this occurs in + // svd_delete_freed_crucible_regions). + let allocated_regions = osagactx.datastore() + .get_allocated_regions(params.volume_id) + .await + .map_err(ActionError::action_failed)?; + + if !allocated_regions.is_empty() { + return Ok(()); + } + + osagactx + .datastore() + .volume_hard_delete(params.volume_id) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +// helper functions + +// Given a list of datasets and regions, send DELETE calls to the datasets +// corresponding Crucible Agent for each region. +pub(super) async fn delete_crucible_regions( + datasets_and_regions: Vec<(db::model::Dataset, db::model::Region)>, +) -> Result<(), Error> { + let request_count = datasets_and_regions.len(); + if request_count == 0 { + return Ok(()); + } + + futures::stream::iter(datasets_and_regions) + .map(|(dataset, region)| async move { + let url = format!("http://{}", dataset.address()); + let client = CrucibleAgentClient::new(&url); + let id = RegionId(region.id().to_string()); + client.region_delete(&id).await.map_err(|e| match e { + crucible_agent_client::Error::ErrorResponse(rv) => { + match rv.status() { + http::StatusCode::SERVICE_UNAVAILABLE => { + Error::unavail(&rv.message) + } + status if status.is_client_error() => { + Error::invalid_request(&rv.message) + } + _ => Error::internal_error(&rv.message), + } + } + _ => Error::internal_error( + "unexpected failure during `delete_crucible_regions`", + ), + }) + }) + // Execute the allocation requests concurrently. + .buffer_unordered(std::cmp::min( + request_count, + MAX_CONCURRENT_REGION_REQUESTS, + )) + .collect::>>() + .await + .into_iter() + .collect::, _>>()?; + + Ok(()) +} + +// Given a list of datasets and region snapshots, send DELETE calls to the +// datasets corresponding Crucible Agent for each running read-only downstairs +// and snapshot. 
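+// The running read-only downstairs is removed first, then the snapshot itself.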
+pub(super) async fn delete_crucible_snapshots( + datasets_and_snapshots: Vec<(db::model::Dataset, db::model::RegionSnapshot)>, +) -> Result<(), Error> { + let request_count = datasets_and_snapshots.len(); + if request_count == 0 { + return Ok(()); + } + + futures::stream::iter(datasets_and_snapshots) + .map(|(dataset, region_snapshot)| async move { + let url = format!("http://{}", dataset.address()); + let client = CrucibleAgentClient::new(&url); + + // delete running snapshot + client + .region_delete_running_snapshot( + &RegionId(region_snapshot.region_id.to_string()), + ®ion_snapshot.snapshot_id.to_string(), + ) + .await + .map_err(|e| match e { + crucible_agent_client::Error::ErrorResponse(rv) => { + match rv.status() { + http::StatusCode::SERVICE_UNAVAILABLE => { + Error::unavail(&rv.message) + } + status if status.is_client_error() => { + Error::invalid_request(&rv.message) + } + _ => Error::internal_error(&rv.message), + } + } + _ => Error::internal_error( + "unexpected failure during `region_delete_running_snapshot`", + ), + })?; + + // delete snapshot + client + .region_delete_snapshot( + &RegionId(region_snapshot.region_id.to_string()), + ®ion_snapshot.snapshot_id.to_string(), + ) + .await + .map_err(|e| match e { + crucible_agent_client::Error::ErrorResponse(rv) => { + match rv.status() { + http::StatusCode::SERVICE_UNAVAILABLE => { + Error::unavail(&rv.message) + } + status if status.is_client_error() => { + Error::invalid_request(&rv.message) + } + _ => Error::internal_error(&rv.message), + } + } + _ => Error::internal_error( + "unexpected failure during `region_delete_snapshot`", + ), + })?; + + Ok(()) + }) + // Execute the allocation requests concurrently. + .buffer_unordered(std::cmp::min( + request_count, + MAX_CONCURRENT_REGION_REQUESTS, + )) + .collect::>>() + .await + .into_iter() + .collect::, _>>()?; + + Ok(()) +} + diff --git a/nexus/src/app/volume.rs b/nexus/src/app/volume.rs new file mode 100644 index 00000000000..f9a1a200d77 --- /dev/null +++ b/nexus/src/app/volume.rs @@ -0,0 +1,40 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Volumes + +use crate::app::sagas; +use omicron_common::api::external::DeleteResult; +use omicron_common::api::external::Error; +use std::sync::Arc; +use uuid::Uuid; + +impl super::Nexus { + /// Kick off a saga to delete a volume (and clean up any Crucible resources + /// as a result). Importantly, this should not be a sub-saga - whoever is + /// calling this should not block on cleaning up Crucible Resources, because + /// the deletion of a "disk" or "snapshot" could free up a *lot* of Crucible + /// resources and the user's query shouldn't wait on those DELETE calls. 
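+    /// Calling this for a volume that has already been cleaned up is safe: the
+    /// saga's first node returns an empty set of resources in that case.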
+ pub async fn volume_delete( + self: &Arc, + volume_id: Uuid + ) -> DeleteResult { + let saga_params = sagas::volume_delete::Params { volume_id }; + + // TODO execute this in the background instead, not using the usual SEC + let saga_outputs = self + .execute_saga::( + saga_params, + ) + .await?; + + let volume_deleted = saga_outputs + .lookup_node_output::<()>("final_no_result") + .map_err(|e| Error::InternalError { + internal_message: e.to_string(), + })?; + + Ok(volume_deleted) + } +} diff --git a/nexus/src/db/datastore/mod.rs b/nexus/src/db/datastore/mod.rs index 4a7a4f055e9..93bb030ee02 100644 --- a/nexus/src/db/datastore/mod.rs +++ b/nexus/src/db/datastore/mod.rs @@ -55,6 +55,7 @@ mod oximeter; mod project; mod rack; mod region; +mod region_snapshot; mod role; mod saga; mod service; @@ -69,6 +70,8 @@ mod volume; mod vpc; mod zpool; +pub use volume::CrucibleResources; + // Number of unique datasets required to back a region. // TODO: This should likely turn into a configuration option. const REGION_REDUNDANCY_THRESHOLD: usize = 3; diff --git a/nexus/src/db/datastore/region.rs b/nexus/src/db/datastore/region.rs index acf14c8a4b0..bef92077abd 100644 --- a/nexus/src/db/datastore/region.rs +++ b/nexus/src/db/datastore/region.rs @@ -327,10 +327,14 @@ impl DataStore { }) } - /// Deletes all regions backing a disk. + /// Deletes a set of regions. /// /// Also updates the storage usage on their corresponding datasets. - pub async fn regions_hard_delete(&self, volume_id: Uuid) -> DeleteResult { + pub async fn regions_hard_delete(&self, region_ids: Vec) -> DeleteResult { + if region_ids.is_empty() { + return Ok(()); + } + #[derive(Debug, thiserror::Error)] enum RegionDeleteError { #[error("Numeric error: {0}")] @@ -345,7 +349,7 @@ impl DataStore { // Remove the regions, collecting datasets they're from. let datasets = diesel::delete(region_dsl::region) - .filter(region_dsl::volume_id.eq(volume_id)) + .filter(region_dsl::id.eq_any(region_ids)) .returning(region_dsl::dataset_id) .get_results::(conn)?; diff --git a/nexus/src/db/datastore/region_snapshot.rs b/nexus/src/db/datastore/region_snapshot.rs new file mode 100644 index 00000000000..e76989ce98d --- /dev/null +++ b/nexus/src/db/datastore/region_snapshot.rs @@ -0,0 +1,63 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! [`DataStore`] methods on [`RegionSnapshot`]s. 
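+//! A row is inserted when a snapshot is taken of a region, and removed by the
+//! volume delete saga once no volume references it.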
+ +use super::DataStore; +use crate::db; +use crate::db::error::public_error_from_diesel_pool; +use crate::db::error::ErrorHandler; +use crate::db::model::RegionSnapshot; +use async_bb8_diesel::AsyncRunQueryDsl; +use diesel::prelude::*; +use omicron_common::api::external::CreateResult; +use omicron_common::api::external::DeleteResult; +use uuid::Uuid; + +impl DataStore { + pub async fn region_snapshot_create( + &self, + region_snapshot: RegionSnapshot, + ) -> CreateResult { + use db::schema::region_snapshot::dsl; + + // TODO https://github.com/oxidecomputer/omicron/issues/1168 + diesel::insert_into(dsl::region_snapshot) + .values(region_snapshot.clone()) + .on_conflict((dsl::dataset_id, dsl::region_id, dsl::snapshot_id)) + .do_nothing() + .returning(RegionSnapshot::as_returning()) + .get_result_async(self.pool()) + .await + .map_err(|e| { + public_error_from_diesel_pool( + e, + ErrorHandler::Server, + ) + }) + } + + pub async fn region_snapshot_remove( + &self, + dataset_id: Uuid, + region_id: Uuid, + snapshot_id: Uuid, + ) -> DeleteResult { + use db::schema::region_snapshot::dsl; + + diesel::delete(dsl::region_snapshot) + .filter(dsl::dataset_id.eq(dataset_id)) + .filter(dsl::region_id.eq(region_id)) + .filter(dsl::snapshot_id.eq(snapshot_id)) + .execute_async(self.pool()) + .await + .map(|_rows_deleted| ()) + .map_err(|e| { + public_error_from_diesel_pool( + e, + ErrorHandler::Server, + ) + }) + } +} diff --git a/nexus/src/db/datastore/snapshot.rs b/nexus/src/db/datastore/snapshot.rs index 23c3f3d7453..4661ac21e04 100644 --- a/nexus/src/db/datastore/snapshot.rs +++ b/nexus/src/db/datastore/snapshot.rs @@ -11,7 +11,6 @@ use crate::db; use crate::db::datastore::RunnableQuery; use crate::db::error::public_error_from_diesel_pool; use crate::db::error::ErrorHandler; -use crate::db::error::TransactionError; use crate::db::lookup::LookupPath; use crate::db::model::Generation; use crate::db::model::Name; @@ -19,7 +18,6 @@ use crate::db::model::Snapshot; use crate::db::model::SnapshotState; use crate::db::pagination::paginated; use crate::db::update_and_check::UpdateAndCheck; -use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; @@ -144,45 +142,22 @@ impl DataStore { // A snapshot can be deleted in any state. It's never attached to an // instance, and any disk launched from it will copy and modify the volume - // construction request it's based on. The associated volume can be also - // be deleted - this will not affect any active crucible connections - // because no actual resources are cleaned up here. - // - // TODO-correctness this will leak on-disk snapshots and currently - // running read-only downstairs, which will need to be cleaned up once - // all volumes that reference them are gone. + // construction request it's based on. let snapshot_id = authz_snapshot.id(); let gen = db_snapshot.gen; - let volume_id = db_snapshot.volume_id; - let volume_delete_query = self.volume_delete_query(volume_id); - - self.pool_authorized(&opctx) - .await? 
- .transaction_async(|conn| async move { - use db::schema::snapshot::dsl; - - diesel::update(dsl::snapshot) - .filter(dsl::time_deleted.is_null()) - .filter(dsl::gen.eq(gen)) - .filter(dsl::id.eq(snapshot_id)) - .set(dsl::time_deleted.eq(now)) - .check_if_exists::(snapshot_id) - .execute_async(&conn) - .await?; - - volume_delete_query.execute_async(&conn).await?; + use db::schema::snapshot::dsl; - Ok(()) - }) + diesel::update(dsl::snapshot) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::gen.eq(gen)) + .filter(dsl::id.eq(snapshot_id)) + .set(dsl::time_deleted.eq(now)) + .check_if_exists::(snapshot_id) + .execute_async(self.pool_authorized(&opctx).await?) .await - .map_err(|e| match e { - TransactionError::CustomError(e) => e, - TransactionError::Pool(e) => { - public_error_from_diesel_pool(e, ErrorHandler::Server) - } - })?; + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server))?; Ok(snapshot_id) } diff --git a/nexus/src/db/datastore/volume.rs b/nexus/src/db/datastore/volume.rs index 4909c500ca2..4d0789d5309 100644 --- a/nexus/src/db/datastore/volume.rs +++ b/nexus/src/db/datastore/volume.rs @@ -5,62 +5,142 @@ //! [`DataStore`] methods on [`Volume`]s. use super::DataStore; +use async_bb8_diesel::AsyncConnection; +use async_bb8_diesel::AsyncRunQueryDsl; use crate::db; use crate::db::error::public_error_from_diesel_pool; use crate::db::error::ErrorHandler; +use crate::db::error::TransactionError; use crate::db::identity::Asset; +use crate::db::model::Dataset; +use crate::db::model::Region; +use crate::db::model::RegionSnapshot; use crate::db::model::Volume; -use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use diesel::OptionalExtension as DieselOptionalExtension; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DeleteResult; +use omicron_common::api::external::Error; +use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; use uuid::Uuid; +use serde::Serialize; +use serde::Deserialize; +use sled_agent_client::types::VolumeConstructionRequest; impl DataStore { pub async fn volume_create(&self, volume: Volume) -> CreateResult { use db::schema::volume::dsl; - diesel::insert_into(dsl::volume) - .values(volume.clone()) - .on_conflict(dsl::id) - .do_nothing() - .returning(Volume::as_returning()) - .get_result_async(self.pool()) - .await - .map_err(|e| { - public_error_from_diesel_pool( - e, - ErrorHandler::Conflict( - ResourceType::Volume, - volume.id().to_string().as_str(), - ), - ) - }) + #[derive(Debug, thiserror::Error)] + enum VolumeCreationError { + #[error("Error from Volume creation: {0}")] + Public(Error), + + #[error("Serde error during Volume creation: {0}")] + SerdeError(#[from] serde_json::Error), + } + type TxnError = TransactionError; + + self.pool().transaction(move |conn| { + let maybe_volume: Option = + dsl::volume + .filter(dsl::id.eq(volume.id())) + .select(Volume::as_select()) + .first(conn) + .optional() + .map_err(|e| TxnError::CustomError( + VolumeCreationError::Public( + public_error_from_diesel_pool( + e.into(), + ErrorHandler::Server, + ) + ) + ))?; + + // If the volume existed already, return it and do not increase + // usage counts. + if let Some(volume) = maybe_volume { + return Ok(volume); + } + + // TODO do we need on_conflict do_nothing here? 
if the transaction + // model is read-committed, the SELECT above could return nothing, + // and the INSERT here could still result in a conflict. + // + // See also https://github.com/oxidecomputer/omicron/issues/1168 + let volume: Volume = diesel::insert_into(dsl::volume) + .values(volume.clone()) + .on_conflict(dsl::id) + .do_nothing() + .returning(Volume::as_returning()) + .get_result(conn) + .map_err(|e| TxnError::CustomError( + VolumeCreationError::Public( + public_error_from_diesel_pool( + e.into(), + ErrorHandler::Conflict( + ResourceType::Volume, + volume.id().to_string().as_str(), + ), + ) + ) + ))?; + + // Increase the usage count for Crucible resources according to the + // contents of the volume. + + // Grab all the targets that the volume construction request references. + let crucible_targets = { + let vcr: VolumeConstructionRequest = serde_json::from_str(&volume.data()) + .map_err(|e: serde_json::Error| TxnError::CustomError( + VolumeCreationError::SerdeError(e) + ))?; + + let mut crucible_targets = CrucibleTargets::default(); + resources_associated_with_volume(&vcr, &mut crucible_targets); + crucible_targets + }; + + // Increase the number of uses for each referenced region snapshot. + use db::schema::region_snapshot::dsl as rs_dsl; + for read_only_target in &crucible_targets.read_only_targets { + diesel::update(rs_dsl::region_snapshot) + .filter(rs_dsl::snapshot_addr.eq(read_only_target.clone())) + .set(rs_dsl::volume_references.eq(rs_dsl::volume_references + 1)) + .execute(conn) + .map_err(|e| TxnError::CustomError( + VolumeCreationError::Public( + public_error_from_diesel_pool( + e.into(), + ErrorHandler::Server, + ) + ) + ))?; + } + + Ok(volume) + }) + .await + .map_err(|e| match e { + TxnError::CustomError( + VolumeCreationError::Public(e) + ) => e, + + _ => Error::internal_error( + &format!("Transaction error: {}", e), + ), + }) } - pub fn volume_delete_query( - &self, - volume_id: Uuid, - ) -> impl diesel::query_builder::QueryFragment - + RunQueryDsl - + diesel::query_builder::QueryId - where - T: Connection, - { + pub async fn volume_hard_delete(&self, volume_id: Uuid) -> DeleteResult { use db::schema::volume::dsl; - let now = Utc::now(); - diesel::update(dsl::volume) + diesel::delete(dsl::volume) .filter(dsl::id.eq(volume_id)) - .set(dsl::time_deleted.eq(now)) - } - - pub async fn volume_delete(&self, volume_id: Uuid) -> DeleteResult { - self.volume_delete_query(volume_id) .execute_async(self.pool()) .await .map_err(|e| { @@ -86,4 +166,311 @@ impl DataStore { .await .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) } + + /// Find regions for deleted volumes that do not have associated region + /// snapshots. 
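+    /// Returns the dataset and region to delete, along with the parent volume
+    /// so its record can be hard deleted once the region is gone.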
+ pub async fn find_deleted_volume_regions(&self) -> ListResultVec<(Dataset, Region, Volume)> { + use db::schema::region_snapshot::dsl; + use db::schema::dataset::dsl as dataset_dsl; + use db::schema::region::dsl as region_dsl; + use db::schema::volume::dsl as volume_dsl; + + // Find all regions and datasets + region_dsl::region + .inner_join( + volume_dsl::volume + .on(region_dsl::volume_id.eq(volume_dsl::id)) + ) + .inner_join( + dataset_dsl::dataset + .on(region_dsl::dataset_id.eq(dataset_dsl::id)) + ) + // where there either are no region snapshots, or the region + // snapshot volume references have gone to zero + .left_join( + dsl::region_snapshot + .on( + dsl::region_id.eq(region_dsl::id) + .and(dsl::dataset_id.eq(dataset_dsl::id)) + ) + ) + .filter( + dsl::volume_references.eq(0) + .or(dsl::volume_references.is_null()) + ) + // where the volume has already been soft-deleted + .filter(volume_dsl::time_deleted.is_not_null()) + // and return them (along with the volume so it can be hard deleted) + .select((Dataset::as_select(), Region::as_select(), Volume::as_select())) + .load_async(self.pool()) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + + /// Decrease the usage count for Crucible resources according to the + /// contents of the volume. Call this when deleting a volume (but before the + /// volume record has been hard deleted). + /// + /// Returns a list of Crucible resources to clean up, and soft-deletes the + /// volume. Note this function must be idempotent, it is called from a saga + /// node. + pub async fn decrease_crucible_resource_count_and_soft_delete_volume( + &self, + volume_id: Uuid, + ) -> Result { + #[derive(Debug, thiserror::Error)] + enum DecreaseCrucibleResourcesError { + #[error("Error during decrease Crucible resources: {0}")] + DieselError(#[from] diesel::result::Error), + + #[error("Serde error during decrease Crucible resources: {0}")] + SerdeError(#[from] serde_json::Error), + } + type TxnError = TransactionError; + + // In a transaction: + // + // 1. decrease the number of references for each region snapshot that + // this Volume references + // 2. soft-delete the volume + // 3. record the resources to clean up + // + // Step 3 is important because this function is called from a saga node. + // If saga execution crashes after steps 1 and 2, but before serializing + // the resources to be cleaned up as part of the saga node context, then + // that list of resources will be lost. + // + // We also have to guard against the case where this function is called + // multiple times, and that is done by soft-deleting the volume during + // the transaction, and returning the previously serialized list of + // resources to clean up if a soft-delete has already occurred. + // + // TODO it would be nice to make this transaction_async, but I couldn't + // get the async optional extension to work. + self.pool().transaction(move |conn| { + // Grab the volume in question. If the volume record was already + // hard-deleted, assume clean-up has occurred and return an empty + // CrucibleResources. If the volume record was soft-deleted, then + // return the serialized CrucibleResources. 
+ let volume = { + use db::schema::volume::dsl; + + let volume = dsl::volume + .filter(dsl::id.eq(volume_id)) + .select(Volume::as_select()) + .get_result(conn) + .optional()?; + + if volume.is_none() { + // the volume was hard-deleted, return an empty + // CrucibleResources + return Ok(CrucibleResources::default()); + } + + let volume = volume.unwrap(); + + if volume.time_deleted.is_none() { + // a volume record exists, and was not deleted - this is the + // first time through this transaction for a certain volume + // id. Get the volume for use in the transaction. + volume + } else { + // this volume was soft deleted - this is a repeat time + // through this transaction. + + if let Some(resources_to_clean_up) = volume.resources_to_clean_up { + // return the serialized CrucibleResources + return Ok(serde_json::from_str(&resources_to_clean_up) + .map_err(|e| + TxnError::CustomError( + DecreaseCrucibleResourcesError::SerdeError(e) + ) + )? + ); + } else { + // If no CrucibleResources struct was serialized, that's + // definitely a bug of some sort - the soft-delete below + // sets time_deleted at the same time as + // resources_to_clean_up! But, instead of a panic here, + // just return an empty CrucibleResources. + return Ok(CrucibleResources::default()); + } + } + }; + + // Grab all the targets that the volume construction request references. + let crucible_targets = { + let vcr: VolumeConstructionRequest = serde_json::from_str(&volume.data()) + .map_err(|e| + TxnError::CustomError( + DecreaseCrucibleResourcesError::SerdeError(e) + ) + )?; + + let mut crucible_targets = CrucibleTargets::default(); + resources_associated_with_volume(&vcr, &mut crucible_targets); + crucible_targets + }; + + // Decrease the number of uses for each referenced region snapshot. + use db::schema::region_snapshot::dsl; + + for read_only_target in &crucible_targets.read_only_targets { + diesel::update(dsl::region_snapshot) + .filter(dsl::snapshot_addr.eq(read_only_target.clone())) + .set(dsl::volume_references.eq(dsl::volume_references - 1)) + .execute(conn)?; + } + + // Return what results can be cleaned up + let result = CrucibleResources { + // The only use of a read-write region will be at the top level of a + // Volume. These are not shared, but if any snapshots are taken this + // will prevent deletion of the region. Filter out any regions that + // have associated snapshots. + datasets_and_regions: { + use db::schema::dataset::dsl as dataset_dsl; + use db::schema::region::dsl as region_dsl; + + // Return all regions for this volume_id, where there either are + // no region_snapshots, or region_snapshots.volume_references = + // 0. + region_dsl::region + .filter(region_dsl::volume_id.eq(volume_id)) + .inner_join( + dataset_dsl::dataset + .on(region_dsl::dataset_id.eq(dataset_dsl::id)), + ) + .left_join( + dsl::region_snapshot + .on( + dsl::region_id.eq(region_dsl::id) + .and(dsl::dataset_id.eq(dataset_dsl::id)) + ) + ) + .filter( + dsl::volume_references.eq(0) + .or(dsl::volume_references.is_null()) + ) + .select((Dataset::as_select(), Region::as_select())) + .get_results::<(Dataset, Region)>(conn)? + }, + + // A volume (for a disk or snapshot) may reference another nested + // volume as a read-only parent, and this may be arbitrarily deep. + // After decrementing volume_references above, get all region + // snapshot records where the volume_references has gone to 0. 
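+                // Note this matches any region snapshot whose reference count
+                // has reached zero, not only those referenced by this volume.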
+ // Consumers of this struct will be responsible for deleting the + // read-only downstairs running for the snapshot and the snapshot + // itself. + datasets_and_snapshots: { + use db::schema::dataset::dsl as dataset_dsl; + + dsl::region_snapshot + .filter(dsl::volume_references.eq(0)) + .inner_join( + dataset_dsl::dataset + .on(dsl::dataset_id.eq(dataset_dsl::id)), + ) + .select((Dataset::as_select(), RegionSnapshot::as_select())) + .get_results::<(Dataset, RegionSnapshot)>(conn)? + }, + }; + + // Soft delete this volume, and serialize the resources that are to + // be cleaned up. + use db::schema::volume::dsl as volume_dsl; + + let now = Utc::now(); + diesel::update(volume_dsl::volume) + .filter(volume_dsl::id.eq(volume_id)) + .set(( + volume_dsl::time_deleted.eq(now), + volume_dsl::resources_to_clean_up.eq( + serde_json::to_string(&result) + .map_err(|e| + TxnError::CustomError( + DecreaseCrucibleResourcesError::SerdeError(e) + ) + )? + ), + )) + .execute(conn)?; + + Ok(result) + }) + .await + .map_err(|e| match e { + TxnError::CustomError( + DecreaseCrucibleResourcesError::DieselError(e) + ) => public_error_from_diesel_pool(e.into(), ErrorHandler::Server), + + _ => Error::internal_error( + &format!("Transaction error: {}", e), + ), + }) + } +} + +#[derive(Default)] +struct CrucibleTargets { + read_only_targets: Vec, +} + +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct CrucibleResources { + pub datasets_and_regions: Vec<(Dataset, Region)>, + pub datasets_and_snapshots: Vec<(Dataset, RegionSnapshot)>, } + +/// Return the targets from a VolumeConstructionRequest. +/// +/// The targets of a volume construction request map to resources. +fn resources_associated_with_volume( + vcr: &VolumeConstructionRequest, + crucible_targets: &mut CrucibleTargets, +) { + match vcr { + VolumeConstructionRequest::Volume { + id: _, + block_size: _, + sub_volumes, + read_only_parent, + } => { + for sub_volume in sub_volumes { + resources_associated_with_volume( + sub_volume, + crucible_targets, + ); + } + + if let Some(read_only_parent) = read_only_parent { + resources_associated_with_volume( + read_only_parent, + crucible_targets, + ); + } + } + + VolumeConstructionRequest::Url { id: _, block_size: _, url: _ } => { + // no action required + } + + VolumeConstructionRequest::Region { + block_size: _, + opts, + gen: _, + } => { + for target in &opts.target { + if opts.read_only { + crucible_targets.read_only_targets.push(target.clone()); + } + } + } + + VolumeConstructionRequest::File { id: _, block_size: _, path: _ } => { + // no action required + } + } +} + diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index 6fe6f6a9e28..3abb881080b 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -492,4 +492,21 @@ impl DiskTest { } } } + + /// Returns true if all Crucible resources were cleaned up, false otherwise. 
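+    /// This asks each simulated Crucible dataset whether any resources are
+    /// still allocated there.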
+ pub async fn crucible_resources_deleted(&self) -> bool { + for zpool in &self.zpools { + for dataset in &zpool.datasets { + let crucible = self + .sled_agent + .get_crucible_dataset(zpool.id, dataset.id) + .await; + if !crucible.is_empty().await { + return false; + } + } + } + + true + } } diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index ff778156da5..6d062e034aa 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -31,6 +31,7 @@ mod unauthorized; mod unauthorized_coverage; mod updates; mod users_builtin; +mod volume_management; mod vpc_firewall; mod vpc_routers; mod vpc_subnets; diff --git a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs new file mode 100644 index 00000000000..1aee2f6614a --- /dev/null +++ b/nexus/tests/integration_tests/volume_management.rs @@ -0,0 +1,1147 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Tests that Nexus properly manages and cleans up Crucible resources +//! associated with Volumes + +use dropshot::test_util::ClientTestContext; +use http::method::Method; +use http::StatusCode; +use nexus_test_utils::http_testing::AuthnMode; +use nexus_test_utils::http_testing::NexusRequest; +use nexus_test_utils::http_testing::RequestBuilder; +use nexus_test_utils::resource_helpers::create_ip_pool; +use nexus_test_utils::resource_helpers::create_organization; +use nexus_test_utils::resource_helpers::create_project; +use nexus_test_utils::resource_helpers::object_create; +use nexus_test_utils::resource_helpers::DiskTest; +use nexus_test_utils::ControlPlaneTestContext; +use nexus_test_utils_macros::nexus_test; +use omicron_common::api::external::ByteCount; +use omicron_common::api::external::Disk; +use omicron_common::api::external::IdentityMetadataCreateParams; +use omicron_common::api::external::Name; +use omicron_nexus::external_api::params; +use omicron_nexus::external_api::views; +use uuid::Uuid; +use sled_agent_client::types::VolumeConstructionRequest; +use rand::{SeedableRng, rngs::StdRng}; +use rand::prelude::SliceRandom; + +use httptest::{matchers::*, responders::*, Expectation, ServerBuilder}; + +const ORG_NAME: &str = "test-org"; +const PROJECT_NAME: &str = "springfield-squidport-disks"; + +fn get_project_url() -> String { + format!("/organizations/{}/projects/{}", ORG_NAME, PROJECT_NAME) +} + +fn get_disks_url() -> String { + format!("{}/disks", get_project_url()) +} + +async fn create_org_and_project(client: &ClientTestContext) -> Uuid { + create_organization(&client, ORG_NAME).await; + let project = create_project(client, ORG_NAME, PROJECT_NAME).await; + project.identity.id +} + +#[nexus_test] +async fn test_snapshot_then_delete_disk(cptestctx: &ControlPlaneTestContext) { + // Test that Nexus does not delete a region if there's a snapshot of that + // region: + // + // 1. Create a disk + // 2. Create a snapshot of that disk (creating running snapshots) + // 3. Delete the disk + // 4. 
Delete the snapshot + + let client = &cptestctx.external_client; + let disk_test = DiskTest::new(&cptestctx).await; + create_ip_pool(&client, "p0", None, None).await; + create_org_and_project(client).await; + let disks_url = get_disks_url(); + + // Define a global image + let server = ServerBuilder::new().run().unwrap(); + server.expect( + Expectation::matching(request::method_path("HEAD", "/image.raw")) + .times(1..) + .respond_with( + status_code(200).append_header( + "Content-Length", + format!("{}", 4096 * 1000), + ), + ), + ); + + let image_create_params = params::GlobalImageCreate { + identity: IdentityMetadataCreateParams { + name: "alpine-edge".parse().unwrap(), + description: String::from( + "you can boot any image, as long as it's alpine", + ), + }, + source: params::ImageSource::Url { + url: server.url("/image.raw").to_string(), + }, + distribution: params::Distribution { + name: "alpine".parse().unwrap(), + version: "edge".into(), + }, + block_size: params::BlockSize::try_from(512).unwrap(), + }; + + let global_image: views::GlobalImage = + NexusRequest::objects_post(client, "/images", &image_create_params) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Create a disk from this image + let disk_size = ByteCount::try_from(2u64 * 1024 * 1024 * 1024).unwrap(); + let base_disk_name: Name = "base-disk".parse().unwrap(); + let base_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: base_disk_name.clone(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::GlobalImage { + image_id: global_image.identity.id, + }, + size: disk_size, + }; + + let base_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&base_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request + let snapshots_url = format!( + "/organizations/{}/projects/{}/snapshots", + ORG_NAME, PROJECT_NAME + ); + + let snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "a-snapshot".parse().unwrap(), + description: "a snapshot!".to_string(), + }, + disk: base_disk_name.clone(), + }, + ) + .await; + + assert_eq!(snapshot.disk_id, base_disk.identity.id); + assert_eq!(snapshot.size, base_disk.size); + + // Delete the disk + let disk_url = format!("{}/{}", disks_url, base_disk_name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + // Delete the snapshot + let snapshot_url = format!("{}/snapshots/{}", get_project_url(), "a-snapshot"); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + + // Assert everything was cleaned up + assert!(disk_test.crucible_resources_deleted().await); +} + +#[nexus_test] +async fn test_delete_snapshot_then_disk(cptestctx: &ControlPlaneTestContext) { + // Test that Nexus cleans up resources properly: + // + // 1. Create a disk + // 2. Create a snapshot of that disk (creating running snapshots) + // 3. Delete the snapshot + // 4. 
Delete the disk + + let client = &cptestctx.external_client; + let disk_test = DiskTest::new(&cptestctx).await; + create_ip_pool(&client, "p0", None, None).await; + create_org_and_project(client).await; + let disks_url = get_disks_url(); + + // Define a global image + let server = ServerBuilder::new().run().unwrap(); + server.expect( + Expectation::matching(request::method_path("HEAD", "/image.raw")) + .times(1..) + .respond_with( + status_code(200).append_header( + "Content-Length", + format!("{}", 4096 * 1000), + ), + ), + ); + + let image_create_params = params::GlobalImageCreate { + identity: IdentityMetadataCreateParams { + name: "alpine-edge".parse().unwrap(), + description: String::from( + "you can boot any image, as long as it's alpine", + ), + }, + source: params::ImageSource::Url { + url: server.url("/image.raw").to_string(), + }, + distribution: params::Distribution { + name: "alpine".parse().unwrap(), + version: "edge".into(), + }, + block_size: params::BlockSize::try_from(512).unwrap(), + }; + + let global_image: views::GlobalImage = + NexusRequest::objects_post(client, "/images", &image_create_params) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Create a disk from this image + let disk_size = ByteCount::try_from(2u64 * 1024 * 1024 * 1024).unwrap(); + let base_disk_name: Name = "base-disk".parse().unwrap(); + let base_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: base_disk_name.clone(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::GlobalImage { + image_id: global_image.identity.id, + }, + size: disk_size, + }; + + let base_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&base_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request + let snapshots_url = format!( + "/organizations/{}/projects/{}/snapshots", + ORG_NAME, PROJECT_NAME + ); + + let snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "a-snapshot".parse().unwrap(), + description: "a snapshot!".to_string(), + }, + disk: base_disk_name.clone(), + }, + ) + .await; + + assert_eq!(snapshot.disk_id, base_disk.identity.id); + assert_eq!(snapshot.size, base_disk.size); + + // Delete the snapshot + let snapshot_url = format!("{}/snapshots/{}", get_project_url(), "a-snapshot"); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + + // Delete the disk + let disk_url = format!("{}/{}", disks_url, base_disk_name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + // Assert everything was cleaned up + assert!(disk_test.crucible_resources_deleted().await); +} + +#[nexus_test] +async fn test_multiple_snapshots(cptestctx: &ControlPlaneTestContext) { + // Test that Nexus cleans up resources properly: + // + // 1. Create a disk + // 2. Create multiple snapshots of that disk (creating running snapshots) + // 3. Delete the disk + // 4. 
Delete the snapshots + + let client = &cptestctx.external_client; + let disk_test = DiskTest::new(&cptestctx).await; + create_ip_pool(&client, "p0", None, None).await; + create_org_and_project(client).await; + let disks_url = get_disks_url(); + + // Define a global image + let server = ServerBuilder::new().run().unwrap(); + server.expect( + Expectation::matching(request::method_path("HEAD", "/image.raw")) + .times(1..) + .respond_with( + status_code(200).append_header( + "Content-Length", + format!("{}", 4096 * 1000), + ), + ), + ); + + let image_create_params = params::GlobalImageCreate { + identity: IdentityMetadataCreateParams { + name: "alpine-edge".parse().unwrap(), + description: String::from( + "you can boot any image, as long as it's alpine", + ), + }, + source: params::ImageSource::Url { + url: server.url("/image.raw").to_string(), + }, + distribution: params::Distribution { + name: "alpine".parse().unwrap(), + version: "edge".into(), + }, + block_size: params::BlockSize::try_from(512).unwrap(), + }; + + let global_image: views::GlobalImage = + NexusRequest::objects_post(client, "/images", &image_create_params) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Create a disk from this image + let disk_size = ByteCount::try_from(2u64 * 1024 * 1024 * 1024).unwrap(); + let base_disk_name: Name = "base-disk".parse().unwrap(); + let base_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: base_disk_name.clone(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::GlobalImage { + image_id: global_image.identity.id, + }, + size: disk_size, + }; + + let base_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&base_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot requests + let snapshots_url = format!( + "/organizations/{}/projects/{}/snapshots", + ORG_NAME, PROJECT_NAME + ); + + for i in 0..4 { + let snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: format!("a-snapshot-{}", i).parse().unwrap(), + description: "a snapshot!".to_string(), + }, + disk: base_disk_name.clone(), + }, + ) + .await; + + assert_eq!(snapshot.disk_id, base_disk.identity.id); + assert_eq!(snapshot.size, base_disk.size); + } + + // Delete the disk + let disk_url = format!("{}/{}", disks_url, base_disk_name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + // Delete the snapshots + for i in 0..4 { + let snapshot_url = format!("{}/snapshots/a-snapshot-{}", get_project_url(), i); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + } + + // Assert everything was cleaned up + assert!(disk_test.crucible_resources_deleted().await); +} + +#[nexus_test] +async fn test_snapshot_prevents_other_disk(cptestctx: &ControlPlaneTestContext) { + // Test that region remains if there is a snapshot, preventing further + // allocation. 
+ let client = &cptestctx.external_client; + let disk_test = DiskTest::new(&cptestctx).await; + create_ip_pool(&client, "p0", None, None).await; + create_org_and_project(client).await; + let disks_url = get_disks_url(); + + // Define a global image + let server = ServerBuilder::new().run().unwrap(); + server.expect( + Expectation::matching(request::method_path("HEAD", "/image.raw")) + .times(1..) + .respond_with( + status_code(200).append_header( + "Content-Length", + format!("{}", 4096 * 1000), + ), + ), + ); + + let image_create_params = params::GlobalImageCreate { + identity: IdentityMetadataCreateParams { + name: "alpine-edge".parse().unwrap(), + description: String::from( + "you can boot any image, as long as it's alpine", + ), + }, + source: params::ImageSource::Url { + url: server.url("/image.raw").to_string(), + }, + distribution: params::Distribution { + name: "alpine".parse().unwrap(), + version: "edge".into(), + }, + block_size: params::BlockSize::try_from(512).unwrap(), + }; + + let global_image: views::GlobalImage = + NexusRequest::objects_post(client, "/images", &image_create_params) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Create a disk from this image + let disk_size = ByteCount::try_from(2u64 * 1024 * 1024 * 1024).unwrap(); + let base_disk_name: Name = "base-disk".parse().unwrap(); + let base_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: base_disk_name.clone(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::GlobalImage { + image_id: global_image.identity.id, + }, + size: disk_size, + }; + + let base_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&base_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request + let snapshots_url = format!( + "/organizations/{}/projects/{}/snapshots", + ORG_NAME, PROJECT_NAME + ); + + let snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "a-snapshot".parse().unwrap(), + description: "a snapshot!".to_string(), + }, + disk: base_disk_name.clone(), + }, + ) + .await; + + assert_eq!(snapshot.disk_id, base_disk.identity.id); + assert_eq!(snapshot.size, base_disk.size); + + // Delete the disk + let disk_url = format!("{}/{}", disks_url, base_disk_name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + // Attempt disk allocation, which will fail - the presense of the snapshot + // means the region wasn't deleted. 
+ let disk_size = ByteCount::try_from(10u64 * 1024 * 1024 * 1024).unwrap(); + let next_disk_name: Name = "next-disk".parse().unwrap(); + let next_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: next_disk_name.clone(), + description: String::from("will fail"), + }, + disk_source: params::DiskSource::GlobalImage { + image_id: global_image.identity.id, + }, + size: disk_size, + }; + + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&next_disk)) + .expect_status(Some(StatusCode::SERVICE_UNAVAILABLE)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); + + // Delete the snapshot + let snapshot_url = format!("{}/snapshots/a-snapshot", get_project_url()); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + + // Disk allocation will work now + let _next_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&next_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Delete it + let disk_url = format!("{}/{}", disks_url, next_disk_name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + // Assert everything was cleaned up + assert!(disk_test.crucible_resources_deleted().await); +} + +#[nexus_test] +async fn test_multiple_disks_multiple_snapshots_order_1(cptestctx: &ControlPlaneTestContext) { + // Test multiple disks with multiple snapshots + let client = &cptestctx.external_client; + let disk_test = DiskTest::new(&cptestctx).await; + create_ip_pool(&client, "p0", None, None).await; + create_org_and_project(client).await; + let disks_url = get_disks_url(); + + // Create a blank disk + let disk_size = ByteCount::try_from(2u64 * 1024 * 1024 * 1024).unwrap(); + let first_disk_name: Name = "first-disk".parse().unwrap(); + let first_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: first_disk_name.clone(), + description: String::from("disk 1"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: disk_size, + }; + + let first_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&first_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request + let snapshots_url = format!( + "/organizations/{}/projects/{}/snapshots", + ORG_NAME, PROJECT_NAME + ); + + let first_snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "first-snapshot".parse().unwrap(), + description: "first snapshot!".to_string(), + }, + disk: first_disk_name.clone(), + }, + ) + .await; + + assert_eq!(first_snapshot.disk_id, first_disk.identity.id); + assert_eq!(first_snapshot.size, first_disk.size); + + // Create another blank disk + let second_disk_name: Name = "second-disk".parse().unwrap(); + let second_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: second_disk_name.clone(), + description: String::from("disk 1"), + }, + disk_source: params::DiskSource::Blank { + block_size: 
params::BlockSize::try_from(512).unwrap(), + }, + size: disk_size, + }; + + let second_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&second_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request for the second disk + let second_snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "second-snapshot".parse().unwrap(), + description: "second snapshot!".to_string(), + }, + disk: second_disk_name.clone(), + }, + ) + .await; + + assert_eq!(second_snapshot.disk_id, second_disk.identity.id); + assert_eq!(second_snapshot.size, second_disk.size); + + // Delete the first disk + let disk_url = format!("{}/{}", disks_url, first_disk_name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + // Delete the second snapshot + let snapshot_url = format!("{}/snapshots/second-snapshot", get_project_url()); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + + // Delete the second disk + let disk_url = format!("{}/{}", disks_url, second_disk_name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + // Delete the first snapshot + let snapshot_url = format!("{}/snapshots/first-snapshot", get_project_url()); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + + // Assert everything was cleaned up + assert!(disk_test.crucible_resources_deleted().await); +} + +#[nexus_test] +async fn test_multiple_disks_multiple_snapshots_order_2(cptestctx: &ControlPlaneTestContext) { + // Test multiple disks with multiple snapshots, varying the delete order + let client = &cptestctx.external_client; + let disk_test = DiskTest::new(&cptestctx).await; + create_ip_pool(&client, "p0", None, None).await; + create_org_and_project(client).await; + let disks_url = get_disks_url(); + + // Create a blank disk + let disk_size = ByteCount::try_from(2u64 * 1024 * 1024 * 1024).unwrap(); + let first_disk_name: Name = "first-disk".parse().unwrap(); + let first_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: first_disk_name.clone(), + description: String::from("disk 1"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: disk_size, + }; + + let first_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&first_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request + let snapshots_url = format!( + "/organizations/{}/projects/{}/snapshots", + ORG_NAME, PROJECT_NAME + ); + + let first_snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "first-snapshot".parse().unwrap(), + description: "first snapshot!".to_string(), + }, + disk: first_disk_name.clone(), + }, + ) + .await; + + assert_eq!(first_snapshot.disk_id, 
first_disk.identity.id); + assert_eq!(first_snapshot.size, first_disk.size); + + // Create another blank disk + let second_disk_name: Name = "second-disk".parse().unwrap(); + let second_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: second_disk_name.clone(), + description: String::from("disk 1"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: disk_size, + }; + + let second_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&second_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request for the second disk + let second_snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "second-snapshot".parse().unwrap(), + description: "second snapshot!".to_string(), + }, + disk: second_disk_name.clone(), + }, + ) + .await; + + assert_eq!(second_snapshot.disk_id, second_disk.identity.id); + assert_eq!(second_snapshot.size, second_disk.size); + + // Delete the first disk + let disk_url = format!("{}/{}", disks_url, first_disk_name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + // Delete the second disk + let disk_url = format!("{}/{}", disks_url, second_disk_name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + // Delete the second snapshot + let snapshot_url = format!("{}/snapshots/second-snapshot", get_project_url()); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + + // Delete the first snapshot + let snapshot_url = format!("{}/snapshots/first-snapshot", get_project_url()); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + + // Assert everything was cleaned up + assert!(disk_test.crucible_resources_deleted().await); +} + +async fn prepare_for_test_multiple_layers_of_snapshots( + client: &ClientTestContext, +) { + let disks_url = get_disks_url(); + + // Create a blank disk + let disk_size = ByteCount::try_from(2u64 * 1024 * 1024 * 1024).unwrap(); + let layer_1_disk_name: Name = "layer-1-disk".parse().unwrap(); + let layer_1_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: layer_1_disk_name.clone(), + description: String::from("layer 1"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: disk_size, + }; + + let layer_1_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&layer_1_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request + let snapshots_url = format!( + "/organizations/{}/projects/{}/snapshots", + ORG_NAME, PROJECT_NAME + ); + + let layer_1_snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "layer-1-snapshot".parse().unwrap(), + 
description: "layer 1 snapshot!".to_string(), + }, + disk: layer_1_disk_name.clone(), + }, + ) + .await; + + assert_eq!(layer_1_snapshot.disk_id, layer_1_disk.identity.id); + assert_eq!(layer_1_snapshot.size, layer_1_disk.size); + + // Create a layer 2 disk out of the layer 1 snapshot + let layer_2_disk_name: Name = "layer-2-disk".parse().unwrap(); + let layer_2_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: layer_2_disk_name.clone(), + description: String::from("layer 2"), + }, + disk_source: params::DiskSource::Snapshot { + snapshot_id: layer_1_snapshot.identity.id, + }, + size: disk_size, + }; + + let layer_2_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&layer_2_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request for the second disk + let layer_2_snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "layer-2-snapshot".parse().unwrap(), + description: "layer 2 snapshot!".to_string(), + }, + disk: layer_2_disk_name.clone(), + }, + ) + .await; + + assert_eq!(layer_2_snapshot.disk_id, layer_2_disk.identity.id); + assert_eq!(layer_2_snapshot.size, layer_2_disk.size); + + // Create a layer 3 disk out of the layer 2 snapshot + let layer_3_disk_name: Name = "layer-3-disk".parse().unwrap(); + let layer_3_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: layer_3_disk_name.clone(), + description: String::from("layer 3"), + }, + disk_source: params::DiskSource::Snapshot { + snapshot_id: layer_2_snapshot.identity.id, + }, + size: disk_size, + }; + + let layer_3_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&layer_3_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request for the third disk + let layer_3_snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "layer-3-snapshot".parse().unwrap(), + description: "layer 3 snapshot!".to_string(), + }, + disk: layer_3_disk_name.clone(), + }, + ) + .await; + + assert_eq!(layer_3_snapshot.disk_id, layer_3_disk.identity.id); + assert_eq!(layer_3_snapshot.size, layer_3_disk.size); +} + +#[nexus_test] +async fn test_multiple_layers_of_snapshots_delete_all_disks_first(cptestctx: &ControlPlaneTestContext) { + // Test layering read-only snapshots through multiple disks: + // delete all disks, then delete all snapshots + let client = &cptestctx.external_client; + let disk_test = DiskTest::new(&cptestctx).await; + create_ip_pool(&client, "p0", None, None).await; + create_org_and_project(client).await; + + prepare_for_test_multiple_layers_of_snapshots(&client).await; + + // Delete the disks + for name in ["layer-1-disk", "layer-2-disk", "layer-3-disk"] { + let disk_url = format!("{}/{}", get_disks_url(), name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + } + + // Delete the snapshots + for name in ["layer-1-snapshot", "layer-2-snapshot", "layer-3-snapshot"] { + let snapshot_url = format!("{}/snapshots/{}", get_project_url(), name); + 
NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + } + + // Assert everything was cleaned up + assert!(disk_test.crucible_resources_deleted().await); +} + +#[nexus_test] +async fn test_multiple_layers_of_snapshots_delete_all_snapshots_first(cptestctx: &ControlPlaneTestContext) { + // Test layering read-only snapshots through multiple disks: + // delete all snapshots, then delete all disks + let client = &cptestctx.external_client; + let disk_test = DiskTest::new(&cptestctx).await; + create_ip_pool(&client, "p0", None, None).await; + create_org_and_project(client).await; + + prepare_for_test_multiple_layers_of_snapshots(&client).await; + + // Delete the snapshots + for name in ["layer-1-snapshot", "layer-2-snapshot", "layer-3-snapshot"] { + let snapshot_url = format!("{}/snapshots/{}", get_project_url(), name); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + } + + // Delete the disks + for name in ["layer-1-disk", "layer-2-disk", "layer-3-disk"] { + let disk_url = format!("{}/{}", get_disks_url(), name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + } + + // Assert everything was cleaned up + assert!(disk_test.crucible_resources_deleted().await); +} + +#[nexus_test] +async fn test_multiple_layers_of_snapshots_random_delete_order(cptestctx: &ControlPlaneTestContext) { + // Test layering read-only snapshots through multiple disks: + // delete snapshots and disks in a random order + let client = &cptestctx.external_client; + let disk_test = DiskTest::new(&cptestctx).await; + create_ip_pool(&client, "p0", None, None).await; + create_org_and_project(client).await; + + prepare_for_test_multiple_layers_of_snapshots(&client).await; + + #[derive(Debug)] + enum DeleteObject { + Disk(String), + Snapshot(String), + } + + let objects = { + let mut objects = vec![ + DeleteObject::Disk("layer-1-disk".to_string()), + DeleteObject::Disk("layer-2-disk".to_string()), + DeleteObject::Disk("layer-3-disk".to_string()), + + DeleteObject::Snapshot("layer-1-snapshot".to_string()), + DeleteObject::Snapshot("layer-2-snapshot".to_string()), + DeleteObject::Snapshot("layer-3-snapshot".to_string()), + ]; + + let mut rng = StdRng::from_entropy(); + objects.shuffle(&mut rng); + + objects + }; + + // If this test fails, this should print out the order that caused the + // failure. 
+ dbg!(&objects); + + for object in &objects { + match object { + DeleteObject::Disk(name) => { + let disk_url = format!("{}/{}", get_disks_url(), name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + } + + DeleteObject::Snapshot(name) => { + let snapshot_url = format!("{}/snapshots/{}", get_project_url(), name); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + } + } + } + + // Assert everything was cleaned up + assert!(disk_test.crucible_resources_deleted().await); +} + +// volume_delete saga node idempotency tests + +#[nexus_test] +async fn test_volume_hard_delete_idempotent(cptestctx: &ControlPlaneTestContext) { + let nexus = &cptestctx.server.apictx.nexus; + let datastore = nexus.datastore(); + + let volume_id = Uuid::new_v4(); + datastore.volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::File { + id: volume_id, + block_size: 512, + path: "/lol".to_string() + }).unwrap(), + )).await.unwrap(); + + datastore.volume_hard_delete(volume_id).await.unwrap(); + datastore.volume_hard_delete(volume_id).await.unwrap(); +} + diff --git a/sled-agent/src/sim/http_entrypoints_storage.rs b/sled-agent/src/sim/http_entrypoints_storage.rs index 40813a80a17..fba3f349311 100644 --- a/sled-agent/src/sim/http_entrypoints_storage.rs +++ b/sled-agent/src/sim/http_entrypoints_storage.rs @@ -89,6 +89,7 @@ async fn region_get( ) -> Result, HttpError> { let id = path.into_inner().id; let crucible = rc.context(); + match crucible.get(id).await { Some(region) => Ok(HttpResponseOk(region)), None => { @@ -108,15 +109,8 @@ async fn region_delete( let id = path.into_inner().id; let crucible = rc.context(); - // Can't delete a ZFS dataset if there are snapshots - if !crucible.snapshots_for_region(&id).await.is_empty() { - return Err(HttpError::for_bad_request( - None, - "must delete snapshots first!".to_string(), - )); - } - - match crucible.delete(id).await { + match crucible.delete(id).await + .map_err(|e| HttpError::for_bad_request(None, e.to_string()))? 
{ Some(_) => Ok(HttpResponseDeleted()), None => { Err(HttpError::for_not_found(None, "Region not found".to_string())) diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index cbac12cc067..5918a87f530 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -97,11 +97,21 @@ impl CrucibleDataInner { self.regions.get_mut(&id) } - fn delete(&mut self, id: RegionId) -> Option { + fn delete(&mut self, id: RegionId) -> Result> { + // Can't delete a ZFS dataset if there are snapshots + if !self.snapshots_for_region(&id).is_empty() { + bail!("must delete snapshots {:?} first!", + self.snapshots_for_region(&id).into_iter().map(|s| s.name).collect::>(), + ); + } + let id = Uuid::from_str(&id.0).unwrap(); - let mut region = self.regions.get_mut(&id)?; - region.state = State::Destroyed; - Some(region.clone()) + if let Some(mut region) = self.regions.get_mut(&id) { + region.state = State::Destroyed; + Ok(Some(region.clone())) + } else { + Ok(None) + } } fn create_snapshot( @@ -143,7 +153,8 @@ impl CrucibleDataInner { } fn delete_snapshot(&mut self, id: &RegionId, name: &str) -> Result<()> { - if !self.running_snapshots_for_id(id).is_empty() { + let running_snapshots_for_id = self.running_snapshots_for_id(id); + if running_snapshots_for_id.contains_key(name) { bail!("downstairs running for region {} snapshot {}", id.0, name,); } @@ -204,6 +215,30 @@ impl CrucibleDataInner { Ok(()) } + + /// Return true if there are no undeleted Crucible resources + pub fn is_empty(&self) -> bool { + let non_destroyed_regions: Vec = self.regions + .values() + .filter(|r| r.state != State::Destroyed) + .cloned() + .collect(); + + let snapshots: Vec = self.snapshots + .values() + .flatten() + .cloned() + .collect(); + + let running_snapshots: Vec = self.running_snapshots + .values() + .map(|hm| hm.values()) + .flatten() + .cloned() + .collect(); + + non_destroyed_regions.is_empty() && snapshots.is_empty() && running_snapshots.is_empty() + } } /// Represents a running Crucible Agent. Contains regions. @@ -232,7 +267,7 @@ impl CrucibleData { self.inner.lock().await.get(id) } - pub async fn delete(&self, id: RegionId) -> Option { + pub async fn delete(&self, id: RegionId) -> Result> { self.inner.lock().await.delete(id) } @@ -287,6 +322,10 @@ impl CrucibleData { ) -> Result<()> { self.inner.lock().await.delete_running_snapshot(id, name) } + + pub async fn is_empty(&self) -> bool { + self.inner.lock().await.is_empty() + } } /// A simulated Crucible Dataset. 
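A note on the simulated agent accounting introduced in the sled-agent/src/sim/storage.rs hunk above: region deletion now fails while any snapshots of that region remain, and is_empty() reports true only once every region is destroyed and no snapshots or running snapshots are left; the integration tests rely on exactly this invariant via crucible_resources_deleted(). Below is a minimal standalone sketch of that rule using simplified stand-in types (SimDataset, RegionState, and the helper methods are illustrative only, not the actual sled-agent structs):

    // Minimal sketch, not the real sled-agent types: a dataset refuses to
    // delete a region while snapshots of it remain, and only reports empty
    // once all regions are destroyed and all snapshots are gone.
    use std::collections::{HashMap, HashSet};

    #[derive(Clone, Copy, PartialEq)]
    enum RegionState {
        Created,
        Destroyed,
    }

    #[derive(Default)]
    struct SimDataset {
        regions: HashMap<u64, RegionState>,
        snapshots: HashMap<u64, HashSet<String>>, // region id -> snapshot names
    }

    impl SimDataset {
        fn delete_region(&mut self, id: u64) -> Result<(), String> {
            // Mirror the "must delete snapshots first!" check above.
            if self.snapshots.get(&id).map_or(false, |s| !s.is_empty()) {
                return Err(format!("region {} still has snapshots", id));
            }
            if let Some(state) = self.regions.get_mut(&id) {
                *state = RegionState::Destroyed;
            }
            Ok(())
        }

        fn delete_snapshot(&mut self, id: u64, name: &str) {
            if let Some(set) = self.snapshots.get_mut(&id) {
                set.remove(name);
            }
        }

        fn is_empty(&self) -> bool {
            self.regions.values().all(|s| *s == RegionState::Destroyed)
                && self.snapshots.values().all(|s| s.is_empty())
        }
    }

    fn main() {
        let mut ds = SimDataset::default();
        ds.regions.insert(1, RegionState::Created);
        ds.snapshots.entry(1).or_default().insert("snap-a".to_string());

        // Region delete is refused while the snapshot exists.
        assert!(ds.delete_region(1).is_err());

        // Delete the snapshot first; then the region delete succeeds.
        ds.delete_snapshot(1, "snap-a");
        assert!(ds.delete_region(1).is_ok());
        assert!(ds.is_empty());
    }

The real simulated agent tracks the same state per dataset, which is why the tests can assert cleanup simply by checking that every simulated dataset is empty after the deletes complete.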
From e1b4dc711ce115dbbe068c17c14504235c4f88ed Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 13 Sep 2022 15:05:07 -0400 Subject: [PATCH 2/8] fmt and clippy --- nexus/db-model/src/region_snapshot.rs | 1 - nexus/src/app/sagas/disk_create.rs | 12 +- nexus/src/app/sagas/mod.rs | 1 - nexus/src/app/sagas/snapshot_create.rs | 3 +- nexus/src/app/sagas/volume_delete.rs | 90 +-- nexus/src/app/volume.rs | 15 +- nexus/src/db/datastore/region.rs | 5 +- nexus/src/db/datastore/region_snapshot.rs | 14 +- nexus/src/db/datastore/snapshot.rs | 4 +- nexus/src/db/datastore/volume.rs | 537 +++++++++--------- .../integration_tests/volume_management.rs | 80 ++- .../src/sim/http_entrypoints_storage.rs | 7 +- sled-agent/src/sim/storage.rs | 30 +- 13 files changed, 422 insertions(+), 377 deletions(-) diff --git a/nexus/db-model/src/region_snapshot.rs b/nexus/db-model/src/region_snapshot.rs index fcd2823e375..9addeb83e33 100644 --- a/nexus/db-model/src/region_snapshot.rs +++ b/nexus/db-model/src/region_snapshot.rs @@ -33,4 +33,3 @@ pub struct RegionSnapshot { // how many volumes reference this? pub volume_references: i64, } - diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index ab4ef3887af..c65f97d7331 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -51,14 +51,10 @@ lazy_static! { sdc_create_disk_record, sdc_create_disk_record_undo ); - static ref REGIONS_ALLOC: NexusAction = new_action_noop_undo( - "disk-create.regions-alloc", - sdc_alloc_regions, - ); - static ref REGIONS_ENSURE: NexusAction = new_action_noop_undo( - "disk-create.regions-ensure", - sdc_regions_ensure, - ); + static ref REGIONS_ALLOC: NexusAction = + new_action_noop_undo("disk-create.regions-alloc", sdc_alloc_regions,); + static ref REGIONS_ENSURE: NexusAction = + new_action_noop_undo("disk-create.regions-ensure", sdc_regions_ensure,); static ref CREATE_VOLUME_RECORD: NexusAction = ActionFunc::new_action( "disk-create.create-volume-record", sdc_create_volume_record, diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index cf842e95dda..564f1b342ad 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -114,4 +114,3 @@ pub(super) async fn saga_generate_uuid( // Arbitrary limit on concurrency, for operations issued on multiple regions // within a disk at the same time. const MAX_CONCURRENT_REGION_REQUESTS: usize = 3; - diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 57c884090cb..2d11ad91e01 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -490,7 +490,8 @@ async fn ssc_start_running_snapshot( // Once snapshot has been validated, and running snapshot has been // started, add an entry in the region_snapshot table to correspond to // these Crucible resources. 
- osagactx.datastore() + osagactx + .datastore() .region_snapshot_create(db::model::RegionSnapshot { dataset_id: dataset.id(), region_id: region.id(), diff --git a/nexus/src/app/sagas/volume_delete.rs b/nexus/src/app/sagas/volume_delete.rs index 8073a98a15c..8879d8f2052 100644 --- a/nexus/src/app/sagas/volume_delete.rs +++ b/nexus/src/app/sagas/volume_delete.rs @@ -30,7 +30,11 @@ use super::MAX_CONCURRENT_REGION_REQUESTS; use crate::app::sagas::NexusAction; use crate::db; use crate::db::datastore::CrucibleResources; +use crucible_agent_client::{types::RegionId, Client as CrucibleAgentClient}; +use futures::StreamExt; use lazy_static::lazy_static; +use nexus_types::identity::Asset; +use omicron_common::api::external::Error; use serde::Deserialize; use serde::Serialize; use std::sync::Arc; @@ -38,13 +42,6 @@ use steno::new_action_noop_undo; use steno::ActionError; use steno::Node; use uuid::Uuid; -use omicron_common::api::external::Error; -use futures::StreamExt; -use crucible_agent_client::{ - types::RegionId, - Client as CrucibleAgentClient, -}; -use nexus_types::identity::Asset; // volume delete saga: input parameters @@ -128,7 +125,7 @@ impl NexusSaga for SagaVolumeDelete { "no_result_2", "DeleteCrucibleSnapshots", DELETE_CRUCIBLE_SNAPSHOTS.as_ref(), - ) + ), ]); // clean up regions that were freed by deleting snapshots @@ -160,7 +157,9 @@ async fn svd_decrease_crucible_resource_count( let crucible_resources = osagactx .datastore() - .decrease_crucible_resource_count_and_soft_delete_volume(params.volume_id) + .decrease_crucible_resource_count_and_soft_delete_volume( + params.volume_id, + ) .await .map_err(ActionError::action_failed)?; @@ -178,19 +177,20 @@ async fn svd_delete_crucible_regions( // Send DELETE calls to the corresponding Crucible agents delete_crucible_regions( - crucible_resources_to_delete.datasets_and_regions.clone() + crucible_resources_to_delete.datasets_and_regions.clone(), ) .await .map_err(ActionError::action_failed)?; // Remove DB records - let region_ids_to_delete = - crucible_resources_to_delete.datasets_and_regions - .iter() - .map(|(_, r)| r.id()) - .collect(); + let region_ids_to_delete = crucible_resources_to_delete + .datasets_and_regions + .iter() + .map(|(_, r)| r.id()) + .collect(); - osagactx.datastore() + osagactx + .datastore() .regions_hard_delete(region_ids_to_delete) .await .map_err(ActionError::action_failed)?; @@ -213,20 +213,24 @@ async fn svd_delete_crucible_snapshots( // Send DELETE calls to the corresponding Crucible agents delete_crucible_snapshots( - crucible_resources_to_delete.datasets_and_snapshots.clone() + crucible_resources_to_delete.datasets_and_snapshots.clone(), ) .await .map_err(ActionError::action_failed)?; // Remove DB records - for (_, region_snapshot) in &crucible_resources_to_delete.datasets_and_snapshots { - osagactx.datastore().region_snapshot_remove( - region_snapshot.dataset_id, - region_snapshot.region_id, - region_snapshot.snapshot_id, - ) - .await - .map_err(ActionError::action_failed)?; + for (_, region_snapshot) in + &crucible_resources_to_delete.datasets_and_snapshots + { + osagactx + .datastore() + .region_snapshot_remove( + region_snapshot.dataset_id, + region_snapshot.region_id, + region_snapshot.snapshot_id, + ) + .await + .map_err(ActionError::action_failed)?; } Ok(()) @@ -248,7 +252,8 @@ async fn svd_delete_freed_crucible_regions( // Find regions freed up for deletion by a previous saga node deleting the // region snapshots. 
- let freed_datasets_regions_and_volumes = osagactx.datastore() + let freed_datasets_regions_and_volumes = osagactx + .datastore() .find_deleted_volume_regions() .await .map_err(ActionError::action_failed)?; @@ -258,28 +263,30 @@ async fn svd_delete_freed_crucible_regions( freed_datasets_regions_and_volumes .iter() .map(|(d, r, _)| (d.clone(), r.clone())) - .collect() + .collect(), ) .await .map_err(ActionError::action_failed)?; // Remove region DB records - osagactx.datastore().regions_hard_delete( - freed_datasets_regions_and_volumes - .iter() - .map(|(_, r, _)| r.id()) - .collect() + osagactx + .datastore() + .regions_hard_delete( + freed_datasets_regions_and_volumes + .iter() + .map(|(_, r, _)| r.id()) + .collect(), ) .await .map_err(ActionError::action_failed)?; // Remove volume DB records for (_, _, volume) in &freed_datasets_regions_and_volumes { - osagactx.datastore().volume_hard_delete( - volume.id() - ) - .await - .map_err(ActionError::action_failed)?; + osagactx + .datastore() + .volume_hard_delete(volume.id()) + .await + .map_err(ActionError::action_failed)?; } Ok(()) @@ -297,7 +304,8 @@ async fn svd_hard_delete_volume_record( // deleted, which means we can't delete the region. Later on, deleting the // region snapshot will free up the region(s) to be deleted (this occurs in // svd_delete_freed_crucible_regions). - let allocated_regions = osagactx.datastore() + let allocated_regions = osagactx + .datastore() .get_allocated_regions(params.volume_id) .await .map_err(ActionError::action_failed)?; @@ -366,7 +374,10 @@ pub(super) async fn delete_crucible_regions( // datasets corresponding Crucible Agent for each running read-only downstairs // and snapshot. pub(super) async fn delete_crucible_snapshots( - datasets_and_snapshots: Vec<(db::model::Dataset, db::model::RegionSnapshot)>, + datasets_and_snapshots: Vec<( + db::model::Dataset, + db::model::RegionSnapshot, + )>, ) -> Result<(), Error> { let request_count = datasets_and_snapshots.len(); if request_count == 0 { @@ -440,4 +451,3 @@ pub(super) async fn delete_crucible_snapshots( Ok(()) } - diff --git a/nexus/src/app/volume.rs b/nexus/src/app/volume.rs index f9a1a200d77..9bb92481f9c 100644 --- a/nexus/src/app/volume.rs +++ b/nexus/src/app/volume.rs @@ -18,22 +18,19 @@ impl super::Nexus { /// resources and the user's query shouldn't wait on those DELETE calls. pub async fn volume_delete( self: &Arc, - volume_id: Uuid + volume_id: Uuid, ) -> DeleteResult { let saga_params = sagas::volume_delete::Params { volume_id }; // TODO execute this in the background instead, not using the usual SEC let saga_outputs = self - .execute_saga::( - saga_params, - ) + .execute_saga::(saga_params) .await?; - let volume_deleted = saga_outputs - .lookup_node_output::<()>("final_no_result") - .map_err(|e| Error::InternalError { - internal_message: e.to_string(), - })?; + let volume_deleted = + saga_outputs.lookup_node_output::<()>("final_no_result").map_err( + |e| Error::InternalError { internal_message: e.to_string() }, + )?; Ok(volume_deleted) } diff --git a/nexus/src/db/datastore/region.rs b/nexus/src/db/datastore/region.rs index bef92077abd..49e5605e898 100644 --- a/nexus/src/db/datastore/region.rs +++ b/nexus/src/db/datastore/region.rs @@ -330,7 +330,10 @@ impl DataStore { /// Deletes a set of regions. /// /// Also updates the storage usage on their corresponding datasets. 
- pub async fn regions_hard_delete(&self, region_ids: Vec) -> DeleteResult { + pub async fn regions_hard_delete( + &self, + region_ids: Vec, + ) -> DeleteResult { if region_ids.is_empty() { return Ok(()); } diff --git a/nexus/src/db/datastore/region_snapshot.rs b/nexus/src/db/datastore/region_snapshot.rs index e76989ce98d..0e36e9a1cbf 100644 --- a/nexus/src/db/datastore/region_snapshot.rs +++ b/nexus/src/db/datastore/region_snapshot.rs @@ -30,12 +30,7 @@ impl DataStore { .returning(RegionSnapshot::as_returning()) .get_result_async(self.pool()) .await - .map_err(|e| { - public_error_from_diesel_pool( - e, - ErrorHandler::Server, - ) - }) + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) } pub async fn region_snapshot_remove( @@ -53,11 +48,6 @@ impl DataStore { .execute_async(self.pool()) .await .map(|_rows_deleted| ()) - .map_err(|e| { - public_error_from_diesel_pool( - e, - ErrorHandler::Server, - ) - }) + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) } } diff --git a/nexus/src/db/datastore/snapshot.rs b/nexus/src/db/datastore/snapshot.rs index 4661ac21e04..ebe12b1665c 100644 --- a/nexus/src/db/datastore/snapshot.rs +++ b/nexus/src/db/datastore/snapshot.rs @@ -157,7 +157,9 @@ impl DataStore { .check_if_exists::(snapshot_id) .execute_async(self.pool_authorized(&opctx).await?) .await - .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server))?; + .map_err(|e| { + public_error_from_diesel_pool(e, ErrorHandler::Server) + })?; Ok(snapshot_id) } diff --git a/nexus/src/db/datastore/volume.rs b/nexus/src/db/datastore/volume.rs index 4d0789d5309..13553dc449c 100644 --- a/nexus/src/db/datastore/volume.rs +++ b/nexus/src/db/datastore/volume.rs @@ -5,8 +5,6 @@ //! [`DataStore`] methods on [`Volume`]s. use super::DataStore; -use async_bb8_diesel::AsyncConnection; -use async_bb8_diesel::AsyncRunQueryDsl; use crate::db; use crate::db::error::public_error_from_diesel_pool; use crate::db::error::ErrorHandler; @@ -16,6 +14,8 @@ use crate::db::model::Dataset; use crate::db::model::Region; use crate::db::model::RegionSnapshot; use crate::db::model::Volume; +use async_bb8_diesel::AsyncConnection; +use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; use diesel::OptionalExtension as DieselOptionalExtension; @@ -26,10 +26,10 @@ use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; -use uuid::Uuid; -use serde::Serialize; use serde::Deserialize; +use serde::Serialize; use sled_agent_client::types::VolumeConstructionRequest; +use uuid::Uuid; impl DataStore { pub async fn volume_create(&self, volume: Volume) -> CreateResult { @@ -45,95 +45,105 @@ impl DataStore { } type TxnError = TransactionError; - self.pool().transaction(move |conn| { - let maybe_volume: Option = - dsl::volume + self.pool() + .transaction(move |conn| { + let maybe_volume: Option = dsl::volume .filter(dsl::id.eq(volume.id())) .select(Volume::as_select()) .first(conn) .optional() - .map_err(|e| TxnError::CustomError( - VolumeCreationError::Public( + .map_err(|e| { + TxnError::CustomError(VolumeCreationError::Public( public_error_from_diesel_pool( e.into(), ErrorHandler::Server, - ) - ) - ))?; + ), + )) + })?; - // If the volume existed already, return it and do not increase - // usage counts. 
- if let Some(volume) = maybe_volume { - return Ok(volume); - } + // If the volume existed already, return it and do not increase + // usage counts. + if let Some(volume) = maybe_volume { + return Ok(volume); + } - // TODO do we need on_conflict do_nothing here? if the transaction - // model is read-committed, the SELECT above could return nothing, - // and the INSERT here could still result in a conflict. - // - // See also https://github.com/oxidecomputer/omicron/issues/1168 - let volume: Volume = diesel::insert_into(dsl::volume) - .values(volume.clone()) - .on_conflict(dsl::id) - .do_nothing() - .returning(Volume::as_returning()) - .get_result(conn) - .map_err(|e| TxnError::CustomError( - VolumeCreationError::Public( - public_error_from_diesel_pool( - e.into(), - ErrorHandler::Conflict( - ResourceType::Volume, - volume.id().to_string().as_str(), - ), - ) - ) - ))?; - - // Increase the usage count for Crucible resources according to the - // contents of the volume. - - // Grab all the targets that the volume construction request references. - let crucible_targets = { - let vcr: VolumeConstructionRequest = serde_json::from_str(&volume.data()) - .map_err(|e: serde_json::Error| TxnError::CustomError( - VolumeCreationError::SerdeError(e) - ))?; - - let mut crucible_targets = CrucibleTargets::default(); - resources_associated_with_volume(&vcr, &mut crucible_targets); - crucible_targets - }; - - // Increase the number of uses for each referenced region snapshot. - use db::schema::region_snapshot::dsl as rs_dsl; - for read_only_target in &crucible_targets.read_only_targets { - diesel::update(rs_dsl::region_snapshot) - .filter(rs_dsl::snapshot_addr.eq(read_only_target.clone())) - .set(rs_dsl::volume_references.eq(rs_dsl::volume_references + 1)) - .execute(conn) - .map_err(|e| TxnError::CustomError( - VolumeCreationError::Public( + // TODO do we need on_conflict do_nothing here? if the transaction + // model is read-committed, the SELECT above could return nothing, + // and the INSERT here could still result in a conflict. + // + // See also https://github.com/oxidecomputer/omicron/issues/1168 + let volume: Volume = diesel::insert_into(dsl::volume) + .values(volume.clone()) + .on_conflict(dsl::id) + .do_nothing() + .returning(Volume::as_returning()) + .get_result(conn) + .map_err(|e| { + TxnError::CustomError(VolumeCreationError::Public( public_error_from_diesel_pool( e.into(), - ErrorHandler::Server, - ) + ErrorHandler::Conflict( + ResourceType::Volume, + volume.id().to_string().as_str(), + ), + ), + )) + })?; + + // Increase the usage count for Crucible resources according to the + // contents of the volume. + + // Grab all the targets that the volume construction request references. + let crucible_targets = { + let vcr: VolumeConstructionRequest = serde_json::from_str( + &volume.data(), + ) + .map_err(|e: serde_json::Error| { + TxnError::CustomError(VolumeCreationError::SerdeError( + e, + )) + })?; + + let mut crucible_targets = CrucibleTargets::default(); + resources_associated_with_volume( + &vcr, + &mut crucible_targets, + ); + crucible_targets + }; + + // Increase the number of uses for each referenced region snapshot. 
+ use db::schema::region_snapshot::dsl as rs_dsl; + for read_only_target in &crucible_targets.read_only_targets { + diesel::update(rs_dsl::region_snapshot) + .filter( + rs_dsl::snapshot_addr.eq(read_only_target.clone()), ) - ))?; - } + .set( + rs_dsl::volume_references + .eq(rs_dsl::volume_references + 1), + ) + .execute(conn) + .map_err(|e| { + TxnError::CustomError(VolumeCreationError::Public( + public_error_from_diesel_pool( + e.into(), + ErrorHandler::Server, + ), + )) + })?; + } - Ok(volume) - }) - .await - .map_err(|e| match e { - TxnError::CustomError( - VolumeCreationError::Public(e) - ) => e, - - _ => Error::internal_error( - &format!("Transaction error: {}", e), - ), - }) + Ok(volume) + }) + .await + .map_err(|e| match e { + TxnError::CustomError(VolumeCreationError::Public(e)) => e, + + _ => { + Error::internal_error(&format!("Transaction error: {}", e)) + } + }) } pub async fn volume_hard_delete(&self, volume_id: Uuid) -> DeleteResult { @@ -169,39 +179,43 @@ impl DataStore { /// Find regions for deleted volumes that do not have associated region /// snapshots. - pub async fn find_deleted_volume_regions(&self) -> ListResultVec<(Dataset, Region, Volume)> { - use db::schema::region_snapshot::dsl; + pub async fn find_deleted_volume_regions( + &self, + ) -> ListResultVec<(Dataset, Region, Volume)> { use db::schema::dataset::dsl as dataset_dsl; use db::schema::region::dsl as region_dsl; + use db::schema::region_snapshot::dsl; use db::schema::volume::dsl as volume_dsl; // Find all regions and datasets region_dsl::region .inner_join( - volume_dsl::volume - .on(region_dsl::volume_id.eq(volume_dsl::id)) + volume_dsl::volume.on(region_dsl::volume_id.eq(volume_dsl::id)), ) .inner_join( dataset_dsl::dataset - .on(region_dsl::dataset_id.eq(dataset_dsl::id)) + .on(region_dsl::dataset_id.eq(dataset_dsl::id)), ) // where there either are no region snapshots, or the region // snapshot volume references have gone to zero .left_join( - dsl::region_snapshot - .on( - dsl::region_id.eq(region_dsl::id) - .and(dsl::dataset_id.eq(dataset_dsl::id)) - ) + dsl::region_snapshot.on(dsl::region_id + .eq(region_dsl::id) + .and(dsl::dataset_id.eq(dataset_dsl::id))), ) .filter( - dsl::volume_references.eq(0) - .or(dsl::volume_references.is_null()) + dsl::volume_references + .eq(0) + .or(dsl::volume_references.is_null()), ) // where the volume has already been soft-deleted .filter(volume_dsl::time_deleted.is_not_null()) // and return them (along with the volume so it can be hard deleted) - .select((Dataset::as_select(), Region::as_select(), Volume::as_select())) + .select(( + Dataset::as_select(), + Region::as_select(), + Volume::as_select(), + )) .load_async(self.pool()) .await .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) @@ -247,168 +261,187 @@ impl DataStore { // // TODO it would be nice to make this transaction_async, but I couldn't // get the async optional extension to work. - self.pool().transaction(move |conn| { - // Grab the volume in question. If the volume record was already - // hard-deleted, assume clean-up has occurred and return an empty - // CrucibleResources. If the volume record was soft-deleted, then - // return the serialized CrucibleResources. - let volume = { - use db::schema::volume::dsl; - - let volume = dsl::volume - .filter(dsl::id.eq(volume_id)) - .select(Volume::as_select()) - .get_result(conn) - .optional()?; + self.pool() + .transaction(move |conn| { + // Grab the volume in question. 
If the volume record was already + // hard-deleted, assume clean-up has occurred and return an empty + // CrucibleResources. If the volume record was soft-deleted, then + // return the serialized CrucibleResources. + let volume = { + use db::schema::volume::dsl; + + let volume = dsl::volume + .filter(dsl::id.eq(volume_id)) + .select(Volume::as_select()) + .get_result(conn) + .optional()?; + + if volume.is_none() { + // the volume was hard-deleted, return an empty + // CrucibleResources + return Ok(CrucibleResources::default()); + } - if volume.is_none() { - // the volume was hard-deleted, return an empty - // CrucibleResources - return Ok(CrucibleResources::default()); - } + let volume = volume.unwrap(); - let volume = volume.unwrap(); - - if volume.time_deleted.is_none() { - // a volume record exists, and was not deleted - this is the - // first time through this transaction for a certain volume - // id. Get the volume for use in the transaction. - volume - } else { - // this volume was soft deleted - this is a repeat time - // through this transaction. - - if let Some(resources_to_clean_up) = volume.resources_to_clean_up { - // return the serialized CrucibleResources - return Ok(serde_json::from_str(&resources_to_clean_up) - .map_err(|e| + if volume.time_deleted.is_none() { + // a volume record exists, and was not deleted - this is the + // first time through this transaction for a certain volume + // id. Get the volume for use in the transaction. + volume + } else { + // this volume was soft deleted - this is a repeat time + // through this transaction. + + if let Some(resources_to_clean_up) = + volume.resources_to_clean_up + { + // return the serialized CrucibleResources + return serde_json::from_str( + &resources_to_clean_up, + ) + .map_err(|e| { TxnError::CustomError( - DecreaseCrucibleResourcesError::SerdeError(e) + DecreaseCrucibleResourcesError::SerdeError( + e, + ), ) - )? - ); - } else { - // If no CrucibleResources struct was serialized, that's - // definitely a bug of some sort - the soft-delete below - // sets time_deleted at the same time as - // resources_to_clean_up! But, instead of a panic here, - // just return an empty CrucibleResources. - return Ok(CrucibleResources::default()); + }); + } else { + // If no CrucibleResources struct was serialized, that's + // definitely a bug of some sort - the soft-delete below + // sets time_deleted at the same time as + // resources_to_clean_up! But, instead of a panic here, + // just return an empty CrucibleResources. + return Ok(CrucibleResources::default()); + } } - } - }; - - // Grab all the targets that the volume construction request references. - let crucible_targets = { - let vcr: VolumeConstructionRequest = serde_json::from_str(&volume.data()) - .map_err(|e| - TxnError::CustomError( - DecreaseCrucibleResourcesError::SerdeError(e) + }; + + // Grab all the targets that the volume construction request references. + let crucible_targets = { + let vcr: VolumeConstructionRequest = + serde_json::from_str(&volume.data()).map_err(|e| { + TxnError::CustomError( + DecreaseCrucibleResourcesError::SerdeError(e), + ) + })?; + + let mut crucible_targets = CrucibleTargets::default(); + resources_associated_with_volume( + &vcr, + &mut crucible_targets, + ); + crucible_targets + }; + + // Decrease the number of uses for each referenced region snapshot. 
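// A minimal in-memory sketch, not part of the patch, of the accounting the
// diesel statements here perform against region_snapshot.volume_references:
// increment per read-only target on volume creation, decrement on volume
// deletion, and only targets whose count reaches zero become eligible for
// cleanup. RefCounts and its methods are illustrative names only.
use std::collections::HashMap;

// snapshot_addr -> volume_references, standing in for region_snapshot rows.
#[derive(Default)]
struct RefCounts(HashMap<String, i64>);

impl RefCounts {
    // Volume creation: bump every read-only target the volume references.
    fn volume_created(&mut self, read_only_targets: &[String]) {
        for t in read_only_targets {
            *self.0.entry(t.clone()).or_insert(0) += 1;
        }
    }

    // Volume soft-deletion: drop one reference per read-only target and
    // return the targets that are now unreferenced; only those may have
    // their snapshot and read-only downstairs torn down.
    fn volume_deleted(&mut self, read_only_targets: &[String]) -> Vec<String> {
        let mut cleanable = Vec::new();
        for t in read_only_targets {
            if let Some(count) = self.0.get_mut(t) {
                *count -= 1;
                if *count == 0 {
                    cleanable.push(t.clone());
                }
            }
        }
        cleanable
    }
}

// Two disks created from the same snapshot: the snapshot's target only
// becomes cleanable after the second deletion brings the count to zero.
fn main() {
    let target = vec!["[fd00::104]:19001".to_string()];
    let mut counts = RefCounts::default();
    counts.volume_created(&target);
    counts.volume_created(&target);
    assert!(counts.volume_deleted(&target).is_empty());
    assert_eq!(counts.volume_deleted(&target), target);
}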
+ use db::schema::region_snapshot::dsl; + + for read_only_target in &crucible_targets.read_only_targets { + diesel::update(dsl::region_snapshot) + .filter(dsl::snapshot_addr.eq(read_only_target.clone())) + .set( + dsl::volume_references + .eq(dsl::volume_references - 1), ) - )?; - - let mut crucible_targets = CrucibleTargets::default(); - resources_associated_with_volume(&vcr, &mut crucible_targets); - crucible_targets - }; - - // Decrease the number of uses for each referenced region snapshot. - use db::schema::region_snapshot::dsl; - - for read_only_target in &crucible_targets.read_only_targets { - diesel::update(dsl::region_snapshot) - .filter(dsl::snapshot_addr.eq(read_only_target.clone())) - .set(dsl::volume_references.eq(dsl::volume_references - 1)) - .execute(conn)?; - } + .execute(conn)?; + } - // Return what results can be cleaned up - let result = CrucibleResources { - // The only use of a read-write region will be at the top level of a - // Volume. These are not shared, but if any snapshots are taken this - // will prevent deletion of the region. Filter out any regions that - // have associated snapshots. - datasets_and_regions: { - use db::schema::dataset::dsl as dataset_dsl; - use db::schema::region::dsl as region_dsl; - - // Return all regions for this volume_id, where there either are - // no region_snapshots, or region_snapshots.volume_references = - // 0. - region_dsl::region - .filter(region_dsl::volume_id.eq(volume_id)) - .inner_join( - dataset_dsl::dataset - .on(region_dsl::dataset_id.eq(dataset_dsl::id)), - ) - .left_join( - dsl::region_snapshot - .on( - dsl::region_id.eq(region_dsl::id) - .and(dsl::dataset_id.eq(dataset_dsl::id)) - ) - ) - .filter( - dsl::volume_references.eq(0) - .or(dsl::volume_references.is_null()) - ) - .select((Dataset::as_select(), Region::as_select())) - .get_results::<(Dataset, Region)>(conn)? - }, - - // A volume (for a disk or snapshot) may reference another nested - // volume as a read-only parent, and this may be arbitrarily deep. - // After decrementing volume_references above, get all region - // snapshot records where the volume_references has gone to 0. - // Consumers of this struct will be responsible for deleting the - // read-only downstairs running for the snapshot and the snapshot - // itself. - datasets_and_snapshots: { - use db::schema::dataset::dsl as dataset_dsl; - - dsl::region_snapshot - .filter(dsl::volume_references.eq(0)) - .inner_join( - dataset_dsl::dataset - .on(dsl::dataset_id.eq(dataset_dsl::id)), - ) - .select((Dataset::as_select(), RegionSnapshot::as_select())) - .get_results::<(Dataset, RegionSnapshot)>(conn)? - }, - }; - - // Soft delete this volume, and serialize the resources that are to - // be cleaned up. - use db::schema::volume::dsl as volume_dsl; - - let now = Utc::now(); - diesel::update(volume_dsl::volume) - .filter(volume_dsl::id.eq(volume_id)) - .set(( - volume_dsl::time_deleted.eq(now), - volume_dsl::resources_to_clean_up.eq( - serde_json::to_string(&result) - .map_err(|e| + // Return what results can be cleaned up + let result = CrucibleResources { + // The only use of a read-write region will be at the top level of a + // Volume. These are not shared, but if any snapshots are taken this + // will prevent deletion of the region. Filter out any regions that + // have associated snapshots. 
+ datasets_and_regions: { + use db::schema::dataset::dsl as dataset_dsl; + use db::schema::region::dsl as region_dsl; + + // Return all regions for this volume_id, where there either are + // no region_snapshots, or region_snapshots.volume_references = + // 0. + region_dsl::region + .filter(region_dsl::volume_id.eq(volume_id)) + .inner_join( + dataset_dsl::dataset + .on(region_dsl::dataset_id + .eq(dataset_dsl::id)), + ) + .left_join( + dsl::region_snapshot.on(dsl::region_id + .eq(region_dsl::id) + .and(dsl::dataset_id.eq(dataset_dsl::id))), + ) + .filter( + dsl::volume_references + .eq(0) + .or(dsl::volume_references.is_null()), + ) + .select((Dataset::as_select(), Region::as_select())) + .get_results::<(Dataset, Region)>(conn)? + }, + + // A volume (for a disk or snapshot) may reference another nested + // volume as a read-only parent, and this may be arbitrarily deep. + // After decrementing volume_references above, get all region + // snapshot records where the volume_references has gone to 0. + // Consumers of this struct will be responsible for deleting the + // read-only downstairs running for the snapshot and the snapshot + // itself. + datasets_and_snapshots: { + use db::schema::dataset::dsl as dataset_dsl; + + dsl::region_snapshot + .filter(dsl::volume_references.eq(0)) + .inner_join( + dataset_dsl::dataset + .on(dsl::dataset_id.eq(dataset_dsl::id)), + ) + .select(( + Dataset::as_select(), + RegionSnapshot::as_select(), + )) + .get_results::<(Dataset, RegionSnapshot)>(conn)? + }, + }; + + // Soft delete this volume, and serialize the resources that are to + // be cleaned up. + use db::schema::volume::dsl as volume_dsl; + + let now = Utc::now(); + diesel::update(volume_dsl::volume) + .filter(volume_dsl::id.eq(volume_id)) + .set(( + volume_dsl::time_deleted.eq(now), + volume_dsl::resources_to_clean_up.eq( + serde_json::to_string(&result).map_err(|e| { TxnError::CustomError( - DecreaseCrucibleResourcesError::SerdeError(e) + DecreaseCrucibleResourcesError::SerdeError( + e, + ), ) - )? 
- ), - )) - .execute(conn)?; - - Ok(result) - }) - .await - .map_err(|e| match e { - TxnError::CustomError( - DecreaseCrucibleResourcesError::DieselError(e) - ) => public_error_from_diesel_pool(e.into(), ErrorHandler::Server), - - _ => Error::internal_error( - &format!("Transaction error: {}", e), - ), - }) + })?, + ), + )) + .execute(conn)?; + + Ok(result) + }) + .await + .map_err(|e| match e { + TxnError::CustomError( + DecreaseCrucibleResourcesError::DieselError(e), + ) => public_error_from_diesel_pool( + e.into(), + ErrorHandler::Server, + ), + + _ => { + Error::internal_error(&format!("Transaction error: {}", e)) + } + }) } } @@ -438,10 +471,7 @@ fn resources_associated_with_volume( read_only_parent, } => { for sub_volume in sub_volumes { - resources_associated_with_volume( - sub_volume, - crucible_targets, - ); + resources_associated_with_volume(sub_volume, crucible_targets); } if let Some(read_only_parent) = read_only_parent { @@ -456,11 +486,7 @@ fn resources_associated_with_volume( // no action required } - VolumeConstructionRequest::Region { - block_size: _, - opts, - gen: _, - } => { + VolumeConstructionRequest::Region { block_size: _, opts, gen: _ } => { for target in &opts.target { if opts.read_only { crucible_targets.read_only_targets.push(target.clone()); @@ -473,4 +499,3 @@ fn resources_associated_with_volume( } } } - diff --git a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs index 1aee2f6614a..6d89582a132 100644 --- a/nexus/tests/integration_tests/volume_management.rs +++ b/nexus/tests/integration_tests/volume_management.rs @@ -24,10 +24,10 @@ use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::Name; use omicron_nexus::external_api::params; use omicron_nexus::external_api::views; -use uuid::Uuid; -use sled_agent_client::types::VolumeConstructionRequest; -use rand::{SeedableRng, rngs::StdRng}; use rand::prelude::SliceRandom; +use rand::{rngs::StdRng, SeedableRng}; +use sled_agent_client::types::VolumeConstructionRequest; +use uuid::Uuid; use httptest::{matchers::*, responders::*, Expectation, ServerBuilder}; @@ -160,7 +160,8 @@ async fn test_snapshot_then_delete_disk(cptestctx: &ControlPlaneTestContext) { .expect("failed to delete disk"); // Delete the snapshot - let snapshot_url = format!("{}/snapshots/{}", get_project_url(), "a-snapshot"); + let snapshot_url = + format!("{}/snapshots/{}", get_project_url(), "a-snapshot"); NexusRequest::object_delete(client, &snapshot_url) .authn_as(AuthnMode::PrivilegedUser) .execute() @@ -274,7 +275,8 @@ async fn test_delete_snapshot_then_disk(cptestctx: &ControlPlaneTestContext) { assert_eq!(snapshot.size, base_disk.size); // Delete the snapshot - let snapshot_url = format!("{}/snapshots/{}", get_project_url(), "a-snapshot"); + let snapshot_url = + format!("{}/snapshots/{}", get_project_url(), "a-snapshot"); NexusRequest::object_delete(client, &snapshot_url) .authn_as(AuthnMode::PrivilegedUser) .execute() @@ -407,7 +409,8 @@ async fn test_multiple_snapshots(cptestctx: &ControlPlaneTestContext) { // Delete the snapshots for i in 0..4 { - let snapshot_url = format!("{}/snapshots/a-snapshot-{}", get_project_url(), i); + let snapshot_url = + format!("{}/snapshots/a-snapshot-{}", get_project_url(), i); NexusRequest::object_delete(client, &snapshot_url) .authn_as(AuthnMode::PrivilegedUser) .execute() @@ -420,7 +423,9 @@ async fn test_multiple_snapshots(cptestctx: &ControlPlaneTestContext) { } #[nexus_test] -async fn 
test_snapshot_prevents_other_disk(cptestctx: &ControlPlaneTestContext) { +async fn test_snapshot_prevents_other_disk( + cptestctx: &ControlPlaneTestContext, +) { // Test that region remains if there is a snapshot, preventing further // allocation. let client = &cptestctx.external_client; @@ -583,7 +588,9 @@ async fn test_snapshot_prevents_other_disk(cptestctx: &ControlPlaneTestContext) } #[nexus_test] -async fn test_multiple_disks_multiple_snapshots_order_1(cptestctx: &ControlPlaneTestContext) { +async fn test_multiple_disks_multiple_snapshots_order_1( + cptestctx: &ControlPlaneTestContext, +) { // Test multiple disks with multiple snapshots let client = &cptestctx.external_client; let disk_test = DiskTest::new(&cptestctx).await; @@ -690,7 +697,8 @@ async fn test_multiple_disks_multiple_snapshots_order_1(cptestctx: &ControlPlane .expect("failed to delete disk"); // Delete the second snapshot - let snapshot_url = format!("{}/snapshots/second-snapshot", get_project_url()); + let snapshot_url = + format!("{}/snapshots/second-snapshot", get_project_url()); NexusRequest::object_delete(client, &snapshot_url) .authn_as(AuthnMode::PrivilegedUser) .execute() @@ -706,7 +714,8 @@ async fn test_multiple_disks_multiple_snapshots_order_1(cptestctx: &ControlPlane .expect("failed to delete disk"); // Delete the first snapshot - let snapshot_url = format!("{}/snapshots/first-snapshot", get_project_url()); + let snapshot_url = + format!("{}/snapshots/first-snapshot", get_project_url()); NexusRequest::object_delete(client, &snapshot_url) .authn_as(AuthnMode::PrivilegedUser) .execute() @@ -718,7 +727,9 @@ async fn test_multiple_disks_multiple_snapshots_order_1(cptestctx: &ControlPlane } #[nexus_test] -async fn test_multiple_disks_multiple_snapshots_order_2(cptestctx: &ControlPlaneTestContext) { +async fn test_multiple_disks_multiple_snapshots_order_2( + cptestctx: &ControlPlaneTestContext, +) { // Test multiple disks with multiple snapshots, varying the delete order let client = &cptestctx.external_client; let disk_test = DiskTest::new(&cptestctx).await; @@ -833,7 +844,8 @@ async fn test_multiple_disks_multiple_snapshots_order_2(cptestctx: &ControlPlane .expect("failed to delete disk"); // Delete the second snapshot - let snapshot_url = format!("{}/snapshots/second-snapshot", get_project_url()); + let snapshot_url = + format!("{}/snapshots/second-snapshot", get_project_url()); NexusRequest::object_delete(client, &snapshot_url) .authn_as(AuthnMode::PrivilegedUser) .execute() @@ -841,7 +853,8 @@ async fn test_multiple_disks_multiple_snapshots_order_2(cptestctx: &ControlPlane .expect("failed to delete snapshot"); // Delete the first snapshot - let snapshot_url = format!("{}/snapshots/first-snapshot", get_project_url()); + let snapshot_url = + format!("{}/snapshots/first-snapshot", get_project_url()); NexusRequest::object_delete(client, &snapshot_url) .authn_as(AuthnMode::PrivilegedUser) .execute() @@ -991,7 +1004,9 @@ async fn prepare_for_test_multiple_layers_of_snapshots( } #[nexus_test] -async fn test_multiple_layers_of_snapshots_delete_all_disks_first(cptestctx: &ControlPlaneTestContext) { +async fn test_multiple_layers_of_snapshots_delete_all_disks_first( + cptestctx: &ControlPlaneTestContext, +) { // Test layering read-only snapshots through multiple disks: // delete all disks, then delete all snapshots let client = &cptestctx.external_client; @@ -1026,7 +1041,9 @@ async fn test_multiple_layers_of_snapshots_delete_all_disks_first(cptestctx: &Co } #[nexus_test] -async fn 
test_multiple_layers_of_snapshots_delete_all_snapshots_first(cptestctx: &ControlPlaneTestContext) { +async fn test_multiple_layers_of_snapshots_delete_all_snapshots_first( + cptestctx: &ControlPlaneTestContext, +) { // Test layering read-only snapshots through multiple disks: // delete all snapshots, then delete all disks let client = &cptestctx.external_client; @@ -1061,7 +1078,9 @@ async fn test_multiple_layers_of_snapshots_delete_all_snapshots_first(cptestctx: } #[nexus_test] -async fn test_multiple_layers_of_snapshots_random_delete_order(cptestctx: &ControlPlaneTestContext) { +async fn test_multiple_layers_of_snapshots_random_delete_order( + cptestctx: &ControlPlaneTestContext, +) { // Test layering read-only snapshots through multiple disks: // delete snapshots and disks in a random order let client = &cptestctx.external_client; @@ -1082,7 +1101,6 @@ async fn test_multiple_layers_of_snapshots_random_delete_order(cptestctx: &Contr DeleteObject::Disk("layer-1-disk".to_string()), DeleteObject::Disk("layer-2-disk".to_string()), DeleteObject::Disk("layer-3-disk".to_string()), - DeleteObject::Snapshot("layer-1-snapshot".to_string()), DeleteObject::Snapshot("layer-2-snapshot".to_string()), DeleteObject::Snapshot("layer-3-snapshot".to_string()), @@ -1110,7 +1128,8 @@ async fn test_multiple_layers_of_snapshots_random_delete_order(cptestctx: &Contr } DeleteObject::Snapshot(name) => { - let snapshot_url = format!("{}/snapshots/{}", get_project_url(), name); + let snapshot_url = + format!("{}/snapshots/{}", get_project_url(), name); NexusRequest::object_delete(client, &snapshot_url) .authn_as(AuthnMode::PrivilegedUser) .execute() @@ -1127,21 +1146,26 @@ async fn test_multiple_layers_of_snapshots_random_delete_order(cptestctx: &Contr // volume_delete saga node idempotency tests #[nexus_test] -async fn test_volume_hard_delete_idempotent(cptestctx: &ControlPlaneTestContext) { +async fn test_volume_hard_delete_idempotent( + cptestctx: &ControlPlaneTestContext, +) { let nexus = &cptestctx.server.apictx.nexus; let datastore = nexus.datastore(); let volume_id = Uuid::new_v4(); - datastore.volume_create(nexus_db_model::Volume::new( - volume_id, - serde_json::to_string(&VolumeConstructionRequest::File { - id: volume_id, - block_size: 512, - path: "/lol".to_string() - }).unwrap(), - )).await.unwrap(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::File { + id: volume_id, + block_size: 512, + path: "/lol".to_string(), + }) + .unwrap(), + )) + .await + .unwrap(); datastore.volume_hard_delete(volume_id).await.unwrap(); datastore.volume_hard_delete(volume_id).await.unwrap(); } - diff --git a/sled-agent/src/sim/http_entrypoints_storage.rs b/sled-agent/src/sim/http_entrypoints_storage.rs index fba3f349311..fcdd19be70a 100644 --- a/sled-agent/src/sim/http_entrypoints_storage.rs +++ b/sled-agent/src/sim/http_entrypoints_storage.rs @@ -109,8 +109,11 @@ async fn region_delete( let id = path.into_inner().id; let crucible = rc.context(); - match crucible.delete(id).await - .map_err(|e| HttpError::for_bad_request(None, e.to_string()))? { + match crucible + .delete(id) + .await + .map_err(|e| HttpError::for_bad_request(None, e.to_string()))? 
+ { Some(_) => Ok(HttpResponseDeleted()), None => { Err(HttpError::for_not_found(None, "Region not found".to_string())) diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 5918a87f530..58c880eb20d 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -100,8 +100,12 @@ impl CrucibleDataInner { fn delete(&mut self, id: RegionId) -> Result> { // Can't delete a ZFS dataset if there are snapshots if !self.snapshots_for_region(&id).is_empty() { - bail!("must delete snapshots {:?} first!", - self.snapshots_for_region(&id).into_iter().map(|s| s.name).collect::>(), + bail!( + "must delete snapshots {:?} first!", + self.snapshots_for_region(&id) + .into_iter() + .map(|s| s.name) + .collect::>(), ); } @@ -218,26 +222,18 @@ impl CrucibleDataInner { /// Return true if there are no undeleted Crucible resources pub fn is_empty(&self) -> bool { - let non_destroyed_regions: Vec = self.regions + let non_destroyed_regions = self + .regions .values() .filter(|r| r.state != State::Destroyed) - .cloned() - .collect(); + .count(); - let snapshots: Vec = self.snapshots - .values() - .flatten() - .cloned() - .collect(); + let snapshots = self.snapshots.values().flatten().count(); - let running_snapshots: Vec = self.running_snapshots - .values() - .map(|hm| hm.values()) - .flatten() - .cloned() - .collect(); + let running_snapshots = + self.running_snapshots.values().flat_map(|hm| hm.values()).count(); - non_destroyed_regions.is_empty() && snapshots.is_empty() && running_snapshots.is_empty() + non_destroyed_regions == 0 && snapshots == 0 && running_snapshots == 0 } } From 5979cd6352c8dcd3e81def2d724cf02fe2c9ea96 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Mon, 26 Sep 2022 20:10:26 -0400 Subject: [PATCH 3/8] avoid the unwrap --- nexus/src/db/datastore/volume.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nexus/src/db/datastore/volume.rs b/nexus/src/db/datastore/volume.rs index 13553dc449c..aabd1a0eeeb 100644 --- a/nexus/src/db/datastore/volume.rs +++ b/nexus/src/db/datastore/volume.rs @@ -276,13 +276,13 @@ impl DataStore { .get_result(conn) .optional()?; - if volume.is_none() { + let volume = if let Some(v) = volume { + v + } else { // the volume was hard-deleted, return an empty // CrucibleResources return Ok(CrucibleResources::default()); - } - - let volume = volume.unwrap(); + }; if volume.time_deleted.is_none() { // a volume record exists, and was not deleted - this is the From 9692caca9a3578e6b3ceb0639babcb4819874a9e Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 27 Sep 2022 14:53:20 -0400 Subject: [PATCH 4/8] version CrucibleResources in the DB as an enum --- common/src/sql/dbinit.sql | 3 +- nexus/src/app/sagas/volume_delete.rs | 72 +++++++++++++++------------- nexus/src/db/datastore/volume.rs | 21 ++++++-- 3 files changed, 58 insertions(+), 38 deletions(-) diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index 3f0c601040a..aed8bfec2f5 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -284,7 +284,8 @@ CREATE TABLE omicron.public.volume ( /* * A JSON document describing what resources to clean up when deleting this - * volume. + * volume. The Rust type of this column should be the CrucibleResources + * enum. 
*/ resources_to_clean_up TEXT ); diff --git a/nexus/src/app/sagas/volume_delete.rs b/nexus/src/app/sagas/volume_delete.rs index 8879d8f2052..74efcc5eb3b 100644 --- a/nexus/src/app/sagas/volume_delete.rs +++ b/nexus/src/app/sagas/volume_delete.rs @@ -176,24 +176,28 @@ async fn svd_delete_crucible_regions( sagactx.lookup::("crucible_resources_to_delete")?; // Send DELETE calls to the corresponding Crucible agents - delete_crucible_regions( - crucible_resources_to_delete.datasets_and_regions.clone(), - ) - .await - .map_err(ActionError::action_failed)?; + match crucible_resources_to_delete { + CrucibleResources::V1(crucible_resources_to_delete) => { + delete_crucible_regions( + crucible_resources_to_delete.datasets_and_regions.clone(), + ) + .await + .map_err(ActionError::action_failed)?; - // Remove DB records - let region_ids_to_delete = crucible_resources_to_delete - .datasets_and_regions - .iter() - .map(|(_, r)| r.id()) - .collect(); + // Remove DB records + let region_ids_to_delete = crucible_resources_to_delete + .datasets_and_regions + .iter() + .map(|(_, r)| r.id()) + .collect(); - osagactx - .datastore() - .regions_hard_delete(region_ids_to_delete) - .await - .map_err(ActionError::action_failed)?; + osagactx + .datastore() + .regions_hard_delete(region_ids_to_delete) + .await + .map_err(ActionError::action_failed)?; + } + } Ok(()) } @@ -212,25 +216,29 @@ async fn svd_delete_crucible_snapshots( sagactx.lookup::("crucible_resources_to_delete")?; // Send DELETE calls to the corresponding Crucible agents - delete_crucible_snapshots( - crucible_resources_to_delete.datasets_and_snapshots.clone(), - ) - .await - .map_err(ActionError::action_failed)?; - - // Remove DB records - for (_, region_snapshot) in - &crucible_resources_to_delete.datasets_and_snapshots - { - osagactx - .datastore() - .region_snapshot_remove( - region_snapshot.dataset_id, - region_snapshot.region_id, - region_snapshot.snapshot_id, + match crucible_resources_to_delete { + CrucibleResources::V1(crucible_resources_to_delete) => { + delete_crucible_snapshots( + crucible_resources_to_delete.datasets_and_snapshots.clone(), ) .await .map_err(ActionError::action_failed)?; + + // Remove DB records + for (_, region_snapshot) in + &crucible_resources_to_delete.datasets_and_snapshots + { + osagactx + .datastore() + .region_snapshot_remove( + region_snapshot.dataset_id, + region_snapshot.region_id, + region_snapshot.snapshot_id, + ) + .await + .map_err(ActionError::action_failed)?; + } + } } Ok(()) diff --git a/nexus/src/db/datastore/volume.rs b/nexus/src/db/datastore/volume.rs index aabd1a0eeeb..8be451592c0 100644 --- a/nexus/src/db/datastore/volume.rs +++ b/nexus/src/db/datastore/volume.rs @@ -281,7 +281,9 @@ impl DataStore { } else { // the volume was hard-deleted, return an empty // CrucibleResources - return Ok(CrucibleResources::default()); + return Ok(CrucibleResources::V1( + CrucibleResourcesV1::default(), + )); }; if volume.time_deleted.is_none() { @@ -313,7 +315,9 @@ impl DataStore { // sets time_deleted at the same time as // resources_to_clean_up! But, instead of a panic here, // just return an empty CrucibleResources. - return Ok(CrucibleResources::default()); + return Ok(CrucibleResources::V1( + CrucibleResourcesV1::default(), + )); } } }; @@ -349,7 +353,7 @@ impl DataStore { } // Return what results can be cleaned up - let result = CrucibleResources { + let result = CrucibleResources::V1(CrucibleResourcesV1 { // The only use of a read-write region will be at the top level of a // Volume. 
These are not shared, but if any snapshots are taken this // will prevent deletion of the region. Filter out any regions that @@ -404,7 +408,7 @@ impl DataStore { )) .get_results::<(Dataset, RegionSnapshot)>(conn)? }, - }; + }); // Soft delete this volume, and serialize the resources that are to // be cleaned up. @@ -450,8 +454,15 @@ struct CrucibleTargets { read_only_targets: Vec, } +// Serialize this enum into the `resources_to_clean_up` column to handle +// different versions over time. +#[derive(Debug, Serialize, Deserialize)] +pub enum CrucibleResources { + V1(CrucibleResourcesV1), +} + #[derive(Debug, Default, Serialize, Deserialize)] -pub struct CrucibleResources { +pub struct CrucibleResourcesV1 { pub datasets_and_regions: Vec<(Dataset, Region)>, pub datasets_and_snapshots: Vec<(Dataset, RegionSnapshot)>, } From e2d7b1d2744a807b5df2d1ac5cb7c5ed5b9835f3 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 27 Sep 2022 14:56:55 -0400 Subject: [PATCH 5/8] rename delete-volume-record to delete-volume --- nexus/src/app/sagas/disk_delete.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/nexus/src/app/sagas/disk_delete.rs b/nexus/src/app/sagas/disk_delete.rs index 10182be3cb4..9cb8ac04b88 100644 --- a/nexus/src/app/sagas/disk_delete.rs +++ b/nexus/src/app/sagas/disk_delete.rs @@ -32,9 +32,9 @@ lazy_static! { // underlying regions. sdd_delete_disk_record ); - static ref DELETE_VOLUME_RECORD: NexusAction = new_action_noop_undo( - "disk-delete.delete-volume-record", - sdd_delete_volume_record + static ref DELETE_VOLUME: NexusAction = new_action_noop_undo( + "disk-delete.delete-volume", + sdd_delete_volume ); } @@ -48,7 +48,7 @@ impl NexusSaga for SagaDiskDelete { fn register_actions(registry: &mut ActionRegistry) { registry.register(Arc::clone(&*DELETE_DISK_RECORD)); - registry.register(Arc::clone(&*DELETE_VOLUME_RECORD)); + registry.register(Arc::clone(&*DELETE_VOLUME)); } fn make_saga_dag( @@ -62,8 +62,8 @@ impl NexusSaga for SagaDiskDelete { )); builder.append(Node::action( "no_result", - "DeleteVolumeRecord", - DELETE_VOLUME_RECORD.as_ref(), + "DeleteVolume", + DELETE_VOLUME.as_ref(), )); Ok(builder.build()?) 
} @@ -85,7 +85,7 @@ async fn sdd_delete_disk_record( Ok(volume_id) } -async fn sdd_delete_volume_record( +async fn sdd_delete_volume( sagactx: NexusActionContext, ) -> Result<(), ActionError> { let osagactx = sagactx.user_data(); From e300f8d2b7de7ce4ec5a0e3a5353fb47de5d2eb7 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 27 Sep 2022 15:43:23 -0400 Subject: [PATCH 6/8] use on_conflict_do_nothing instead --- nexus/src/db/datastore/region_snapshot.rs | 10 ++++------ nexus/tests/integration_tests/snapshots.rs | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/nexus/src/db/datastore/region_snapshot.rs b/nexus/src/db/datastore/region_snapshot.rs index 0e36e9a1cbf..dab3a90bcb7 100644 --- a/nexus/src/db/datastore/region_snapshot.rs +++ b/nexus/src/db/datastore/region_snapshot.rs @@ -19,17 +19,15 @@ impl DataStore { pub async fn region_snapshot_create( &self, region_snapshot: RegionSnapshot, - ) -> CreateResult { + ) -> CreateResult<()> { use db::schema::region_snapshot::dsl; - // TODO https://github.com/oxidecomputer/omicron/issues/1168 diesel::insert_into(dsl::region_snapshot) .values(region_snapshot.clone()) - .on_conflict((dsl::dataset_id, dsl::region_id, dsl::snapshot_id)) - .do_nothing() - .returning(RegionSnapshot::as_returning()) - .get_result_async(self.pool()) + .on_conflict_do_nothing() + .execute_async(self.pool()) .await + .map(|_| ()) .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) } diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index 4f8e80a8f49..a5e3de17d3f 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -704,3 +704,25 @@ async fn test_create_snapshot_record_idempotent( .await .unwrap(); } + +#[nexus_test] +async fn test_region_snapshot_create_idempotent( + cptestctx: &ControlPlaneTestContext, +) { + let nexus = &cptestctx.server.apictx.nexus; + let datastore = nexus.datastore(); + + let region_snapshot = db::model::RegionSnapshot { + dataset_id: Uuid::new_v4(), + region_id: Uuid::new_v4(), + snapshot_id: Uuid::new_v4(), + + snapshot_addr: "[::]:12345".to_string(), + + volume_references: 1, + }; + + datastore.region_snapshot_create(region_snapshot.clone()).await.unwrap(); + + datastore.region_snapshot_create(region_snapshot).await.unwrap(); +} From 33468d4f21d817fe00dd98121590b78cb092fe7e Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 27 Sep 2022 15:59:18 -0400 Subject: [PATCH 7/8] assert when crucible resources not deleted too --- .../integration_tests/volume_management.rs | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs index 6d89582a132..9eab3493c1e 100644 --- a/nexus/tests/integration_tests/volume_management.rs +++ b/nexus/tests/integration_tests/volume_management.rs @@ -151,6 +151,9 @@ async fn test_snapshot_then_delete_disk(cptestctx: &ControlPlaneTestContext) { assert_eq!(snapshot.disk_id, base_disk.identity.id); assert_eq!(snapshot.size, base_disk.size); + // The Crucible regions and snapshot still remains + assert!(!disk_test.crucible_resources_deleted().await); + // Delete the disk let disk_url = format!("{}/{}", disks_url, base_disk_name); NexusRequest::object_delete(client, &disk_url) @@ -159,6 +162,9 @@ async fn test_snapshot_then_delete_disk(cptestctx: &ControlPlaneTestContext) { .await .expect("failed to delete disk"); + // The Crucible snapshot still 
remains + assert!(!disk_test.crucible_resources_deleted().await); + // Delete the snapshot let snapshot_url = format!("{}/snapshots/{}", get_project_url(), "a-snapshot"); @@ -274,6 +280,9 @@ async fn test_delete_snapshot_then_disk(cptestctx: &ControlPlaneTestContext) { assert_eq!(snapshot.disk_id, base_disk.identity.id); assert_eq!(snapshot.size, base_disk.size); + // The Crucible regions and snapshot still remain + assert!(!disk_test.crucible_resources_deleted().await); + // Delete the snapshot let snapshot_url = format!("{}/snapshots/{}", get_project_url(), "a-snapshot"); @@ -283,6 +292,9 @@ async fn test_delete_snapshot_then_disk(cptestctx: &ControlPlaneTestContext) { .await .expect("failed to delete snapshot"); + // The Crucible regions still remain + assert!(!disk_test.crucible_resources_deleted().await); + // Delete the disk let disk_url = format!("{}/{}", disks_url, base_disk_name); NexusRequest::object_delete(client, &disk_url) @@ -399,6 +411,9 @@ async fn test_multiple_snapshots(cptestctx: &ControlPlaneTestContext) { assert_eq!(snapshot.size, base_disk.size); } + // The Crucible regions and snapshots still remain + assert!(!disk_test.crucible_resources_deleted().await); + // Delete the disk let disk_url = format!("{}/{}", disks_url, base_disk_name); NexusRequest::object_delete(client, &disk_url) @@ -409,6 +424,9 @@ async fn test_multiple_snapshots(cptestctx: &ControlPlaneTestContext) { // Delete the snapshots for i in 0..4 { + // The Crucible snapshots still remain + assert!(!disk_test.crucible_resources_deleted().await); + let snapshot_url = format!("{}/snapshots/a-snapshot-{}", get_project_url(), i); NexusRequest::object_delete(client, &snapshot_url) @@ -521,6 +539,9 @@ async fn test_snapshot_prevents_other_disk( assert_eq!(snapshot.disk_id, base_disk.identity.id); assert_eq!(snapshot.size, base_disk.size); + // The Crucible regions and snapshots still remain + assert!(!disk_test.crucible_resources_deleted().await); + // Delete the disk let disk_url = format!("{}/{}", disks_url, base_disk_name); NexusRequest::object_delete(client, &disk_url) @@ -529,6 +550,9 @@ async fn test_snapshot_prevents_other_disk( .await .expect("failed to delete disk"); + // The Crucible snapshots still remain + assert!(!disk_test.crucible_resources_deleted().await); + // Attempt disk allocation, which will fail - the presense of the snapshot // means the region wasn't deleted. 
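// The disk_test.crucible_resources_deleted() helper asserted throughout
// these tests has its implementation outside this excerpt. A rough sketch
// of the condition it is assumed to check, mirroring the
// CrucibleDataInner::is_empty() added to sled-agent/src/sim/storage.rs in
// this patch; SimDataset and crucible_resources_deleted() here are
// stand-ins, not the real test-utils types.
struct SimDataset {
    non_destroyed_regions: usize,
    snapshots: usize,
    running_snapshots: usize,
}

impl SimDataset {
    // Mirrors the simulated agent's is_empty(): nothing left to clean up.
    fn is_empty(&self) -> bool {
        self.non_destroyed_regions == 0
            && self.snapshots == 0
            && self.running_snapshots == 0
    }
}

// True only when every simulated Crucible dataset has no remaining
// resources - the condition the tests assert after the final delete.
fn crucible_resources_deleted(datasets: &[SimDataset]) -> bool {
    datasets.iter().all(|d| d.is_empty())
}

fn main() {
    // A lingering snapshot on any dataset keeps the assertion false.
    let datasets = vec![
        SimDataset { non_destroyed_regions: 0, snapshots: 1, running_snapshots: 0 },
        SimDataset { non_destroyed_regions: 0, snapshots: 0, running_snapshots: 0 },
    ];
    assert!(!crucible_resources_deleted(&datasets));
}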
let disk_size = ByteCount::try_from(10u64 * 1024 * 1024 * 1024).unwrap(); @@ -562,6 +586,9 @@ async fn test_snapshot_prevents_other_disk( .await .expect("failed to delete snapshot"); + // All resources were deleted + assert!(disk_test.crucible_resources_deleted().await); + // Disk allocation will work now let _next_disk: Disk = NexusRequest::new( RequestBuilder::new(client, Method::POST, &disks_url) @@ -688,6 +715,8 @@ async fn test_multiple_disks_multiple_snapshots_order_1( assert_eq!(second_snapshot.disk_id, second_disk.identity.id); assert_eq!(second_snapshot.size, second_disk.size); + assert!(!disk_test.crucible_resources_deleted().await); + // Delete the first disk let disk_url = format!("{}/{}", disks_url, first_disk_name); NexusRequest::object_delete(client, &disk_url) @@ -696,6 +725,8 @@ async fn test_multiple_disks_multiple_snapshots_order_1( .await .expect("failed to delete disk"); + assert!(!disk_test.crucible_resources_deleted().await); + // Delete the second snapshot let snapshot_url = format!("{}/snapshots/second-snapshot", get_project_url()); @@ -705,6 +736,8 @@ async fn test_multiple_disks_multiple_snapshots_order_1( .await .expect("failed to delete snapshot"); + assert!(!disk_test.crucible_resources_deleted().await); + // Delete the second disk let disk_url = format!("{}/{}", disks_url, second_disk_name); NexusRequest::object_delete(client, &disk_url) @@ -713,6 +746,8 @@ async fn test_multiple_disks_multiple_snapshots_order_1( .await .expect("failed to delete disk"); + assert!(!disk_test.crucible_resources_deleted().await); + // Delete the first snapshot let snapshot_url = format!("{}/snapshots/first-snapshot", get_project_url()); @@ -827,6 +862,8 @@ async fn test_multiple_disks_multiple_snapshots_order_2( assert_eq!(second_snapshot.disk_id, second_disk.identity.id); assert_eq!(second_snapshot.size, second_disk.size); + assert!(!disk_test.crucible_resources_deleted().await); + // Delete the first disk let disk_url = format!("{}/{}", disks_url, first_disk_name); NexusRequest::object_delete(client, &disk_url) @@ -835,6 +872,8 @@ async fn test_multiple_disks_multiple_snapshots_order_2( .await .expect("failed to delete disk"); + assert!(!disk_test.crucible_resources_deleted().await); + // Delete the second disk let disk_url = format!("{}/{}", disks_url, second_disk_name); NexusRequest::object_delete(client, &disk_url) @@ -843,6 +882,8 @@ async fn test_multiple_disks_multiple_snapshots_order_2( .await .expect("failed to delete disk"); + assert!(!disk_test.crucible_resources_deleted().await); + // Delete the second snapshot let snapshot_url = format!("{}/snapshots/second-snapshot", get_project_url()); @@ -852,6 +893,8 @@ async fn test_multiple_disks_multiple_snapshots_order_2( .await .expect("failed to delete snapshot"); + assert!(!disk_test.crucible_resources_deleted().await); + // Delete the first snapshot let snapshot_url = format!("{}/snapshots/first-snapshot", get_project_url()); @@ -1018,6 +1061,8 @@ async fn test_multiple_layers_of_snapshots_delete_all_disks_first( // Delete the disks for name in ["layer-1-disk", "layer-2-disk", "layer-3-disk"] { + assert!(!disk_test.crucible_resources_deleted().await); + let disk_url = format!("{}/{}", get_disks_url(), name); NexusRequest::object_delete(client, &disk_url) .authn_as(AuthnMode::PrivilegedUser) @@ -1028,6 +1073,8 @@ async fn test_multiple_layers_of_snapshots_delete_all_disks_first( // Delete the snapshots for name in ["layer-1-snapshot", "layer-2-snapshot", "layer-3-snapshot"] { + 
assert!(!disk_test.crucible_resources_deleted().await); + let snapshot_url = format!("{}/snapshots/{}", get_project_url(), name); NexusRequest::object_delete(client, &snapshot_url) .authn_as(AuthnMode::PrivilegedUser) @@ -1055,6 +1102,8 @@ async fn test_multiple_layers_of_snapshots_delete_all_snapshots_first( // Delete the snapshots for name in ["layer-1-snapshot", "layer-2-snapshot", "layer-3-snapshot"] { + assert!(!disk_test.crucible_resources_deleted().await); + let snapshot_url = format!("{}/snapshots/{}", get_project_url(), name); NexusRequest::object_delete(client, &snapshot_url) .authn_as(AuthnMode::PrivilegedUser) @@ -1065,6 +1114,8 @@ async fn test_multiple_layers_of_snapshots_delete_all_snapshots_first( // Delete the disks for name in ["layer-1-disk", "layer-2-disk", "layer-3-disk"] { + assert!(!disk_test.crucible_resources_deleted().await); + let disk_url = format!("{}/{}", get_disks_url(), name); NexusRequest::object_delete(client, &disk_url) .authn_as(AuthnMode::PrivilegedUser) @@ -1117,6 +1168,8 @@ async fn test_multiple_layers_of_snapshots_random_delete_order( dbg!(&objects); for object in &objects { + assert!(!disk_test.crucible_resources_deleted().await); + match object { DeleteObject::Disk(name) => { let disk_url = format!("{}/{}", get_disks_url(), name); From 62e54166c5c25f5fa92d90780fe20a0b8b942072 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 27 Sep 2022 19:23:09 -0400 Subject: [PATCH 8/8] expand comment for Nexus::volume_delete --- nexus/src/app/volume.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/nexus/src/app/volume.rs b/nexus/src/app/volume.rs index 9bb92481f9c..e8bfe362920 100644 --- a/nexus/src/app/volume.rs +++ b/nexus/src/app/volume.rs @@ -12,10 +12,16 @@ use uuid::Uuid; impl super::Nexus { /// Kick off a saga to delete a volume (and clean up any Crucible resources - /// as a result). Importantly, this should not be a sub-saga - whoever is - /// calling this should not block on cleaning up Crucible Resources, because - /// the deletion of a "disk" or "snapshot" could free up a *lot* of Crucible - /// resources and the user's query shouldn't wait on those DELETE calls. + /// as a result). Note that this does not unconditionally delete the volume + /// record: if the allocated Crucible regions associated with this volume + /// still have references, we cannot delete it, so it will be soft-deleted. + /// Only when all the associated resources have been cleaned up does Nexus + /// hard delete the volume record. + /// + /// Importantly, this should not be a sub-saga - whoever is calling this + /// should not block on cleaning up Crucible Resources, because the deletion + /// of a "disk" or "snapshot" could free up a *lot* of Crucible resources + /// and the user's query shouldn't wait on those DELETE calls. pub async fn volume_delete( self: &Arc, volume_id: Uuid,