From 02c50b52317d1888f18b5e0dd10509a6aa0b809c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 9 Oct 2022 19:45:46 -0700 Subject: [PATCH 01/14] VERY wip; basically just refactoring Nexus interfaces for testability --- nexus/src/app/disk.rs | 44 ++++- nexus/src/app/instance.rs | 21 +++ nexus/src/app/mod.rs | 2 +- nexus/src/app/saga.rs | 2 +- nexus/src/app/sagas/disk_create.rs | 227 +++++++++++++++++++++++- nexus/src/app/sagas/disk_delete.rs | 1 + nexus/src/app/sagas/instance_migrate.rs | 32 ++-- nexus/src/app/sagas/snapshot_create.rs | 50 ++---- nexus/src/app/volume.rs | 2 +- nexus/src/saga_interface.rs | 227 +++++++++++++++++++++--- 10 files changed, 526 insertions(+), 82 deletions(-) diff --git a/nexus/src/app/disk.rs b/nexus/src/app/disk.rs index d8c0f60556b..f45ebf2cedd 100644 --- a/nexus/src/app/disk.rs +++ b/nexus/src/app/disk.rs @@ -12,6 +12,7 @@ use crate::db; use crate::db::lookup::LookupPath; use crate::db::model::Name; use crate::external_api::params; +use nexus_types::identity::Resource; use omicron_common::api::external::ByteCount; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; @@ -512,11 +513,50 @@ impl super::Nexus { .await?; // Kick off volume deletion saga(s) - self.volume_delete(db_snapshot.volume_id).await?; + self.clone().volume_delete(db_snapshot.volume_id).await?; if let Some(volume_id) = db_snapshot.destination_volume_id { - self.volume_delete(volume_id).await?; + self.clone().volume_delete(volume_id).await?; } Ok(()) } + + pub(crate) async fn disk_snapshot_sled_agent( + &self, + instance: &db::model::Instance, + disk_id: Uuid, + body: &sled_agent_client::types::InstanceIssueDiskSnapshotRequestBody, + ) -> Result<(), Error> { + let sled_agent_client = self.instance_sled(&instance).await?; + + // Send a snapshot request to propolis through sled agent + sled_agent_client + .instance_issue_disk_snapshot_request( + &instance.id(), + &disk_id, + body, + ) + .await + .map_err(|e| 
Error::internal_error(&e.to_string()))?; + Ok(()) + } + + pub(crate) async fn disk_snapshot_random_sled_agent( + &self, + disk_id: Uuid, + body: &sled_agent_client::types::DiskSnapshotRequestBody, + ) -> Result<(), Error> { + let sled_id = self.random_sled_id().await?.ok_or_else(|| { + Error::internal_error( + "no sled found when looking for random sled?!", + ) + })?; + + self.sled_client(&sled_id) + .await? + .issue_disk_snapshot_request(&disk_id, &body) + .await + .map_err(|e| Error::internal_error(&e.to_string()))?; + Ok(()) + } } diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index ed2417d8a3a..bd9525c7790 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -31,6 +31,7 @@ use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::UpdateResult; use omicron_common::api::internal::nexus; +use sled_agent_client::types::InstanceEnsureBody; use sled_agent_client::types::InstanceRuntimeStateMigrateParams; use sled_agent_client::types::InstanceRuntimeStateRequested; use sled_agent_client::types::InstanceStateRequested; @@ -684,6 +685,26 @@ impl super::Nexus { } } + /// Instructs a single sled to update its runtime. + /// + /// Does not modify the database. + pub(crate) async fn instance_sled_agent_set_runtime( + &self, + sled_id: Uuid, + body: &InstanceEnsureBody, + instance_id: Uuid, + ) -> Result { + let dst_sa = self.sled_client(&sled_id).await?; + + let new_runtime_state: nexus::InstanceRuntimeState = dst_sa + .instance_put(&instance_id, &body) + .await + .map_err(omicron_common::api::external::Error::from)? + .into_inner() + .into(); + Ok(new_runtime_state) + } + /// Lists disks attached to the instance. 
pub async fn instance_list_disks( &self, diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 172d83b9123..0cbfa00a8c6 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -210,7 +210,7 @@ impl Nexus { opctx, my_sec_id, Arc::new(Arc::new(SagaContext::new( - Arc::clone(&nexus), + nexus.clone(), saga_logger, Arc::clone(&authz), ))), diff --git a/nexus/src/app/saga.rs b/nexus/src/app/saga.rs index 983349d58b1..63e126a833f 100644 --- a/nexus/src/app/saga.rs +++ b/nexus/src/app/saga.rs @@ -86,7 +86,7 @@ impl super::Nexus { "saga_id" => saga_id.to_string() )); let saga_context = Arc::new(Arc::new(SagaContext::new( - Arc::clone(self), + self.clone(), saga_logger, Arc::clone(&self.authz), ))); diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index 9bbbb658277..9b6c7ae974f 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -457,7 +457,7 @@ async fn sdc_create_volume_record_undo( let osagactx = sagactx.user_data(); let volume_id = sagactx.lookup::("volume_id")?; - osagactx.nexus().volume_delete(volume_id).await?; + osagactx.nexus().clone().volume_delete(volume_id).await?; Ok(()) } @@ -557,3 +557,228 @@ fn randomize_volume_construction_request_ids( } } } + +#[cfg(test)] +mod test { + use super::*; + use crate::db::datastore::datastore_test; + use crate::saga_interface::SagaContext; + use nexus_test_utils::db::test_setup_database; + use omicron_common::api::external::ByteCount; + use omicron_common::api::external::DeleteResult; + use omicron_common::api::external::IdentityMetadataCreateParams; + use omicron_common::api::external::UpdateResult; + use omicron_common::api::internal::nexus; + use omicron_test_utils::dev; + use steno::DagBuilder; + use steno::InMemorySecStore; + use steno::SagaDag; + use steno::SagaName; + use steno::SecClient; + + fn new_sec(log: &slog::Logger) -> SecClient { + steno::sec(log.new(slog::o!()), Arc::new(InMemorySecStore::new())) + } + + fn 
create_saga_dag(params: N::Params) -> SagaDag { + let builder = DagBuilder::new(SagaName::new(N::NAME)); + let dag = N::make_saga_dag(¶ms, builder) + .expect("Failed to build saga DAG"); + let params = serde_json::to_value(¶ms) + .expect("Failed to serialize parameters"); + SagaDag::new(dag, params) + } + + async fn create_org_and_project( + opctx: &OpContext, + datastore: &db::DataStore, + ) -> crate::authz::Project { + let organization = params::OrganizationCreate { + identity: IdentityMetadataCreateParams { + name: "org".parse().unwrap(), + description: "desc".to_string(), + }, + }; + + let organization = datastore + .organization_create(&opctx, &organization) + .await + .expect("Failed to create org"); + + let project = db::model::Project::new( + organization.id(), + params::ProjectCreate { + identity: IdentityMetadataCreateParams { + name: "project" + .parse() + .expect("Failed to parse project name"), + description: "desc".to_string(), + }, + }, + ); + let project_id = project.id(); + let (.., authz_org) = LookupPath::new(&opctx, &datastore) + .organization_id(organization.id()) + .lookup_for(authz::Action::CreateChild) + .await + .expect("Cannot lookup org to create a child project"); + datastore + .project_create(&opctx, &authz_org, project) + .await + .expect("Failed to create project"); + let (.., authz_project, _project) = LookupPath::new(&opctx, &datastore) + .project_id(project_id) + .fetch() + .await + .expect("Cannot lookup project we just created"); + authz_project + } + + // TODO: This - and frankly a lot of this test - could probably be shared + // between sagas. + struct StubNexus { + datastore: Arc, + } + #[async_trait::async_trait] + impl crate::saga_interface::NexusForSagas for StubNexus { + fn datastore(&self) -> &Arc { + &self.datastore + } + + async fn random_sled_id(&self) -> Result, Error> { + todo!(); + } + + async fn volume_delete( + self: Arc, + volume_id: Uuid, + ) -> DeleteResult { + // TODO! 
+ todo!(); + } + + async fn instance_sled_agent_set_runtime( + &self, + sled_id: Uuid, + body: &sled_agent_client::types::InstanceEnsureBody, + instance_id: Uuid, + ) -> Result { + todo!(); + } + + async fn disk_snapshot_sled_agent( + &self, + instance: &db::model::Instance, + disk_id: Uuid, + body: &sled_agent_client::types::InstanceIssueDiskSnapshotRequestBody, + ) -> Result<(), Error> { + todo!(); + } + + async fn disk_snapshot_random_sled_agent( + &self, + disk_id: Uuid, + body: &sled_agent_client::types::DiskSnapshotRequestBody, + ) -> Result<(), Error> { + todo!(); + } + + // TODO: This one could be implemented purely in the DB? + async fn instance_attach_disk( + &self, + opctx: &OpContext, + organization_name: &db::model::Name, + project_name: &db::model::Name, + instance_name: &db::model::Name, + disk_name: &db::model::Name, + ) -> UpdateResult { + todo!(); + } + + // TODO: This one could be implemented purely in the DB? + async fn instance_detach_disk( + &self, + opctx: &OpContext, + organization_name: &db::model::Name, + project_name: &db::model::Name, + instance_name: &db::model::Name, + disk_name: &db::model::Name, + ) -> UpdateResult { + todo!(); + } + + // TODO: This is half in the DB, half to the sled agent. 
+ async fn instance_set_runtime( + &self, + opctx: &OpContext, + authz_instance: &authz::Instance, + db_instance: &db::model::Instance, + requested: sled_agent_client::types::InstanceRuntimeStateRequested, + ) -> Result<(), Error> { + todo!(); + } + + // TODO: This calls instance_set_runtime, so, all the problems + // that one has too + async fn instance_start_migrate( + &self, + opctx: &OpContext, + instance_id: Uuid, + migration_id: Uuid, + dst_propolis_id: Uuid, + ) -> UpdateResult { + todo!(); + } + } + + #[tokio::test] + async fn test_todotodotodo() { + let logctx = dev::test_setup_log("test_TODOTODTODO"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let authz_project = create_org_and_project(&opctx, &datastore).await; + + // Build the saga DAG with the provided test parameters + let params = Params { + serialized_authn: authn::saga::Serialized::for_opctx(&opctx), + project_id: authz_project.id(), + create_params: params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: "my-disk".parse().expect("Invalid disk name"), + description: "My disk".to_string(), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize(512), + }, + size: ByteCount::from_gibibytes_u32(1), + }, + }; + let dag = create_saga_dag::(params); + + // Create a Saga Executor which can run the saga + let sec = new_sec(&logctx.log); + let saga_id = steno::SagaId(Uuid::new_v4()); + + let nexus = StubNexus { datastore: datastore.clone() }; + let saga_context = Arc::new(Arc::new(SagaContext::new( + Arc::new(nexus), + logctx.log.clone(), + Arc::new(authz::Authz::new(&logctx.log)), + ))); + let fut = sec + .saga_create( + saga_id, + Arc::clone(&saga_context), + Arc::new(dag), + crate::app::sagas::ACTION_REGISTRY.clone(), + ) + .await + .expect("failed to create saga"); + sec.saga_start(saga_id).await.expect("failed to start saga"); + fut.await; + + db.cleanup().await.unwrap(); + 
logctx.cleanup_successful(); + } +} diff --git a/nexus/src/app/sagas/disk_delete.rs b/nexus/src/app/sagas/disk_delete.rs index 9cb8ac04b88..9fe8b5aed3d 100644 --- a/nexus/src/app/sagas/disk_delete.rs +++ b/nexus/src/app/sagas/disk_delete.rs @@ -92,6 +92,7 @@ async fn sdd_delete_volume( let volume_id = sagactx.lookup::("volume_id")?; osagactx .nexus() + .clone() .volume_delete(volume_id) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 373c1585f76..5c2ee78202d 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -247,28 +247,20 @@ async fn sim_instance_migrate( )) })?; - let dst_sa = osagactx - .sled_client(&dst_sled_id) - .await - .map_err(ActionError::action_failed)?; + let body = InstanceEnsureBody { + initial: instance_hardware, + target, + migrate: Some(InstanceMigrateParams { + src_propolis_addr: src_propolis_addr.to_string(), + src_propolis_id, + }), + }; - let new_runtime_state: InstanceRuntimeState = dst_sa - .instance_put( - &instance_id, - &InstanceEnsureBody { - initial: instance_hardware, - target, - migrate: Some(InstanceMigrateParams { - src_propolis_addr: src_propolis_addr.to_string(), - src_propolis_id, - }), - }, - ) + let new_runtime_state = osagactx + .nexus() + .instance_sled_agent_set_runtime(dst_sled_id, &body, instance_id) .await - .map_err(omicron_common::api::external::Error::from) - .map_err(ActionError::action_failed)? 
- .into_inner() - .into(); + .map_err(ActionError::action_failed)?; osagactx .datastore() diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 289aec63a86..e19c6bc0ed7 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -87,7 +87,7 @@ use super::{ }; use crate::app::sagas::NexusAction; use crate::context::OpContext; -use crate::db::identity::{Asset, Resource}; +use crate::db::identity::Asset; use crate::db::lookup::LookupPath; use crate::external_api::params; use crate::{authn, authz, db}; @@ -417,7 +417,7 @@ async fn ssc_create_destination_volume_record_undo( let destination_volume_id = sagactx.lookup::("destination_volume_id")?; - osagactx.nexus().volume_delete(destination_volume_id).await?; + osagactx.nexus().clone().volume_delete(destination_volume_id).await?; Ok(()) } @@ -537,24 +537,12 @@ async fn ssc_send_snapshot_request( .await .map_err(ActionError::action_failed)?; - let sled_agent_client = osagactx + let body = InstanceIssueDiskSnapshotRequestBody { snapshot_id }; + osagactx .nexus() - .instance_sled(&instance) + .disk_snapshot_sled_agent(&instance, disk.id(), &body) .await .map_err(ActionError::action_failed)?; - - info!(log, "instance {} sled agent created ok", instance_id); - - // Send a snapshot request to propolis through sled agent - sled_agent_client - .instance_issue_disk_snapshot_request( - &instance.id(), - &disk.id(), - &InstanceIssueDiskSnapshotRequestBody { snapshot_id }, - ) - .await - .map_err(|e| e.to_string()) - .map_err(ActionError::action_failed)?; } None => { @@ -583,26 +571,16 @@ async fn ssc_send_snapshot_request( })?; // Send the snapshot request to a random sled agent - let sled_agent_client = osagactx - .random_sled_client() - .await - .map_err(ActionError::action_failed)? 
- .ok_or_else(|| { - "no sled found when looking for random sled?!".to_string() - }) - .map_err(ActionError::action_failed)?; + let body = DiskSnapshotRequestBody { + volume_construction_request: disk_volume_construction_request + .clone(), + snapshot_id, + }; - sled_agent_client - .issue_disk_snapshot_request( - &disk.id(), - &DiskSnapshotRequestBody { - volume_construction_request: - disk_volume_construction_request.clone(), - snapshot_id, - }, - ) + osagactx + .nexus() + .disk_snapshot_random_sled_agent(disk.id(), &body) .await - .map_err(|e| e.to_string()) .map_err(ActionError::action_failed)?; } } @@ -786,7 +764,7 @@ async fn ssc_create_volume_record_undo( let volume_id = sagactx.lookup::("volume_id")?; info!(log, "deleting volume {}", volume_id); - osagactx.nexus().volume_delete(volume_id).await?; + osagactx.nexus().clone().volume_delete(volume_id).await?; Ok(()) } diff --git a/nexus/src/app/volume.rs b/nexus/src/app/volume.rs index e8bfe362920..ecb8a16facc 100644 --- a/nexus/src/app/volume.rs +++ b/nexus/src/app/volume.rs @@ -23,7 +23,7 @@ impl super::Nexus { /// of a "disk" or "snapshot" could free up a *lot* of Crucible resources /// and the user's query shouldn't wait on those DELETE calls. pub async fn volume_delete( - self: &Arc, + self: Arc, volume_id: Uuid, ) -> DeleteResult { let saga_params = sagas::volume_delete::Params { volume_id }; diff --git a/nexus/src/saga_interface.rs b/nexus/src/saga_interface.rs index b6a15f9a77e..6324542a2f5 100644 --- a/nexus/src/saga_interface.rs +++ b/nexus/src/saga_interface.rs @@ -4,20 +4,223 @@ //! 
Interfaces available to saga actions and undo actions +use crate::context::OpContext; use crate::Nexus; use crate::{authz, db}; +use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; -use sled_agent_client::Client as SledAgentClient; +use omicron_common::api::external::UpdateResult; +use omicron_common::api::internal::nexus; use slog::Logger; use std::fmt; use std::sync::Arc; use uuid::Uuid; +/// Trait which exposes Nexus interfaces to Sagas. +/// +/// In the production implementation of Nexus, this will almost certainly just +/// be implemented directly by Nexus. However, by defining these methods +/// explicitly, tests can mock out access saga actions that would modify more +/// state than simply the database. +#[async_trait::async_trait] +pub trait NexusForSagas: Send + Sync { + /// Access the raw database under Nexus. + fn datastore(&self) -> &Arc; + + /// Returns a random sled ID. + async fn random_sled_id(&self) -> Result, Error>; + + // TODO: Should this be here? + // + // On the one hand, we do want to let sagas call other sagas. + // However, wouldn't it be nice if they called through this more limited + // nexus interface, rather than the "true" (read: too large) Nexus inteface? + // + // Then we could actually validate the volume-deletion behavior when unit + // testing disks / snapshots. + // + // TODO: I think... this might just involve isolating the "saga engine" + // component of nexus from the rest of it. I'd like to re-use that bit! + async fn volume_delete(self: Arc, volume_id: Uuid) -> DeleteResult; + + /// Makes a request for a specific sled to update its runtime. 
+ async fn instance_sled_agent_set_runtime( + &self, + sled_id: Uuid, + body: &sled_agent_client::types::InstanceEnsureBody, + instance_id: Uuid, + ) -> Result; + + async fn disk_snapshot_sled_agent( + &self, + instance: &db::model::Instance, + disk_id: Uuid, + body: &sled_agent_client::types::InstanceIssueDiskSnapshotRequestBody, + ) -> Result<(), Error>; + + /// Makes a request for a sled to take a snapshot of a disk. + // TODO: Should just be 'snapshot'? already exposing 'random_sled_id' + async fn disk_snapshot_random_sled_agent( + &self, + disk_id: Uuid, + body: &sled_agent_client::types::DiskSnapshotRequestBody, + ) -> Result<(), Error>; + + // TODO: This one could be implemented purely in the DB? + async fn instance_attach_disk( + &self, + opctx: &OpContext, + organization_name: &db::model::Name, + project_name: &db::model::Name, + instance_name: &db::model::Name, + disk_name: &db::model::Name, + ) -> UpdateResult; + + // TODO: This one could be implemented purely in the DB? + async fn instance_detach_disk( + &self, + opctx: &OpContext, + organization_name: &db::model::Name, + project_name: &db::model::Name, + instance_name: &db::model::Name, + disk_name: &db::model::Name, + ) -> UpdateResult; + + // TODO: This is half in the DB, half to the sled agent. + async fn instance_set_runtime( + &self, + opctx: &OpContext, + authz_instance: &authz::Instance, + db_instance: &db::model::Instance, + requested: sled_agent_client::types::InstanceRuntimeStateRequested, + ) -> Result<(), Error>; + + // TODO: This calls instance_set_runtime, so, all the problems + // that one has too + async fn instance_start_migrate( + &self, + opctx: &OpContext, + instance_id: Uuid, + migration_id: Uuid, + dst_propolis_id: Uuid, + ) -> UpdateResult; +} + +// When implementing this interface for Nexus, there's no reason to have +// more complexity than directly calling into one of Nexus' internal methods +// directly. 
+// +// If you're tempted to change that, consider modifying what interface is +// actually exposed to saga actions instead. +#[async_trait::async_trait] +impl NexusForSagas for Nexus { + fn datastore(&self) -> &Arc { + self.datastore() + } + + async fn random_sled_id(&self) -> Result, Error> { + self.random_sled_id().await + } + + async fn volume_delete(self: Arc, volume_id: Uuid) -> DeleteResult { + self.volume_delete(volume_id).await + } + + async fn instance_sled_agent_set_runtime( + &self, + sled_id: Uuid, + body: &sled_agent_client::types::InstanceEnsureBody, + instance_id: Uuid, + ) -> Result { + self.instance_sled_agent_set_runtime(sled_id, body, instance_id).await + } + + async fn disk_snapshot_sled_agent( + &self, + instance: &db::model::Instance, + disk_id: Uuid, + body: &sled_agent_client::types::InstanceIssueDiskSnapshotRequestBody, + ) -> Result<(), Error> { + self.disk_snapshot_sled_agent(instance, disk_id, body).await + } + + async fn disk_snapshot_random_sled_agent( + &self, + disk_id: Uuid, + body: &sled_agent_client::types::DiskSnapshotRequestBody, + ) -> Result<(), Error> { + self.disk_snapshot_random_sled_agent(disk_id, body).await + } + + async fn instance_attach_disk( + &self, + opctx: &OpContext, + organization_name: &db::model::Name, + project_name: &db::model::Name, + instance_name: &db::model::Name, + disk_name: &db::model::Name, + ) -> UpdateResult { + self.instance_attach_disk( + opctx, + organization_name, + project_name, + instance_name, + disk_name, + ) + .await + } + + async fn instance_detach_disk( + &self, + opctx: &OpContext, + organization_name: &db::model::Name, + project_name: &db::model::Name, + instance_name: &db::model::Name, + disk_name: &db::model::Name, + ) -> UpdateResult { + self.instance_detach_disk( + opctx, + organization_name, + project_name, + instance_name, + disk_name, + ) + .await + } + + async fn instance_set_runtime( + &self, + opctx: &OpContext, + authz_instance: &authz::Instance, + db_instance: 
&db::model::Instance, + requested: sled_agent_client::types::InstanceRuntimeStateRequested, + ) -> Result<(), Error> { + self.instance_set_runtime(opctx, authz_instance, db_instance, requested) + .await + } + + async fn instance_start_migrate( + &self, + opctx: &OpContext, + instance_id: Uuid, + migration_id: Uuid, + dst_propolis_id: Uuid, + ) -> UpdateResult { + self.instance_start_migrate( + opctx, + instance_id, + migration_id, + dst_propolis_id, + ) + .await + } +} + // TODO-design Should this be the same thing as ServerContext? It's // very analogous, but maybe there's utility in having separate views for the // HTTP server and sagas. pub struct SagaContext { - nexus: Arc, + nexus: Arc, log: Logger, authz: Arc, } @@ -30,7 +233,7 @@ impl fmt::Debug for SagaContext { impl SagaContext { pub fn new( - nexus: Arc, + nexus: Arc, log: Logger, authz: Arc, ) -> SagaContext { @@ -45,27 +248,11 @@ impl SagaContext { &self.authz } - pub fn nexus(&self) -> &Arc { + pub fn nexus(&self) -> &Arc { &self.nexus } pub fn datastore(&self) -> &db::DataStore { self.nexus.datastore() } - - pub async fn sled_client( - &self, - sled_id: &Uuid, - ) -> Result, Error> { - self.nexus.sled_client(sled_id).await - } - - pub async fn random_sled_client( - &self, - ) -> Result>, Error> { - Ok(match self.nexus.random_sled_id().await? 
{ - Some(sled_id) => Some(self.nexus.sled_client(&sled_id).await?), - None => None, - }) - } } From 971f0b892fd3b28aff07adcde987daac5f341fb3 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 10 Oct 2022 10:03:50 -0700 Subject: [PATCH 02/14] hacks --- nexus/src/app/disk.rs | 2 +- nexus/src/app/instance.rs | 1 + nexus/src/app/sagas/disk_create.rs | 2 +- nexus/src/app/sagas/snapshot_create.rs | 2 +- nexus/src/saga_interface.rs | 56 +++++++++++++------------- 5 files changed, 32 insertions(+), 31 deletions(-) diff --git a/nexus/src/app/disk.rs b/nexus/src/app/disk.rs index f45ebf2cedd..0da42be8166 100644 --- a/nexus/src/app/disk.rs +++ b/nexus/src/app/disk.rs @@ -521,7 +521,7 @@ impl super::Nexus { Ok(()) } - pub(crate) async fn disk_snapshot_sled_agent( + pub(crate) async fn disk_snapshot_instance_sled_agent( &self, instance: &db::model::Instance, disk_id: Uuid, diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index bd9525c7790..5700c3dc181 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -726,6 +726,7 @@ impl super::Nexus { } /// Attach a disk to an instance. + // TODO: Delete? 
pub async fn instance_attach_disk( &self, opctx: &OpContext, diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index 9b6c7ae974f..c15308eef95 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -666,7 +666,7 @@ mod test { todo!(); } - async fn disk_snapshot_sled_agent( + async fn disk_snapshot_instance_sled_agent( &self, instance: &db::model::Instance, disk_id: Uuid, diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index e19c6bc0ed7..2cf94942f49 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -540,7 +540,7 @@ async fn ssc_send_snapshot_request( let body = InstanceIssueDiskSnapshotRequestBody { snapshot_id }; osagactx .nexus() - .disk_snapshot_sled_agent(&instance, disk.id(), &body) + .disk_snapshot_instance_sled_agent(&instance, disk.id(), &body) .await .map_err(ActionError::action_failed)?; } diff --git a/nexus/src/saga_interface.rs b/nexus/src/saga_interface.rs index 6324542a2f5..185cef286a3 100644 --- a/nexus/src/saga_interface.rs +++ b/nexus/src/saga_interface.rs @@ -51,7 +51,7 @@ pub trait NexusForSagas: Send + Sync { instance_id: Uuid, ) -> Result; - async fn disk_snapshot_sled_agent( + async fn disk_snapshot_instance_sled_agent( &self, instance: &db::model::Instance, disk_id: Uuid, @@ -67,14 +67,14 @@ pub trait NexusForSagas: Send + Sync { ) -> Result<(), Error>; // TODO: This one could be implemented purely in the DB? 
- async fn instance_attach_disk( - &self, - opctx: &OpContext, - organization_name: &db::model::Name, - project_name: &db::model::Name, - instance_name: &db::model::Name, - disk_name: &db::model::Name, - ) -> UpdateResult; +// async fn instance_attach_disk( +// &self, +// opctx: &OpContext, +// organization_name: &db::model::Name, +// project_name: &db::model::Name, +// instance_name: &db::model::Name, +// disk_name: &db::model::Name, +// ) -> UpdateResult; // TODO: This one could be implemented purely in the DB? async fn instance_detach_disk( @@ -135,13 +135,13 @@ impl NexusForSagas for Nexus { self.instance_sled_agent_set_runtime(sled_id, body, instance_id).await } - async fn disk_snapshot_sled_agent( + async fn disk_snapshot_instance_sled_agent( &self, instance: &db::model::Instance, disk_id: Uuid, body: &sled_agent_client::types::InstanceIssueDiskSnapshotRequestBody, ) -> Result<(), Error> { - self.disk_snapshot_sled_agent(instance, disk_id, body).await + self.disk_snapshot_instance_sled_agent(instance, disk_id, body).await } async fn disk_snapshot_random_sled_agent( @@ -152,23 +152,23 @@ impl NexusForSagas for Nexus { self.disk_snapshot_random_sled_agent(disk_id, body).await } - async fn instance_attach_disk( - &self, - opctx: &OpContext, - organization_name: &db::model::Name, - project_name: &db::model::Name, - instance_name: &db::model::Name, - disk_name: &db::model::Name, - ) -> UpdateResult { - self.instance_attach_disk( - opctx, - organization_name, - project_name, - instance_name, - disk_name, - ) - .await - } +// async fn instance_attach_disk( +// &self, +// opctx: &OpContext, +// organization_name: &db::model::Name, +// project_name: &db::model::Name, +// instance_name: &db::model::Name, +// disk_name: &db::model::Name, +// ) -> UpdateResult { +// self.instance_attach_disk( +// opctx, +// organization_name, +// project_name, +// instance_name, +// disk_name, +// ) +// .await +// } async fn instance_detach_disk( &self, From 
31bbc22a941d30c545600b95d7432d6aa5cf067e Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 19 Oct 2022 15:09:20 -0400 Subject: [PATCH 03/14] Add unwind test, add undo actions --- Cargo.lock | 1 + nexus/Cargo.toml | 1 + nexus/src/app/disk.rs | 4 +- nexus/src/app/instance.rs | 1 - nexus/src/app/mod.rs | 6 +- nexus/src/app/saga.rs | 74 ++++- nexus/src/app/sagas/disk_create.rs | 416 +++++++++++++------------ nexus/src/app/sagas/disk_delete.rs | 1 - nexus/src/app/sagas/snapshot_create.rs | 4 +- nexus/src/app/volume.rs | 2 +- nexus/src/db/datastore/mod.rs | 2 +- nexus/src/saga_interface.rs | 212 +------------ nexus/test-utils/src/lib.rs | 10 + 13 files changed, 292 insertions(+), 442 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2133ce544eb..d8598602c54 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2993,6 +2993,7 @@ dependencies = [ "num-integer", "omicron-common", "omicron-rpaths", + "omicron-sled-agent", "omicron-test-utils", "openapi-lint", "openapiv3", diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index bb6995d182a..b1eb2ad2aca 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -121,6 +121,7 @@ itertools = "0.10.5" nexus-test-utils-macros = { path = "test-utils-macros" } nexus-test-utils = { path = "test-utils" } omicron-test-utils = { path = "../test-utils" } +omicron-sled-agent = { path = "../sled-agent" } openapiv3 = "1.0" regex = "1.6.0" subprocess = "0.2.9" diff --git a/nexus/src/app/disk.rs b/nexus/src/app/disk.rs index 0da42be8166..747a77fb91c 100644 --- a/nexus/src/app/disk.rs +++ b/nexus/src/app/disk.rs @@ -513,9 +513,9 @@ impl super::Nexus { .await?; // Kick off volume deletion saga(s) - self.clone().volume_delete(db_snapshot.volume_id).await?; + self.volume_delete(db_snapshot.volume_id).await?; if let Some(volume_id) = db_snapshot.destination_volume_id { - self.clone().volume_delete(volume_id).await?; + self.volume_delete(volume_id).await?; } Ok(()) diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 
1426986e13d..6719f5338bb 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -754,7 +754,6 @@ impl super::Nexus { } /// Attach a disk to an instance. - // TODO: Delete? pub async fn instance_attach_disk( &self, opctx: &OpContext, diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 0cbfa00a8c6..ce54bc347b3 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -33,7 +33,7 @@ mod organization; mod oximeter; mod project; mod rack; -mod saga; +pub mod saga; mod session; mod silo; mod sled; @@ -46,7 +46,7 @@ mod vpc_subnet; // Sagas are not part of the "Nexus" implementation, but they are // application logic. -mod sagas; +pub mod sagas; // TODO: When referring to API types, we should try to include // the prefix unless it is unambiguous. @@ -210,7 +210,7 @@ impl Nexus { opctx, my_sec_id, Arc::new(Arc::new(SagaContext::new( - nexus.clone(), + Arc::clone(&nexus), saga_logger, Arc::clone(&authz), ))), diff --git a/nexus/src/app/saga.rs b/nexus/src/app/saga.rs index 63e126a833f..7b2bba7c981 100644 --- a/nexus/src/app/saga.rs +++ b/nexus/src/app/saga.rs @@ -11,6 +11,7 @@ use crate::authz; use crate::context::OpContext; use crate::saga_interface::SagaContext; use anyhow::Context; +use futures::future::BoxFuture; use futures::StreamExt; use omicron_common::api::external; use omicron_common::api::external::DataPageParams; @@ -24,9 +25,32 @@ use steno::DagBuilder; use steno::SagaDag; use steno::SagaId; use steno::SagaName; +use steno::SagaResult; use steno::SagaResultOk; use uuid::Uuid; +pub struct RunnableSaga { + id: SagaId, + fut: BoxFuture<'static, SagaResult>, +} + +impl RunnableSaga { + pub fn id(&self) -> SagaId { + self.id + } +} + +pub fn create_saga_dag( + params: N::Params, +) -> Result { + let builder = DagBuilder::new(SagaName::new(N::NAME)); + let dag = N::make_saga_dag(¶ms, builder)?; + let params = serde_json::to_value(¶ms).map_err(|e| { + SagaInitError::SerializeError(String::from("saga params"), e) + })?; + 
Ok(SagaDag::new(dag, params)) +} + impl super::Nexus { pub async fn sagas_list( &self, @@ -66,23 +90,14 @@ impl super::Nexus { })? } - /// Given a saga type and parameters, create a new saga and execute it. - pub(crate) async fn execute_saga( + pub async fn create_runnable_saga( self: &Arc, - params: N::Params, - ) -> Result { - let saga = { - let builder = DagBuilder::new(SagaName::new(N::NAME)); - let dag = N::make_saga_dag(¶ms, builder)?; - let params = serde_json::to_value(¶ms).map_err(|e| { - SagaInitError::SerializeError(String::from("saga params"), e) - })?; - SagaDag::new(dag, params) - }; - + dag: SagaDag, + ) -> Result { + // Construct the context necessary to execute this saga. let saga_id = SagaId(Uuid::new_v4()); let saga_logger = self.log.new(o!( - "saga_name" => saga.saga_name().to_string(), + "saga_name" => dag.saga_name().to_string(), "saga_id" => saga_id.to_string() )); let saga_context = Arc::new(Arc::new(SagaContext::new( @@ -95,7 +110,7 @@ impl super::Nexus { .saga_create( saga_id, saga_context, - Arc::new(saga), + Arc::new(dag), ACTION_REGISTRY.clone(), ) .await @@ -106,14 +121,20 @@ impl super::Nexus { // Steno. Error::internal_error(&format!("{:#}", error)) })?; + Ok(RunnableSaga { id: saga_id, fut: future }) + } + pub async fn run_saga( + &self, + runnable_saga: RunnableSaga, + ) -> Result { self.sec_client - .saga_start(saga_id) + .saga_start(runnable_saga.id) .await .context("starting saga") .map_err(|error| Error::internal_error(&format!("{:#}", error)))?; - let result = future.await; + let result = runnable_saga.fut.await; result.kind.map_err(|saga_error| { saga_error .error_source @@ -125,4 +146,23 @@ impl super::Nexus { )) }) } + + pub fn sec(&self) -> &steno::SecClient { + &self.sec_client + } + + /// Given a saga type and parameters, create a new saga and execute it. + pub(crate) async fn execute_saga( + self: &Arc, + params: N::Params, + ) -> Result { + // Construct the DAG specific to this saga. 
+ let dag = create_saga_dag::(params)?; + + // Register the saga with the saga executor. + let runnable_saga = self.create_runnable_saga(dag).await?; + + // Actually run the saga to completion. + self.run_saga(runnable_saga).await + } } diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index c15308eef95..1ba978816cf 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -3,8 +3,11 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use super::{ - common_storage::ensure_all_datasets_and_regions, ActionRegistry, - NexusActionContext, NexusSaga, SagaInitError, ACTION_GENERATE_ID, + common_storage::{ + delete_crucible_regions, ensure_all_datasets_and_regions, + }, + ActionRegistry, NexusActionContext, NexusSaga, SagaInitError, + ACTION_GENERATE_ID, }; use crate::app::sagas::NexusAction; use crate::context::OpContext; @@ -42,10 +45,16 @@ lazy_static! { sdc_create_disk_record, sdc_create_disk_record_undo ); - static ref REGIONS_ALLOC: NexusAction = - new_action_noop_undo("disk-create.regions-alloc", sdc_alloc_regions,); - static ref REGIONS_ENSURE: NexusAction = - new_action_noop_undo("disk-create.regions-ensure", sdc_regions_ensure,); + static ref REGIONS_ALLOC: NexusAction = ActionFunc::new_action( + "disk-create.regions-alloc", + sdc_alloc_regions, + sdc_alloc_regions_undo, + ); + static ref REGIONS_ENSURE: NexusAction = ActionFunc::new_action( + "disk-create.regions-ensure", + sdc_regions_ensure, + sdc_regions_ensure_undo, + ); static ref CREATE_VOLUME_RECORD: NexusAction = ActionFunc::new_action( "disk-create.create-volume-record", sdc_create_volume_record, @@ -242,6 +251,23 @@ async fn sdc_alloc_regions( Ok(datasets_and_regions) } +async fn sdc_alloc_regions_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + + let region_ids = sagactx + .lookup::>( + "datasets_and_regions", + )? 
+ .into_iter() + .map(|(_, region)| region.id()) + .collect::>(); + + osagactx.datastore().regions_hard_delete(region_ids).await?; + Ok(()) +} + /// Call out to Crucible agent and perform region creation. async fn sdc_regions_ensure( sagactx: NexusActionContext, @@ -262,7 +288,6 @@ async fn sdc_regions_ensure( // If a disk source was requested, set the read-only parent of this disk. let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; - let log = osagactx.log(); let opctx = OpContext::for_saga_action(&sagactx, ¶ms.serialized_authn); let mut read_only_parent: Option> = @@ -432,6 +457,21 @@ async fn sdc_regions_ensure( Ok(volume_data) } +async fn sdc_regions_ensure_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let log = sagactx.user_data().log(); + warn!(log, "regions_ensure_undo: Deleting crucible regions"); + delete_crucible_regions( + sagactx.lookup::>( + "datasets_and_regions", + )?, + ) + .await?; + info!(log, "regions_ensure_undo: Deleted crucible regions"); + Ok(()) +} + async fn sdc_create_volume_record( sagactx: NexusActionContext, ) -> Result { @@ -457,7 +497,7 @@ async fn sdc_create_volume_record_undo( let osagactx = sagactx.user_data(); let volume_id = sagactx.lookup::("volume_id")?; - osagactx.nexus().clone().volume_delete(volume_id).await?; + osagactx.nexus().volume_delete(volume_id).await?; Ok(()) } @@ -560,225 +600,191 @@ fn randomize_volume_construction_request_ids( #[cfg(test)] mod test { - use super::*; - use crate::db::datastore::datastore_test; - use crate::saga_interface::SagaContext; - use nexus_test_utils::db::test_setup_database; + use async_bb8_diesel::{AsyncRunQueryDsl, OptionalExtension}; + use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; + use dropshot::test_util::ClientTestContext; + use nexus_test_utils::nexus::{ + app::saga::create_saga_dag, app::sagas::disk_create::Params, + app::sagas::disk_create::SagaDiskCreate, authn::saga::Serialized, + context::OpContext, 
db::datastore::DataStore, external_api::params, + }; + use nexus_test_utils::resource_helpers::create_ip_pool; + use nexus_test_utils::resource_helpers::create_organization; + use nexus_test_utils::resource_helpers::create_project; + use nexus_test_utils::resource_helpers::DiskTest; + use nexus_test_utils::ControlPlaneTestContext; + use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::ByteCount; - use omicron_common::api::external::DeleteResult; use omicron_common::api::external::IdentityMetadataCreateParams; - use omicron_common::api::external::UpdateResult; - use omicron_common::api::internal::nexus; - use omicron_test_utils::dev; - use steno::DagBuilder; - use steno::InMemorySecStore; - use steno::SagaDag; - use steno::SagaName; - use steno::SecClient; - - fn new_sec(log: &slog::Logger) -> SecClient { - steno::sec(log.new(slog::o!()), Arc::new(InMemorySecStore::new())) - } - - fn create_saga_dag(params: N::Params) -> SagaDag { - let builder = DagBuilder::new(SagaName::new(N::NAME)); - let dag = N::make_saga_dag(¶ms, builder) - .expect("Failed to build saga DAG"); - let params = serde_json::to_value(¶ms) - .expect("Failed to serialize parameters"); - SagaDag::new(dag, params) - } + use omicron_sled_agent::sim::SledAgent; + use uuid::Uuid; - async fn create_org_and_project( - opctx: &OpContext, - datastore: &db::DataStore, - ) -> crate::authz::Project { - let organization = params::OrganizationCreate { - identity: IdentityMetadataCreateParams { - name: "org".parse().unwrap(), - description: "desc".to_string(), - }, - }; + const ORG_NAME: &str = "test-org"; + const PROJECT_NAME: &str = "springfield-squidport-disks"; - let organization = datastore - .organization_create(&opctx, &organization) - .await - .expect("Failed to create org"); + async fn create_org_and_project(client: &ClientTestContext) -> Uuid { + create_ip_pool(&client, "p0", None, None).await; + create_organization(&client, ORG_NAME).await; + let project = create_project(client, 
ORG_NAME, PROJECT_NAME).await; + project.identity.id + } - let project = db::model::Project::new( - organization.id(), - params::ProjectCreate { + // Helper for creating disk create parameters + fn new_test_params(opctx: &OpContext, project_id: Uuid) -> Params { + Params { + serialized_authn: Serialized::for_opctx(opctx), + project_id, + create_params: params::DiskCreate { identity: IdentityMetadataCreateParams { - name: "project" - .parse() - .expect("Failed to parse project name"), - description: "desc".to_string(), + name: "my-disk".parse().expect("Invalid disk name"), + description: "My disk".to_string(), }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize(512), + }, + size: ByteCount::from_gibibytes_u32(1), }, - ); - let project_id = project.id(); - let (.., authz_org) = LookupPath::new(&opctx, &datastore) - .organization_id(organization.id()) - .lookup_for(authz::Action::CreateChild) - .await - .expect("Cannot lookup org to create a child project"); - datastore - .project_create(&opctx, &authz_org, project) - .await - .expect("Failed to create project"); - let (.., authz_project, _project) = LookupPath::new(&opctx, &datastore) - .project_id(project_id) - .fetch() - .await - .expect("Cannot lookup project we just created"); - authz_project - } - - // TODO: This - and frankly a lot of this test - could probably be shared - // between sagas. - struct StubNexus { - datastore: Arc, - } - #[async_trait::async_trait] - impl crate::saga_interface::NexusForSagas for StubNexus { - fn datastore(&self) -> &Arc { - &self.datastore - } - - async fn random_sled_id(&self) -> Result, Error> { - todo!(); - } - - async fn volume_delete( - self: Arc, - volume_id: Uuid, - ) -> DeleteResult { - // TODO! 
- todo!(); - } - - async fn instance_sled_agent_set_runtime( - &self, - sled_id: Uuid, - body: &sled_agent_client::types::InstanceEnsureBody, - instance_id: Uuid, - ) -> Result { - todo!(); } + } - async fn disk_snapshot_instance_sled_agent( - &self, - instance: &db::model::Instance, - disk_id: Uuid, - body: &sled_agent_client::types::InstanceIssueDiskSnapshotRequestBody, - ) -> Result<(), Error> { - todo!(); - } + #[nexus_test] + async fn test_saga_basic_usage_succeeds( + cptestctx: &ControlPlaneTestContext, + ) { + DiskTest::new(cptestctx).await; - async fn disk_snapshot_random_sled_agent( - &self, - disk_id: Uuid, - body: &sled_agent_client::types::DiskSnapshotRequestBody, - ) -> Result<(), Error> { - todo!(); - } + let client = &cptestctx.external_client; + let nexus = &cptestctx.server.apictx.nexus; + let project_id = create_org_and_project(&client).await; - // TODO: This one could be implemented purely in the DB? - async fn instance_attach_disk( - &self, - opctx: &OpContext, - organization_name: &db::model::Name, - project_name: &db::model::Name, - instance_name: &db::model::Name, - disk_name: &db::model::Name, - ) -> UpdateResult { - todo!(); - } + // Build the saga DAG with the provided test parameters + let opctx = cptestctx.test_opctx(); + let params = new_test_params(&opctx, project_id); + let dag = create_saga_dag::(params).unwrap(); + let runnable_saga = nexus.create_runnable_saga(dag).await.unwrap(); + + // Actually run the saga + let output = nexus.run_saga(runnable_saga).await.unwrap(); + + let disk = output + .lookup_node_output::("created_disk") + .unwrap(); + assert_eq!(disk.project_id, project_id); + } - // TODO: This one could be implemented purely in the DB? 
- async fn instance_detach_disk( - &self, - opctx: &OpContext, - organization_name: &db::model::Name, - project_name: &db::model::Name, - instance_name: &db::model::Name, - disk_name: &db::model::Name, - ) -> UpdateResult { - todo!(); - } + async fn no_disk_records_exist( + datastore: &DataStore, + opctx: &OpContext, + ) -> bool { + use crate::db::model::Disk; + use crate::db::schema::disk::dsl; + + dsl::disk + .filter(dsl::time_deleted.is_null()) + .select(Disk::as_select()) + .first_async::( + datastore.pool_authorized(opctx).await.unwrap(), + ) + .await + .optional() + .unwrap() + .is_none() + } - // TODO: This is half in the DB, half to the sled agent. - async fn instance_set_runtime( - &self, - opctx: &OpContext, - authz_instance: &authz::Instance, - db_instance: &db::model::Instance, - requested: sled_agent_client::types::InstanceRuntimeStateRequested, - ) -> Result<(), Error> { - todo!(); + async fn no_region_allocations_exist( + datastore: &DataStore, + test: &DiskTest, + ) -> bool { + for zpool in &test.zpools { + for dataset in &zpool.datasets { + if datastore + .regions_total_occupied_size(dataset.id) + .await + .unwrap() + != 0 + { + return false; + } + } } + true + } - // TODO: This calls instance_set_runtime, so, all the problems - // that one has too - async fn instance_start_migrate( - &self, - opctx: &OpContext, - instance_id: Uuid, - migration_id: Uuid, - dst_propolis_id: Uuid, - ) -> UpdateResult { - todo!(); + async fn no_regions_ensured( + sled_agent: &SledAgent, + test: &DiskTest, + ) -> bool { + for zpool in &test.zpools { + for dataset in &zpool.datasets { + let crucible_dataset = + sled_agent.get_crucible_dataset(zpool.id, dataset.id).await; + if !crucible_dataset.is_empty().await { + return false; + } + } } + true } - #[tokio::test] - async fn test_todotodotodo() { - let logctx = dev::test_setup_log("test_TODOTODTODO"); - let mut db = test_setup_database(&logctx.log).await; - let (opctx, datastore) = datastore_test(&logctx, &db).await; + 
#[nexus_test] + async fn test_action_failure_can_unwind( + cptestctx: &ControlPlaneTestContext, + ) { + let test = DiskTest::new(cptestctx).await; + let log = &cptestctx.logctx.log; - let authz_project = create_org_and_project(&opctx, &datastore).await; + let client = &cptestctx.external_client; + let nexus = &cptestctx.server.apictx.nexus; + let project_id = create_org_and_project(&client).await; // Build the saga DAG with the provided test parameters - let params = Params { - serialized_authn: authn::saga::Serialized::for_opctx(&opctx), - project_id: authz_project.id(), - create_params: params::DiskCreate { - identity: IdentityMetadataCreateParams { - name: "my-disk".parse().expect("Invalid disk name"), - description: "My disk".to_string(), - }, - disk_source: params::DiskSource::Blank { - block_size: params::BlockSize(512), - }, - size: ByteCount::from_gibibytes_u32(1), - }, - }; - let dag = create_saga_dag::(params); - - // Create a Saga Executor which can run the saga - let sec = new_sec(&logctx.log); - let saga_id = steno::SagaId(Uuid::new_v4()); - - let nexus = StubNexus { datastore: datastore.clone() }; - let saga_context = Arc::new(Arc::new(SagaContext::new( - Arc::new(nexus), - logctx.log.clone(), - Arc::new(authz::Authz::new(&logctx.log)), - ))); - let fut = sec - .saga_create( - saga_id, - Arc::clone(&saga_context), - Arc::new(dag), - crate::app::sagas::ACTION_REGISTRY.clone(), - ) - .await - .expect("failed to create saga"); - sec.saga_start(saga_id).await.expect("failed to start saga"); - fut.await; + let opctx = cptestctx.test_opctx(); - db.cleanup().await.unwrap(); - logctx.cleanup_successful(); + let nodes_to_fail = [ + "disk_id", + "volume_id", + "created_disk", + "datasets_and_regions", + "regions_ensure", + "created_volume", + "disk_runtime", + ]; + + for failing_node in &nodes_to_fail { + // Create a new saga for this node. 
+ info!(log, "Creating new saga which will fail at {failing_node}"); + let params = new_test_params(&opctx, project_id); + let dag = create_saga_dag::(params).unwrap(); + let node_id = dag.get_index(failing_node).unwrap(); + let runnable_saga = nexus.create_runnable_saga(dag).await.unwrap(); + + // Inject an error instead of running the node. + // + // This should cause the saga to unwind. + nexus + .sec() + .saga_inject_error(runnable_saga.id(), node_id) + .await + .unwrap(); + nexus + .run_saga(runnable_saga) + .await + .expect_err("Saga should have failed"); + + let datastore = nexus.datastore(); + + // Check that no partial artifacts of disk creation exist: + assert!(no_disk_records_exist(datastore, &opctx).await); + assert!(no_region_allocations_exist(datastore, &test).await); + assert!( + no_regions_ensured(&cptestctx.sled_agent.sled_agent, &test) + .await + ); + } } + + // TODO: We still need to test: + // - Can we repeat each action safely, without failing / leaving detritus? + // - Can we repeat each undo action safely? + // - Is each node atomic? 
(This seems harder to test) } diff --git a/nexus/src/app/sagas/disk_delete.rs b/nexus/src/app/sagas/disk_delete.rs index 9fe8b5aed3d..9cb8ac04b88 100644 --- a/nexus/src/app/sagas/disk_delete.rs +++ b/nexus/src/app/sagas/disk_delete.rs @@ -92,7 +92,6 @@ async fn sdd_delete_volume( let volume_id = sagactx.lookup::("volume_id")?; osagactx .nexus() - .clone() .volume_delete(volume_id) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 2cf94942f49..9b3c4979eb5 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -417,7 +417,7 @@ async fn ssc_create_destination_volume_record_undo( let destination_volume_id = sagactx.lookup::("destination_volume_id")?; - osagactx.nexus().clone().volume_delete(destination_volume_id).await?; + osagactx.nexus().volume_delete(destination_volume_id).await?; Ok(()) } @@ -764,7 +764,7 @@ async fn ssc_create_volume_record_undo( let volume_id = sagactx.lookup::("volume_id")?; info!(log, "deleting volume {}", volume_id); - osagactx.nexus().clone().volume_delete(volume_id).await?; + osagactx.nexus().volume_delete(volume_id).await?; Ok(()) } diff --git a/nexus/src/app/volume.rs b/nexus/src/app/volume.rs index ecb8a16facc..e8bfe362920 100644 --- a/nexus/src/app/volume.rs +++ b/nexus/src/app/volume.rs @@ -23,7 +23,7 @@ impl super::Nexus { /// of a "disk" or "snapshot" could free up a *lot* of Crucible resources /// and the user's query shouldn't wait on those DELETE calls. 
pub async fn volume_delete( - self: Arc, + self: &Arc, volume_id: Uuid, ) -> DeleteResult { let saga_params = sagas::volume_delete::Params { volume_id }; diff --git a/nexus/src/db/datastore/mod.rs b/nexus/src/db/datastore/mod.rs index b096e9fd658..c4155d391c9 100644 --- a/nexus/src/db/datastore/mod.rs +++ b/nexus/src/db/datastore/mod.rs @@ -122,7 +122,7 @@ impl DataStore { self.pool.pool() } - pub(super) async fn pool_authorized( + pub async fn pool_authorized( &self, opctx: &OpContext, ) -> Result<&bb8::Pool>, Error> { diff --git a/nexus/src/saga_interface.rs b/nexus/src/saga_interface.rs index 185cef286a3..f1be94cf8dd 100644 --- a/nexus/src/saga_interface.rs +++ b/nexus/src/saga_interface.rs @@ -4,223 +4,17 @@ //! Interfaces available to saga actions and undo actions -use crate::context::OpContext; use crate::Nexus; use crate::{authz, db}; -use omicron_common::api::external::DeleteResult; -use omicron_common::api::external::Error; -use omicron_common::api::external::UpdateResult; -use omicron_common::api::internal::nexus; use slog::Logger; use std::fmt; use std::sync::Arc; -use uuid::Uuid; - -/// Trait which exposes Nexus interfaces to Sagas. -/// -/// In the production implementation of Nexus, this will almost certainly just -/// be implemented directly by Nexus. However, by defining these methods -/// explicitly, tests can mock out access saga actions that would modify more -/// state than simply the database. -#[async_trait::async_trait] -pub trait NexusForSagas: Send + Sync { - /// Access the raw database under Nexus. - fn datastore(&self) -> &Arc; - - /// Returns a random sled ID. - async fn random_sled_id(&self) -> Result, Error>; - - // TODO: Should this be here? - // - // On the one hand, we do want to let sagas call other sagas. - // However, wouldn't it be nice if they called through this more limited - // nexus interface, rather than the "true" (read: too large) Nexus inteface? 
- // - // Then we could actually validate the volume-deletion behavior when unit - // testing disks / snapshots. - // - // TODO: I think... this might just involve isolating the "saga engine" - // component of nexus from the rest of it. I'd like to re-use that bit! - async fn volume_delete(self: Arc, volume_id: Uuid) -> DeleteResult; - - /// Makes a request for a specific sled to update its runtime. - async fn instance_sled_agent_set_runtime( - &self, - sled_id: Uuid, - body: &sled_agent_client::types::InstanceEnsureBody, - instance_id: Uuid, - ) -> Result; - - async fn disk_snapshot_instance_sled_agent( - &self, - instance: &db::model::Instance, - disk_id: Uuid, - body: &sled_agent_client::types::InstanceIssueDiskSnapshotRequestBody, - ) -> Result<(), Error>; - - /// Makes a request for a sled to take a snapshot of a disk. - // TODO: Should just be 'snapshot'? already exposing 'random_sled_id' - async fn disk_snapshot_random_sled_agent( - &self, - disk_id: Uuid, - body: &sled_agent_client::types::DiskSnapshotRequestBody, - ) -> Result<(), Error>; - - // TODO: This one could be implemented purely in the DB? -// async fn instance_attach_disk( -// &self, -// opctx: &OpContext, -// organization_name: &db::model::Name, -// project_name: &db::model::Name, -// instance_name: &db::model::Name, -// disk_name: &db::model::Name, -// ) -> UpdateResult; - - // TODO: This one could be implemented purely in the DB? - async fn instance_detach_disk( - &self, - opctx: &OpContext, - organization_name: &db::model::Name, - project_name: &db::model::Name, - instance_name: &db::model::Name, - disk_name: &db::model::Name, - ) -> UpdateResult; - - // TODO: This is half in the DB, half to the sled agent. 
- async fn instance_set_runtime( - &self, - opctx: &OpContext, - authz_instance: &authz::Instance, - db_instance: &db::model::Instance, - requested: sled_agent_client::types::InstanceRuntimeStateRequested, - ) -> Result<(), Error>; - - // TODO: This calls instance_set_runtime, so, all the problems - // that one has too - async fn instance_start_migrate( - &self, - opctx: &OpContext, - instance_id: Uuid, - migration_id: Uuid, - dst_propolis_id: Uuid, - ) -> UpdateResult; -} - -// When implementing this interface for Nexus, there's no reason to have -// more complexity than directly calling into one of Nexus' internal methods -// directly. -// -// If you're tempted to change that, consider modifying what interface is -// actually exposed to saga actions instead. -#[async_trait::async_trait] -impl NexusForSagas for Nexus { - fn datastore(&self) -> &Arc { - self.datastore() - } - - async fn random_sled_id(&self) -> Result, Error> { - self.random_sled_id().await - } - - async fn volume_delete(self: Arc, volume_id: Uuid) -> DeleteResult { - self.volume_delete(volume_id).await - } - - async fn instance_sled_agent_set_runtime( - &self, - sled_id: Uuid, - body: &sled_agent_client::types::InstanceEnsureBody, - instance_id: Uuid, - ) -> Result { - self.instance_sled_agent_set_runtime(sled_id, body, instance_id).await - } - - async fn disk_snapshot_instance_sled_agent( - &self, - instance: &db::model::Instance, - disk_id: Uuid, - body: &sled_agent_client::types::InstanceIssueDiskSnapshotRequestBody, - ) -> Result<(), Error> { - self.disk_snapshot_instance_sled_agent(instance, disk_id, body).await - } - - async fn disk_snapshot_random_sled_agent( - &self, - disk_id: Uuid, - body: &sled_agent_client::types::DiskSnapshotRequestBody, - ) -> Result<(), Error> { - self.disk_snapshot_random_sled_agent(disk_id, body).await - } - -// async fn instance_attach_disk( -// &self, -// opctx: &OpContext, -// organization_name: &db::model::Name, -// project_name: &db::model::Name, -// 
instance_name: &db::model::Name, -// disk_name: &db::model::Name, -// ) -> UpdateResult { -// self.instance_attach_disk( -// opctx, -// organization_name, -// project_name, -// instance_name, -// disk_name, -// ) -// .await -// } - - async fn instance_detach_disk( - &self, - opctx: &OpContext, - organization_name: &db::model::Name, - project_name: &db::model::Name, - instance_name: &db::model::Name, - disk_name: &db::model::Name, - ) -> UpdateResult { - self.instance_detach_disk( - opctx, - organization_name, - project_name, - instance_name, - disk_name, - ) - .await - } - - async fn instance_set_runtime( - &self, - opctx: &OpContext, - authz_instance: &authz::Instance, - db_instance: &db::model::Instance, - requested: sled_agent_client::types::InstanceRuntimeStateRequested, - ) -> Result<(), Error> { - self.instance_set_runtime(opctx, authz_instance, db_instance, requested) - .await - } - - async fn instance_start_migrate( - &self, - opctx: &OpContext, - instance_id: Uuid, - migration_id: Uuid, - dst_propolis_id: Uuid, - ) -> UpdateResult { - self.instance_start_migrate( - opctx, - instance_id, - migration_id, - dst_propolis_id, - ) - .await - } -} // TODO-design Should this be the same thing as ServerContext? It's // very analogous, but maybe there's utility in having separate views for the // HTTP server and sagas. 
pub struct SagaContext { - nexus: Arc, + nexus: Arc, log: Logger, authz: Arc, } @@ -233,7 +27,7 @@ impl fmt::Debug for SagaContext { impl SagaContext { pub fn new( - nexus: Arc, + nexus: Arc, log: Logger, authz: Arc, ) -> SagaContext { @@ -248,7 +42,7 @@ impl SagaContext { &self.authz } - pub fn nexus(&self) -> &Arc { + pub fn nexus(&self) -> &Arc { &self.nexus } diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 82b01c3a0ff..607c8fca92f 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -23,6 +23,9 @@ use std::path::Path; use std::time::Duration; use uuid::Uuid; +// Expose the version of Nexus used by the test utils. +pub use omicron_nexus as nexus; + pub mod db; pub mod http_testing; pub mod resource_helpers; @@ -45,6 +48,13 @@ pub struct ControlPlaneTestContext { } impl ControlPlaneTestContext { + pub fn test_opctx(&self) -> omicron_nexus::context::OpContext { + omicron_nexus::context::OpContext::for_tests( + self.logctx.log.new(o!()), + self.server.apictx.nexus.datastore().clone(), + ) + } + pub async fn teardown(mut self) { for server in self.server.http_servers_external { server.close().await.unwrap(); From a8f9e5defb2a3c7bf8e99e067623bb313ba4f964 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 19 Oct 2022 15:15:31 -0400 Subject: [PATCH 04/14] Undo the saga interface changes --- nexus/src/app/disk.rs | 40 --------------------- nexus/src/app/instance.rs | 21 ----------- nexus/src/app/sagas/instance_migrate.rs | 32 ++++++++++------- nexus/src/app/sagas/snapshot_create.rs | 46 ++++++++++++++++++------- nexus/src/saga_interface.rs | 19 ++++++++++ 5 files changed, 73 insertions(+), 85 deletions(-) diff --git a/nexus/src/app/disk.rs b/nexus/src/app/disk.rs index 747a77fb91c..d8c0f60556b 100644 --- a/nexus/src/app/disk.rs +++ b/nexus/src/app/disk.rs @@ -12,7 +12,6 @@ use crate::db; use crate::db::lookup::LookupPath; use crate::db::model::Name; use crate::external_api::params; -use 
nexus_types::identity::Resource; use omicron_common::api::external::ByteCount; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; @@ -520,43 +519,4 @@ impl super::Nexus { Ok(()) } - - pub(crate) async fn disk_snapshot_instance_sled_agent( - &self, - instance: &db::model::Instance, - disk_id: Uuid, - body: &sled_agent_client::types::InstanceIssueDiskSnapshotRequestBody, - ) -> Result<(), Error> { - let sled_agent_client = self.instance_sled(&instance).await?; - - // Send a snapshot request to propolis through sled agent - sled_agent_client - .instance_issue_disk_snapshot_request( - &instance.id(), - &disk_id, - body, - ) - .await - .map_err(|e| Error::internal_error(&e.to_string()))?; - Ok(()) - } - - pub(crate) async fn disk_snapshot_random_sled_agent( - &self, - disk_id: Uuid, - body: &sled_agent_client::types::DiskSnapshotRequestBody, - ) -> Result<(), Error> { - let sled_id = self.random_sled_id().await?.ok_or_else(|| { - Error::internal_error( - "no sled found when looking for random sled?!", - ) - })?; - - self.sled_client(&sled_id) - .await? - .issue_disk_snapshot_request(&disk_id, &body) - .await - .map_err(|e| Error::internal_error(&e.to_string()))?; - Ok(()) - } } diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 6719f5338bb..518e8d4bb99 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -32,7 +32,6 @@ use omicron_common::api::external::LookupResult; use omicron_common::api::external::UpdateResult; use omicron_common::api::external::Vni; use omicron_common::api::internal::nexus; -use sled_agent_client::types::InstanceEnsureBody; use sled_agent_client::types::InstanceRuntimeStateMigrateParams; use sled_agent_client::types::InstanceRuntimeStateRequested; use sled_agent_client::types::InstanceStateRequested; @@ -713,26 +712,6 @@ impl super::Nexus { } } - /// Instructs a single sled to update its runtime. - /// - /// Does not modify the database. 
- pub(crate) async fn instance_sled_agent_set_runtime( - &self, - sled_id: Uuid, - body: &InstanceEnsureBody, - instance_id: Uuid, - ) -> Result { - let dst_sa = self.sled_client(&sled_id).await?; - - let new_runtime_state: nexus::InstanceRuntimeState = dst_sa - .instance_put(&instance_id, &body) - .await - .map_err(omicron_common::api::external::Error::from)? - .into_inner() - .into(); - Ok(new_runtime_state) - } - /// Lists disks attached to the instance. pub async fn instance_list_disks( &self, diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index ad91c855ff3..eac1c8e6e2d 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -251,21 +251,29 @@ async fn sim_instance_migrate( )) })?; - let body = InstanceEnsureBody { - initial: instance_hardware, - target, - migrate: Some(InstanceMigrateParams { - src_propolis_addr: src_propolis_addr.to_string(), - src_propolis_id, - }), - }; - - let new_runtime_state = osagactx - .nexus() - .instance_sled_agent_set_runtime(dst_sled_id, &body, instance_id) + let dst_sa = osagactx + .sled_client(&dst_sled_id) .await .map_err(ActionError::action_failed)?; + let new_runtime_state: InstanceRuntimeState = dst_sa + .instance_put( + &instance_id, + &InstanceEnsureBody { + initial: instance_hardware, + target, + migrate: Some(InstanceMigrateParams { + src_propolis_addr: src_propolis_addr.to_string(), + src_propolis_id, + }), + }, + ) + .await + .map_err(omicron_common::api::external::Error::from) + .map_err(ActionError::action_failed)? 
+ .into_inner() + .into(); + osagactx .datastore() .instance_update_runtime(&instance_id, &new_runtime_state.into()) diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 9b3c4979eb5..289aec63a86 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -87,7 +87,7 @@ use super::{ }; use crate::app::sagas::NexusAction; use crate::context::OpContext; -use crate::db::identity::Asset; +use crate::db::identity::{Asset, Resource}; use crate::db::lookup::LookupPath; use crate::external_api::params; use crate::{authn, authz, db}; @@ -537,12 +537,24 @@ async fn ssc_send_snapshot_request( .await .map_err(ActionError::action_failed)?; - let body = InstanceIssueDiskSnapshotRequestBody { snapshot_id }; - osagactx + let sled_agent_client = osagactx .nexus() - .disk_snapshot_instance_sled_agent(&instance, disk.id(), &body) + .instance_sled(&instance) .await .map_err(ActionError::action_failed)?; + + info!(log, "instance {} sled agent created ok", instance_id); + + // Send a snapshot request to propolis through sled agent + sled_agent_client + .instance_issue_disk_snapshot_request( + &instance.id(), + &disk.id(), + &InstanceIssueDiskSnapshotRequestBody { snapshot_id }, + ) + .await + .map_err(|e| e.to_string()) + .map_err(ActionError::action_failed)?; } None => { @@ -571,16 +583,26 @@ async fn ssc_send_snapshot_request( })?; // Send the snapshot request to a random sled agent - let body = DiskSnapshotRequestBody { - volume_construction_request: disk_volume_construction_request - .clone(), - snapshot_id, - }; + let sled_agent_client = osagactx + .random_sled_client() + .await + .map_err(ActionError::action_failed)? 
+ .ok_or_else(|| { + "no sled found when looking for random sled?!".to_string() + }) + .map_err(ActionError::action_failed)?; - osagactx - .nexus() - .disk_snapshot_random_sled_agent(disk.id(), &body) + sled_agent_client + .issue_disk_snapshot_request( + &disk.id(), + &DiskSnapshotRequestBody { + volume_construction_request: + disk_volume_construction_request.clone(), + snapshot_id, + }, + ) .await + .map_err(|e| e.to_string()) .map_err(ActionError::action_failed)?; } } diff --git a/nexus/src/saga_interface.rs b/nexus/src/saga_interface.rs index f1be94cf8dd..b6a15f9a77e 100644 --- a/nexus/src/saga_interface.rs +++ b/nexus/src/saga_interface.rs @@ -6,9 +6,12 @@ use crate::Nexus; use crate::{authz, db}; +use omicron_common::api::external::Error; +use sled_agent_client::Client as SledAgentClient; use slog::Logger; use std::fmt; use std::sync::Arc; +use uuid::Uuid; // TODO-design Should this be the same thing as ServerContext? It's // very analogous, but maybe there's utility in having separate views for the @@ -49,4 +52,20 @@ impl SagaContext { pub fn datastore(&self) -> &db::DataStore { self.nexus.datastore() } + + pub async fn sled_client( + &self, + sled_id: &Uuid, + ) -> Result, Error> { + self.nexus.sled_client(sled_id).await + } + + pub async fn random_sled_client( + &self, + ) -> Result>, Error> { + Ok(match self.nexus.random_sled_id().await? 
{ + Some(sled_id) => Some(self.nexus.sled_client(&sled_id).await?), + None => None, + }) + } } From 85862c555fcedeb3cb9c4c21f3a43effb0cea7b8 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 20 Oct 2022 17:48:25 -0400 Subject: [PATCH 05/14] [nexus] Add tests for instance_create idempotency --- Cargo.lock | 6 +- common/Cargo.toml | 2 +- nexus/Cargo.toml | 3 +- nexus/db-model/Cargo.toml | 2 +- nexus/src/app/sagas/disk_create.rs | 2 +- nexus/src/app/sagas/instance_create.rs | 231 +++++++++++++++++++++++++ sled-agent/src/sim/collection.rs | 4 + sled-agent/src/sim/sled_agent.rs | 8 + 8 files changed, 251 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d8598602c54..9c8bc4cf5e6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3007,6 +3007,7 @@ dependencies = [ "oximeter-instruments", "oximeter-producer", "parse-display", + "petgraph", "pq-sys", "rand 0.8.5", "ref-cast", @@ -5384,9 +5385,8 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "steno" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f695d04f2d9e08f3ed0c72acfa21cddc20eb76698a2ff0961a6758d3566454c2" +version = "0.2.1-dev" +source = "git+https://github.com/oxidecomputer/steno?branch=node-access#19cdbd6c26b3f0ebcb1948cd95558d5857a66097" dependencies = [ "anyhow", "async-trait", diff --git a/common/Cargo.toml b/common/Cargo.toml index f09fe17940f..72bc455b0ba 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -25,7 +25,7 @@ serde_json = "1.0" serde_with = "2.0.1" slog = { version = "2.5", features = [ "max_level_trace", "release_max_level_debug" ] } smf = "0.2" -steno = "0.2" +steno = { git = "https://github.com/oxidecomputer/steno", branch = "node-access" } thiserror = "1.0" tokio = { version = "1.21", features = [ "full" ] } tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] } diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 
b1eb2ad2aca..73347a50975 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -52,7 +52,7 @@ serde_urlencoded = "0.7.1" serde_with = "2.0.1" sled-agent-client = { path = "../sled-agent-client" } slog-dtrace = "0.2" -steno = "0.2" +steno = { git = "https://github.com/oxidecomputer/steno", branch = "node-access" } tempfile = "3.3" thiserror = "1.0" toml = "0.5.9" @@ -123,6 +123,7 @@ nexus-test-utils = { path = "test-utils" } omicron-test-utils = { path = "../test-utils" } omicron-sled-agent = { path = "../sled-agent" } openapiv3 = "1.0" +petgraph = "0.6.2" regex = "1.6.0" subprocess = "0.2.9" term = "0.7" diff --git a/nexus/db-model/Cargo.toml b/nexus/db-model/Cargo.toml index 3ff0b4e02a4..894d99ae636 100644 --- a/nexus/db-model/Cargo.toml +++ b/nexus/db-model/Cargo.toml @@ -25,7 +25,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" uuid = { version = "1.2.1", features = ["serde", "v4"] } -steno = "0.2" +steno = { git = "https://github.com/oxidecomputer/steno", branch = "node-access" } db-macros = { path = "../db-macros" } omicron-common = { path = "../../common" } diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index 1ba978816cf..669983fb44a 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -620,7 +620,7 @@ mod test { use uuid::Uuid; const ORG_NAME: &str = "test-org"; - const PROJECT_NAME: &str = "springfield-squidport-disks"; + const PROJECT_NAME: &str = "springfield-squidport"; async fn create_org_and_project(client: &ClientTestContext) -> Uuid { create_ip_pool(&client, "p0", None, None).await; diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index 7f87c5e1e2c..4c7b9d75cc1 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -1014,3 +1014,234 @@ async fn sic_instance_ensure( Ok(()) } + +#[cfg(test)] +mod test { + use async_bb8_diesel::{AsyncRunQueryDsl, 
OptionalExtension}; + use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; + use dropshot::test_util::ClientTestContext; + use nexus_test_utils::nexus::{ + app::saga::create_saga_dag, app::sagas::instance_create::Params, + app::sagas::instance_create::SagaInstanceCreate, + authn::saga::Serialized, context::OpContext, db::datastore::DataStore, + external_api::params, + }; + use nexus_test_utils::resource_helpers::create_disk; + use nexus_test_utils::resource_helpers::create_ip_pool; + use nexus_test_utils::resource_helpers::create_organization; + use nexus_test_utils::resource_helpers::create_project; + use nexus_test_utils::resource_helpers::DiskTest; + use nexus_test_utils::ControlPlaneTestContext; + use nexus_test_utils_macros::nexus_test; + use omicron_common::api::external::{ + ByteCount, IdentityMetadataCreateParams, InstanceCpuCount, + }; + use omicron_sled_agent::sim::SledAgent; + use uuid::Uuid; + + const ORG_NAME: &str = "test-org"; + const PROJECT_NAME: &str = "springfield-squidport"; + const DISK_NAME: &str = "my-disk"; + + async fn create_org_project_and_disk(client: &ClientTestContext) -> Uuid { + create_ip_pool(&client, "p0", None, None).await; + create_organization(&client, ORG_NAME).await; + let project = create_project(client, ORG_NAME, PROJECT_NAME).await; + create_disk(&client, ORG_NAME, PROJECT_NAME, DISK_NAME).await; + project.identity.id + } + + // Helper for creating instance create parameters + fn new_test_params(opctx: &OpContext, project_id: Uuid) -> Params { + Params { + serialized_authn: Serialized::for_opctx(opctx), + organization_name: ORG_NAME.parse().unwrap(), + project_name: PROJECT_NAME.parse().unwrap(), + project_id, + create_params: params::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: "my-instance".parse().unwrap(), + description: "My instance".to_string(), + }, + ncpus: InstanceCpuCount::try_from(2).unwrap(), + memory: ByteCount::from_gibibytes_u32(4), + hostname: String::from("inst"), + user_data: 
vec![], + network_interfaces: + params::InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![params::ExternalIpCreate::Ephemeral { + pool_name: None, + }], + disks: vec![params::InstanceDiskAttachment::Attach( + params::InstanceDiskAttach { + name: DISK_NAME.parse().unwrap(), + }, + )], + start: true, + }, + } + } + + #[nexus_test] + async fn test_saga_basic_usage_succeeds( + cptestctx: &ControlPlaneTestContext, + ) { + DiskTest::new(cptestctx).await; + let client = &cptestctx.external_client; + let nexus = &cptestctx.server.apictx.nexus; + let project_id = create_org_project_and_disk(&client).await; + + // Build the saga DAG with the provided test parameters + let opctx = cptestctx.test_opctx(); + let params = new_test_params(&opctx, project_id); + let dag = create_saga_dag::(params).unwrap(); + let runnable_saga = nexus.create_runnable_saga(dag).await.unwrap(); + + // Actually run the saga + nexus.run_saga(runnable_saga).await.unwrap(); + } + + async fn no_instance_records_exist( + datastore: &DataStore, + opctx: &OpContext, + ) -> bool { + use crate::db::model::Instance; + use crate::db::schema::instance::dsl; + + dsl::instance + .filter(dsl::time_deleted.is_null()) + .select(Instance::as_select()) + .first_async::( + datastore.pool_authorized(opctx).await.unwrap(), + ) + .await + .optional() + .unwrap() + .is_none() + } + + async fn no_network_interface_records_exist( + datastore: &DataStore, + opctx: &OpContext, + ) -> bool { + use crate::db::model::NetworkInterface; + use crate::db::schema::network_interface::dsl; + + dsl::network_interface + .filter(dsl::time_deleted.is_null()) + .select(NetworkInterface::as_select()) + .first_async::( + datastore.pool_authorized(opctx).await.unwrap(), + ) + .await + .optional() + .unwrap() + .is_none() + } + + async fn no_external_ip_records_exist( + datastore: &DataStore, + opctx: &OpContext, + ) -> bool { + use crate::db::model::ExternalIp; + use crate::db::schema::external_ip::dsl; + + dsl::external_ip + 
.filter(dsl::time_deleted.is_null()) + .select(ExternalIp::as_select()) + .first_async::( + datastore.pool_authorized(opctx).await.unwrap(), + ) + .await + .optional() + .unwrap() + .is_none() + } + + async fn disk_is_detached( + datastore: &DataStore, + opctx: &OpContext, + ) -> bool { + use crate::db::model::Disk; + use crate::db::schema::disk::dsl; + + dsl::disk + .filter(dsl::time_deleted.is_null()) + .filter(dsl::name.eq(DISK_NAME)) + .select(Disk::as_select()) + .first_async::( + datastore.pool_authorized(opctx).await.unwrap(), + ) + .await + .unwrap() + .runtime_state + .disk_state + == "detached" + } + + async fn no_instances_or_disks_on_sled(sled_agent: &SledAgent) -> bool { + sled_agent.instance_count().await == 0 + && sled_agent.disk_count().await == 0 + } + + #[nexus_test] + async fn test_action_failure_can_unwind( + cptestctx: &ControlPlaneTestContext, + ) { + DiskTest::new(cptestctx).await; + let log = &cptestctx.logctx.log; + + let client = &cptestctx.external_client; + let nexus = &cptestctx.server.apictx.nexus; + let project_id = create_org_project_and_disk(&client).await; + + // Build the saga DAG with the provided test parameters + let opctx = cptestctx.test_opctx(); + + let params = new_test_params(&opctx, project_id); + let dag = create_saga_dag::(params).unwrap(); + + for i in 0..dag.get_node_count() { + // Create a new saga for this node. + info!( + log, + "Creating new saga which will fail at index {i}"; + "node_name" => dag.get_node_name(i).unwrap(), + "label" => dag.get_node_label(i).unwrap(), + ); + + let runnable_saga = + nexus.create_runnable_saga(dag.clone()).await.unwrap(); + + // Inject an error instead of running the node. + // + // This should cause the saga to unwind. 
+ nexus + .sec() + .saga_inject_error( + runnable_saga.id(), + petgraph::graph::NodeIndex::new(i), + ) + .await + .unwrap(); + nexus + .run_saga(runnable_saga) + .await + .expect_err("Saga should have failed"); + + let datastore = nexus.datastore(); + + // Check that no partial artifacts of instance creation exist + assert!(no_instance_records_exist(datastore, &opctx).await); + assert!( + no_network_interface_records_exist(datastore, &opctx).await + ); + assert!(no_external_ip_records_exist(datastore, &opctx).await); + assert!(disk_is_detached(datastore, &opctx).await); + assert!( + no_instances_or_disks_on_sled(&cptestctx.sled_agent.sled_agent) + .await + ); + } + } +} diff --git a/sled-agent/src/sim/collection.rs b/sled-agent/src/sim/collection.rs index 9f8fba3ad6d..6a4ebe87a00 100644 --- a/sled-agent/src/sim/collection.rs +++ b/sled-agent/src/sim/collection.rs @@ -191,6 +191,10 @@ impl SimCollection { } } + pub async fn size(&self) -> usize { + self.objects.lock().await.len() + } + /// Body of the background task (one per `SimObject`) that simulates /// asynchronous transitions. Each time we read a message from the object's /// channel, we sleep for a bit and then invoke `poke()` to complete whatever diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index b05b2c39eb5..4d14cad7975 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -250,6 +250,14 @@ impl SledAgent { self.disks.sim_ensure(&disk_id, initial_state, target).await } + pub async fn instance_count(&self) -> usize { + self.instances.size().await + } + + pub async fn disk_count(&self) -> usize { + self.disks.size().await + } + pub async fn instance_poke(&self, id: Uuid) { self.instances.sim_poke(id).await; } From e44438d85bd30d82fff99bd9b30b156289afef64 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 25 Oct 2022 21:20:48 -0400 Subject: [PATCH 06/14] Make nexus-test-utils no longer depend on Nexus, for the most part. Not actually. 
There is definitely still a dependency, but the "test setup" part can be re-used with Nexus tests without causing any real circular dependency issues. See https://github.com/oxidecomputer/omicron/pull/1835#discussion_r1001936727 for an example of one such issue. Frankly the rest of nexus-test-utils probably *should* continue to be refactored such that there is no real dependency on Nexus; doing so will reduce the risk of "mis-matched versions of Nexus" classes of errors in the future. This required the following changes: - Move `nexus`'s config information into `common/src/nexus_config.rs`, so that it can be used by `nexus-test-utils`. - Make `nexus-test-utils` operate on a generic version of `NexusServer`, exposed by a new crate named `nexus-test-interface`. This crate can contain any information used to operate Nexus during shared test setup, without actually requiring a full dependency on `nexus`. - Note, adding this generic required making `ControlPlaneTestContext` generic, which ended up bubbling up to a lot of test setup functions, including the one used in the `#[nexus_test]` macro... - ... So, make it possible to customize which version of `NexusServer` is used by `#[nexus_test]`. This means callers can supply their *own* version of `NexusServer`, either from their own crate (see: Nexus unit tests) or the version from the `omicron_nexus` crate (see: Nexus integration tests). 
--- Cargo.lock | 15 +- common/Cargo.toml | 1 + common/src/nexus_config.rs | 580 +++++++++++++++++ nexus/Cargo.toml | 4 +- nexus/benches/setup_benchmark.rs | 4 +- nexus/db-model/src/ipv4net.rs | 4 +- nexus/db-model/src/ipv6net.rs | 4 +- nexus/defaults/src/lib.rs | 8 - nexus/src/app/saga.rs | 5 + nexus/src/app/sagas/disk_create.rs | 40 +- nexus/src/app/vpc_subnet.rs | 6 +- nexus/src/config.rs | 587 +----------------- nexus/src/db/datastore/mod.rs | 9 +- nexus/src/db/queries/network_interface.rs | 2 +- nexus/src/lib.rs | 30 +- nexus/test-interface/Cargo.toml | 11 + nexus/test-interface/src/lib.rs | 53 ++ nexus/test-utils-macros/Cargo.toml | 1 + nexus/test-utils-macros/src/lib.rs | 58 +- nexus/test-utils/Cargo.toml | 1 + nexus/test-utils/src/lib.rs | 68 +- nexus/test-utils/src/resource_helpers.rs | 2 +- nexus/tests/integration_tests/authz.rs | 4 +- nexus/tests/integration_tests/basic.rs | 4 +- nexus/tests/integration_tests/console_api.rs | 14 +- nexus/tests/integration_tests/datasets.rs | 5 +- nexus/tests/integration_tests/device_auth.rs | 4 +- nexus/tests/integration_tests/disks.rs | 4 +- nexus/tests/integration_tests/images.rs | 4 +- nexus/tests/integration_tests/instances.rs | 4 +- nexus/tests/integration_tests/ip_pools.rs | 4 +- .../tests/integration_tests/organizations.rs | 4 +- nexus/tests/integration_tests/oximeter.rs | 10 +- nexus/tests/integration_tests/projects.rs | 4 +- nexus/tests/integration_tests/rack.rs | 4 +- .../integration_tests/role_assignments.rs | 4 +- .../tests/integration_tests/roles_builtin.rs | 4 +- .../tests/integration_tests/router_routes.rs | 4 +- nexus/tests/integration_tests/saml.rs | 4 +- nexus/tests/integration_tests/silos.rs | 4 +- nexus/tests/integration_tests/snapshots.rs | 4 +- nexus/tests/integration_tests/ssh_keys.rs | 4 +- .../integration_tests/subnet_allocation.rs | 6 +- nexus/tests/integration_tests/timeseries.rs | 4 +- nexus/tests/integration_tests/unauthorized.rs | 4 +- nexus/tests/integration_tests/updates.rs | 11 +- 
.../tests/integration_tests/users_builtin.rs | 4 +- .../integration_tests/volume_management.rs | 4 +- nexus/tests/integration_tests/vpc_firewall.rs | 4 +- nexus/tests/integration_tests/vpc_routers.rs | 4 +- nexus/tests/integration_tests/vpc_subnets.rs | 4 +- nexus/tests/integration_tests/vpcs.rs | 4 +- nexus/tests/integration_tests/zpools.rs | 5 +- 53 files changed, 937 insertions(+), 707 deletions(-) create mode 100644 nexus/test-interface/Cargo.toml create mode 100644 nexus/test-interface/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index d8598602c54..dca7802bc90 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2696,6 +2696,16 @@ dependencies = [ "serde_json", ] +[[package]] +name = "nexus-test-interface" +version = "0.1.0" +dependencies = [ + "async-trait", + "dropshot", + "omicron-common", + "slog", +] + [[package]] name = "nexus-test-utils" version = "0.1.0" @@ -2707,6 +2717,7 @@ dependencies = [ "headers", "http", "hyper", + "nexus-test-interface", "omicron-common", "omicron-nexus", "omicron-sled-agent", @@ -2728,6 +2739,7 @@ dependencies = [ name = "nexus-test-utils-macros" version = "0.1.0" dependencies = [ + "proc-macro2", "quote", "syn", ] @@ -2880,6 +2892,7 @@ dependencies = [ "http", "hyper", "ipnetwork", + "libc", "macaddr", "parse-display", "progenitor", @@ -2981,12 +2994,12 @@ dependencies = [ "ipnetwork", "itertools", "lazy_static", - "libc", "macaddr", "mime_guess", "newtype_derive", "nexus-db-model", "nexus-defaults", + "nexus-test-interface", "nexus-test-utils", "nexus-test-utils-macros", "nexus-types", diff --git a/common/Cargo.toml b/common/Cargo.toml index f09fe17940f..2282212dfd7 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -36,5 +36,6 @@ progenitor = { git = "https://github.com/oxidecomputer/progenitor" } [dev-dependencies] expectorate = "1.0.5" +libc = "0.2.135" serde_urlencoded = "0.7.1" tokio = { version = "1.21", features = [ "test-util" ] } diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index 
13a6a7b057b..90ed8e0d0fb 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -7,11 +7,16 @@ use super::address::{Ipv6Subnet, RACK_PREFIX}; use super::postgres_config::PostgresConfigWithUrl; +use anyhow::anyhow; use dropshot::ConfigDropshot; +use dropshot::ConfigLogging; use serde::{Deserialize, Serialize}; use serde_with::serde_as; +use serde_with::DeserializeFromStr; use serde_with::DisplayFromStr; +use serde_with::SerializeDisplay; use std::fmt; +use std::net::SocketAddr; use std::path::{Path, PathBuf}; use uuid::Uuid; @@ -131,3 +136,578 @@ impl DeploymentConfig { Ok(config_parsed) } } + +// By design, we require that all config properties be specified (i.e., we don't +// use `serde(default)`). + +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct AuthnConfig { + /// allowed authentication schemes for external HTTP server + pub schemes_external: Vec, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct ConsoleConfig { + pub static_dir: PathBuf, + /// how long the browser can cache static assets + pub cache_control_max_age_minutes: u32, + /// how long a session can be idle before expiring + pub session_idle_timeout_minutes: u32, + /// how long a session can exist before expiring + pub session_absolute_timeout_minutes: u32, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct UpdatesConfig { + /// Trusted root.json role for the TUF updates repository. + pub trusted_root: PathBuf, + /// Default base URL for the TUF repository. + pub default_base_url: String, +} + +/// Optional configuration for the timeseries database. +#[derive(Clone, Debug, Default, Deserialize, PartialEq, Serialize)] +pub struct TimeseriesDbConfig { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub address: Option, +} + +// A deserializable type that does no validation on the tunable parameters. 
+#[derive(Clone, Debug, Deserialize, PartialEq)] +struct UnvalidatedTunables { + max_vpc_ipv4_subnet_prefix: u8, +} + +/// Tunable configuration parameters, intended for use in test environments or +/// other situations in which experimentation / tuning is valuable. +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +#[serde(try_from = "UnvalidatedTunables")] +pub struct Tunables { + /// The maximum prefix size supported for VPC Subnet IPv4 subnetworks. + /// + /// Note that this is the maximum _prefix_ size, which sets the minimum size + /// of the subnet. + pub max_vpc_ipv4_subnet_prefix: u8, +} + +// Convert from the unvalidated tunables, verifying each parameter as needed. +impl TryFrom for Tunables { + type Error = InvalidTunable; + + fn try_from(unvalidated: UnvalidatedTunables) -> Result { + Tunables::validate_ipv4_prefix(unvalidated.max_vpc_ipv4_subnet_prefix)?; + Ok(Tunables { + max_vpc_ipv4_subnet_prefix: unvalidated.max_vpc_ipv4_subnet_prefix, + }) + } +} + +/// Minimum prefix size supported in IPv4 VPC Subnets. +/// +/// NOTE: This is the minimum _prefix_, which sets the maximum subnet size. +pub const MIN_VPC_IPV4_SUBNET_PREFIX: u8 = 8; + +/// The number of reserved addresses at the beginning of a subnet range. +pub const NUM_INITIAL_RESERVED_IP_ADDRESSES: usize = 5; + +impl Tunables { + fn validate_ipv4_prefix(prefix: u8) -> Result<(), InvalidTunable> { + let absolute_max: u8 = 32_u8 + .checked_sub( + // Always need space for the reserved Oxide addresses, including the + // broadcast address at the end of the subnet. + ((NUM_INITIAL_RESERVED_IP_ADDRESSES + 1) as f32) + .log2() // Subnet size to bit prefix. + .ceil() // Round up to a whole number of bits. 
+ as u8, + ) + .expect("Invalid absolute maximum IPv4 subnet prefix"); + if prefix >= MIN_VPC_IPV4_SUBNET_PREFIX && prefix <= absolute_max { + Ok(()) + } else { + Err(InvalidTunable { + tunable: String::from("max_vpc_ipv4_subnet_prefix"), + message: format!( + "IPv4 subnet prefix must be in the range [0, {}], found: {}", + absolute_max, + prefix, + ), + }) + } + } +} + +/// The maximum prefix size by default. +/// +/// There are 6 Oxide reserved IP addresses, 5 at the beginning for DNS and the +/// like, and the broadcast address at the end of the subnet. This size provides +/// room for 2 ** 6 - 6 = 58 IP addresses, which seems like a reasonable size +/// for the smallest subnet that's still useful in many contexts. +pub const MAX_VPC_IPV4_SUBNET_PREFIX: u8 = 26; + +impl Default for Tunables { + fn default() -> Self { + Tunables { max_vpc_ipv4_subnet_prefix: MAX_VPC_IPV4_SUBNET_PREFIX } + } +} + +/// Configuration for a nexus server +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct PackageConfig { + /// Console-related tunables + pub console: ConsoleConfig, + /// Server-wide logging configuration. + pub log: ConfigLogging, + /// Authentication-related configuration + pub authn: AuthnConfig, + /// Timeseries database configuration. + #[serde(default)] + pub timeseries_db: TimeseriesDbConfig, + /// Updates-related configuration. Updates APIs return 400 Bad Request when this is + /// unconfigured. + #[serde(default)] + pub updates: Option, + /// Tunable configuration for testing and experimentation + #[serde(default)] + pub tunables: Tunables, +} + +#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] +pub struct Config { + /// Configuration parameters known at compile-time. + #[serde(flatten)] + pub pkg: PackageConfig, + + /// A variety of configuration parameters only known at deployment time. 
+ pub deployment: DeploymentConfig, +} + +impl Config { + /// Load a `Config` from the given TOML file + /// + /// This config object can then be used to create a new `Nexus`. + /// The format is described in the README. + pub fn from_file>(path: P) -> Result { + let path = path.as_ref(); + let file_contents = std::fs::read_to_string(path) + .map_err(|e| (path.to_path_buf(), e))?; + let config_parsed: Self = toml::from_str(&file_contents) + .map_err(|e| (path.to_path_buf(), e))?; + Ok(config_parsed) + } +} + +/// List of supported external authn schemes +/// +/// Note that the authn subsystem doesn't know about this type. It allows +/// schemes to be called whatever they want. This is just to provide a set of +/// allowed values for configuration. +#[derive( + Clone, Copy, Debug, DeserializeFromStr, Eq, PartialEq, SerializeDisplay, +)] +pub enum SchemeName { + Spoof, + SessionCookie, + AccessToken, +} + +impl std::str::FromStr for SchemeName { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + match s { + "spoof" => Ok(SchemeName::Spoof), + "session_cookie" => Ok(SchemeName::SessionCookie), + "access_token" => Ok(SchemeName::AccessToken), + _ => Err(anyhow!("unsupported authn scheme: {:?}", s)), + } + } +} + +impl std::fmt::Display for SchemeName { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(match self { + SchemeName::Spoof => "spoof", + SchemeName::SessionCookie => "session_cookie", + SchemeName::AccessToken => "access_token", + }) + } +} + +#[cfg(test)] +mod test { + use super::Tunables; + use super::{ + AuthnConfig, Config, ConsoleConfig, LoadError, PackageConfig, + SchemeName, TimeseriesDbConfig, UpdatesConfig, + }; + use crate::address::{Ipv6Subnet, RACK_PREFIX}; + use crate::nexus_config::{Database, DeploymentConfig, LoadErrorKind}; + use dropshot::ConfigDropshot; + use dropshot::ConfigLogging; + use dropshot::ConfigLoggingIfExists; + use dropshot::ConfigLoggingLevel; + use libc; + use std::fs; + use 
std::net::{Ipv6Addr, SocketAddr}; + use std::path::Path; + use std::path::PathBuf; + + /// Generates a temporary filesystem path unique for the given label. + fn temp_path(label: &str) -> PathBuf { + let arg0str = std::env::args().next().expect("expected process arg0"); + let arg0 = Path::new(&arg0str) + .file_name() + .expect("expected arg0 filename") + .to_str() + .expect("expected arg0 filename to be valid Unicode"); + let pid = std::process::id(); + let mut pathbuf = std::env::temp_dir(); + pathbuf.push(format!("{}.{}.{}", arg0, pid, label)); + pathbuf + } + + /// Load a Config with the given string `contents`. To exercise + /// the full path, this function writes the contents to a file first, then + /// loads the config from that file, then removes the file. `label` is used + /// as a unique string for the filename and error messages. It should be + /// unique for each test. + fn read_config(label: &str, contents: &str) -> Result { + let pathbuf = temp_path(label); + let path = pathbuf.as_path(); + eprintln!("writing test config {}", path.display()); + fs::write(path, contents).expect("write to tempfile failed"); + + let result = Config::from_file(path); + fs::remove_file(path).expect("failed to remove temporary file"); + eprintln!("{:?}", result); + result + } + + // Totally bogus config files (nonexistent, bad TOML syntax) + + #[test] + fn test_config_nonexistent() { + let error = Config::from_file(Path::new("/nonexistent")) + .expect_err("expected config to fail from /nonexistent"); + let expected = std::io::Error::from_raw_os_error(libc::ENOENT); + assert_eq!(error, expected); + } + + #[test] + fn test_config_bad_toml() { + let error = + read_config("bad_toml", "foo =").expect_err("expected failure"); + if let LoadErrorKind::Parse(error) = &error.kind { + assert_eq!(error.line_col(), Some((0, 5))); + assert_eq!( + error.to_string(), + "unexpected eof encountered at line 1 column 6" + ); + } else { + panic!( + "Got an unexpected error, expected Parse but 
got {:?}", + error + ); + } + } + + // Empty config (special case of a missing required field, but worth calling + // out explicitly) + + #[test] + fn test_config_empty() { + let error = read_config("empty", "").expect_err("expected failure"); + if let LoadErrorKind::Parse(error) = &error.kind { + assert_eq!(error.line_col(), None); + assert_eq!(error.to_string(), "missing field `deployment`"); + } else { + panic!( + "Got an unexpected error, expected Parse but got {:?}", + error + ); + } + } + + // Success case. We don't need to retest semantics for either ConfigLogging + // or ConfigDropshot because those are both tested within Dropshot. If we + // add new configuration sections of our own, we will want to test those + // here (both syntax and semantics). + #[test] + fn test_valid() { + let config = read_config( + "valid", + r##" + [console] + static_dir = "tests/static" + cache_control_max_age_minutes = 10 + session_idle_timeout_minutes = 60 + session_absolute_timeout_minutes = 480 + [authn] + schemes_external = [] + [log] + mode = "file" + level = "debug" + path = "/nonexistent/path" + if_exists = "fail" + [timeseries_db] + address = "[::1]:8123" + [updates] + trusted_root = "/path/to/root.json" + default_base_url = "http://example.invalid/" + [tunables] + max_vpc_ipv4_subnet_prefix = 27 + [deployment] + id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" + [[deployment.dropshot_external]] + bind_address = "10.1.2.3:4567" + request_body_max_bytes = 1024 + [deployment.dropshot_internal] + bind_address = "10.1.2.3:4568" + request_body_max_bytes = 1024 + [deployment.subnet] + net = "::/56" + [deployment.database] + type = "from_dns" + "##, + ) + .unwrap(); + + assert_eq!( + config, + Config { + deployment: DeploymentConfig { + id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), + rack_id: "38b90dc4-c22a-65ba-f49a-f051fe01208f" + .parse() + .unwrap(), + dropshot_external: vec![ConfigDropshot { + bind_address: 
"10.1.2.3:4567" + .parse::() + .unwrap(), + ..Default::default() + },], + dropshot_internal: ConfigDropshot { + bind_address: "10.1.2.3:4568" + .parse::() + .unwrap(), + ..Default::default() + }, + subnet: Ipv6Subnet::::new(Ipv6Addr::LOCALHOST), + database: Database::FromDns, + }, + pkg: PackageConfig { + console: ConsoleConfig { + static_dir: "tests/static".parse().unwrap(), + cache_control_max_age_minutes: 10, + session_idle_timeout_minutes: 60, + session_absolute_timeout_minutes: 480 + }, + authn: AuthnConfig { schemes_external: Vec::new() }, + log: ConfigLogging::File { + level: ConfigLoggingLevel::Debug, + if_exists: ConfigLoggingIfExists::Fail, + path: "/nonexistent/path".to_string() + }, + timeseries_db: TimeseriesDbConfig { + address: Some("[::1]:8123".parse().unwrap()) + }, + updates: Some(UpdatesConfig { + trusted_root: PathBuf::from("/path/to/root.json"), + default_base_url: "http://example.invalid/".into(), + }), + tunables: Tunables { max_vpc_ipv4_subnet_prefix: 27 }, + }, + } + ); + + let config = read_config( + "valid", + r##" + [console] + static_dir = "tests/static" + cache_control_max_age_minutes = 10 + session_idle_timeout_minutes = 60 + session_absolute_timeout_minutes = 480 + [authn] + schemes_external = [ "spoof", "session_cookie" ] + [log] + mode = "file" + level = "debug" + path = "/nonexistent/path" + if_exists = "fail" + [timeseries_db] + address = "[::1]:8123" + [deployment] + id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" + [[deployment.dropshot_external]] + bind_address = "10.1.2.3:4567" + request_body_max_bytes = 1024 + [deployment.dropshot_internal] + bind_address = "10.1.2.3:4568" + request_body_max_bytes = 1024 + [deployment.subnet] + net = "::/56" + [deployment.database] + type = "from_dns" + "##, + ) + .unwrap(); + + assert_eq!( + config.pkg.authn.schemes_external, + vec![SchemeName::Spoof, SchemeName::SessionCookie], + ); + } + + #[test] + fn test_bad_authn_schemes() { + let error = 
read_config( + "bad authn.schemes_external", + r##" + [console] + static_dir = "tests/static" + cache_control_max_age_minutes = 10 + session_idle_timeout_minutes = 60 + session_absolute_timeout_minutes = 480 + [authn] + schemes_external = ["trust-me"] + [log] + mode = "file" + level = "debug" + path = "/nonexistent/path" + if_exists = "fail" + [timeseries_db] + address = "[::1]:8123" + [deployment] + id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" + [[deployment.dropshot_external]] + bind_address = "10.1.2.3:4567" + request_body_max_bytes = 1024 + [deployment.dropshot_internal] + bind_address = "10.1.2.3:4568" + request_body_max_bytes = 1024 + [deployment.subnet] + net = "::/56" + [deployment.database] + type = "from_dns" + "##, + ) + .expect_err("expected failure"); + if let LoadErrorKind::Parse(error) = &error.kind { + assert!( + error + .to_string() + .starts_with("unsupported authn scheme: \"trust-me\""), + "error = {}", + error.to_string() + ); + } else { + panic!( + "Got an unexpected error, expected Parse but got {:?}", + error + ); + } + } + + #[test] + fn test_invalid_ipv4_prefix_tunable() { + let error = read_config( + "invalid_ipv4_prefix_tunable", + r##" + [console] + static_dir = "tests/static" + cache_control_max_age_minutes = 10 + session_idle_timeout_minutes = 60 + session_absolute_timeout_minutes = 480 + [authn] + schemes_external = [] + [log] + mode = "file" + level = "debug" + path = "/nonexistent/path" + if_exists = "fail" + [timeseries_db] + address = "[::1]:8123" + [updates] + trusted_root = "/path/to/root.json" + default_base_url = "http://example.invalid/" + [tunables] + max_vpc_ipv4_subnet_prefix = 100 + [deployment] + id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" + rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" + [[deployment.dropshot_external]] + bind_address = "10.1.2.3:4567" + request_body_max_bytes = 1024 + [deployment.dropshot_internal] + bind_address = "10.1.2.3:4568" + 
request_body_max_bytes = 1024 + [deployment.subnet] + net = "::/56" + [deployment.database] + type = "from_dns" + "##, + ) + .expect_err("Expected failure"); + if let LoadErrorKind::Parse(error) = &error.kind { + assert!(error.to_string().starts_with( + r#"invalid "max_vpc_ipv4_subnet_prefix": "IPv4 subnet prefix must"#, + )); + } else { + panic!( + "Got an unexpected error, expected Parse but got {:?}", + error + ); + } + } + + #[test] + fn test_repo_configs_are_valid() { + // The example config file should be valid. + let config_path = "../nexus/examples/config.toml"; + println!("checking {:?}", config_path); + let example_config = Config::from_file(config_path) + .expect("example config file is not valid"); + + // The config file used for the tests should also be valid. The tests + // won't clear the runway anyway if this file isn't valid. But it's + // helpful to verify this here explicitly as well. + let config_path = "../nexus/examples/config.toml"; + println!("checking {:?}", config_path); + let _ = Config::from_file(config_path) + .expect("test config file is not valid"); + + // The partial config file that's used to deploy Nexus must also be + // valid. However, it's missing the "deployment" section because that's + // generated at deployment time. We'll serialize this section from the + // example config file (loaded above), append it to the contents of this + // file, and verify the whole thing. + #[derive(serde::Serialize)] + struct DummyConfig { + deployment: DeploymentConfig, + } + let config_path = "../smf/nexus/config-partial.toml"; + println!( + "checking {:?} with example deployment section added", + config_path + ); + let mut contents = std::fs::read_to_string(config_path) + .expect("failed to read Nexus SMF config file"); + contents.push_str( + "\n\n\n \ + # !! 
content below added by test_repo_configs_are_valid()\n\ + \n\n\n", + ); + let example_deployment = toml::to_string_pretty(&DummyConfig { + deployment: example_config.deployment, + }) + .unwrap(); + contents.push_str(&example_deployment); + let _: Config = toml::from_str(&contents) + .expect("Nexus SMF config file is not valid"); + } +} diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index b1eb2ad2aca..2de7e7ba4e8 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -27,10 +27,12 @@ hyper = "0.14" internal-dns-client = { path = "../internal-dns-client" } ipnetwork = "0.20" lazy_static = "1.4.0" -libc = "0.2.135" macaddr = { version = "1.0.1", features = [ "serde_std" ]} mime_guess = "2.0.4" newtype_derive = "0.1.6" +# Not under "dev-dependencies"; these also need to be implemented for +# integration tests. +nexus-test-interface = { path = "test-interface" } num-integer = "0.1.45" # must match samael's crate! openssl = "0.10" diff --git a/nexus/benches/setup_benchmark.rs b/nexus/benches/setup_benchmark.rs index 24584670ce5..d9e9577a1f0 100644 --- a/nexus/benches/setup_benchmark.rs +++ b/nexus/benches/setup_benchmark.rs @@ -12,7 +12,9 @@ use omicron_test_utils::dev; // This is the default wrapper around most Nexus integration tests. // Benchmark how long an "empty test" would take. 
async fn do_full_setup() { - let ctx = nexus_test_utils::test_setup("full_setup").await; + let ctx = + nexus_test_utils::test_setup::("full_setup") + .await; ctx.teardown().await; } diff --git a/nexus/db-model/src/ipv4net.rs b/nexus/db-model/src/ipv4net.rs index 664dda5c80c..c5b45e4064f 100644 --- a/nexus/db-model/src/ipv4net.rs +++ b/nexus/db-model/src/ipv4net.rs @@ -8,8 +8,8 @@ use diesel::pg::Pg; use diesel::serialize::{self, ToSql}; use diesel::sql_types; use ipnetwork::IpNetwork; -use nexus_defaults as defaults; use omicron_common::api::external; +use omicron_common::nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; use std::net::Ipv4Addr; #[derive(Clone, Copy, Debug, PartialEq, AsExpression, FromSqlRow)] @@ -26,7 +26,7 @@ impl Ipv4Net { && ( // First N addresses are reserved self.iter() - .take(defaults::NUM_INITIAL_RESERVED_IP_ADDRESSES) + .take(NUM_INITIAL_RESERVED_IP_ADDRESSES) .all(|this| this != addr) ) && ( diff --git a/nexus/db-model/src/ipv6net.rs b/nexus/db-model/src/ipv6net.rs index a91c3efcd4d..2bbcb08a4bb 100644 --- a/nexus/db-model/src/ipv6net.rs +++ b/nexus/db-model/src/ipv6net.rs @@ -8,8 +8,8 @@ use diesel::pg::Pg; use diesel::serialize::{self, ToSql}; use diesel::sql_types; use ipnetwork::IpNetwork; -use nexus_defaults as defaults; use omicron_common::api::external; +use omicron_common::nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; use rand::{rngs::StdRng, SeedableRng}; use std::net::Ipv6Addr; @@ -77,7 +77,7 @@ impl Ipv6Net { self.contains(addr) && self .iter() - .take(defaults::NUM_INITIAL_RESERVED_IP_ADDRESSES) + .take(NUM_INITIAL_RESERVED_IP_ADDRESSES) .all(|this| this != addr) } } diff --git a/nexus/defaults/src/lib.rs b/nexus/defaults/src/lib.rs index c7af06cca01..3474804cfa0 100644 --- a/nexus/defaults/src/lib.rs +++ b/nexus/defaults/src/lib.rs @@ -13,14 +13,6 @@ use omicron_common::api::external::Ipv6Net; use std::net::Ipv4Addr; use std::net::Ipv6Addr; -/// Minimum prefix size supported in IPv4 VPC Subnets. 
-/// -/// NOTE: This is the minimum _prefix_, which sets the maximum subnet size. -pub const MIN_VPC_IPV4_SUBNET_PREFIX: u8 = 8; - -/// The number of reserved addresses at the beginning of a subnet range. -pub const NUM_INITIAL_RESERVED_IP_ADDRESSES: usize = 5; - /// The name provided for a default primary network interface for a guest /// instance. pub const DEFAULT_PRIMARY_NIC_NAME: &str = "net0"; diff --git a/nexus/src/app/saga.rs b/nexus/src/app/saga.rs index 7b2bba7c981..256c3a73290 100644 --- a/nexus/src/app/saga.rs +++ b/nexus/src/app/saga.rs @@ -29,6 +29,11 @@ use steno::SagaResult; use steno::SagaResultOk; use uuid::Uuid; +/// Encapsulates a saga to be run before we actually start running it +/// +/// At this point, we've built the DAG, loaded it into the SEC, etc. but haven't +/// started it running. This is a useful point to inject errors, inspect the +/// DAG, etc. pub struct RunnableSaga { id: SagaId, fut: BoxFuture<'static, SagaResult>, diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index 1ba978816cf..ded22f407ac 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -600,27 +600,29 @@ fn randomize_volume_construction_request_ids( #[cfg(test)] mod test { - use async_bb8_diesel::{AsyncRunQueryDsl, OptionalExtension}; - use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; - use dropshot::test_util::ClientTestContext; - use nexus_test_utils::nexus::{ + use crate::{ app::saga::create_saga_dag, app::sagas::disk_create::Params, app::sagas::disk_create::SagaDiskCreate, authn::saga::Serialized, context::OpContext, db::datastore::DataStore, external_api::params, }; + use async_bb8_diesel::{AsyncRunQueryDsl, OptionalExtension}; + use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; + use dropshot::test_util::ClientTestContext; use nexus_test_utils::resource_helpers::create_ip_pool; use nexus_test_utils::resource_helpers::create_organization; use 
nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::DiskTest; - use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::ByteCount; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_sled_agent::sim::SledAgent; use uuid::Uuid; + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + const ORG_NAME: &str = "test-org"; - const PROJECT_NAME: &str = "springfield-squidport-disks"; + const PROJECT_NAME: &str = "springfield-squidport"; async fn create_org_and_project(client: &ClientTestContext) -> Uuid { create_ip_pool(&client, "p0", None, None).await; @@ -647,7 +649,14 @@ mod test { } } - #[nexus_test] + pub fn test_opctx(cptestctx: &ControlPlaneTestContext) -> OpContext { + OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + cptestctx.server.apictx.nexus.datastore().clone(), + ) + } + + #[nexus_test(server = crate::Server)] async fn test_saga_basic_usage_succeeds( cptestctx: &ControlPlaneTestContext, ) { @@ -658,7 +667,7 @@ mod test { let project_id = create_org_and_project(&client).await; // Build the saga DAG with the provided test parameters - let opctx = cptestctx.test_opctx(); + let opctx = test_opctx(cptestctx); let params = new_test_params(&opctx, project_id); let dag = create_saga_dag::(params).unwrap(); let runnable_saga = nexus.create_runnable_saga(dag).await.unwrap(); @@ -672,19 +681,14 @@ mod test { assert_eq!(disk.project_id, project_id); } - async fn no_disk_records_exist( - datastore: &DataStore, - opctx: &OpContext, - ) -> bool { + async fn no_disk_records_exist(datastore: &DataStore) -> bool { use crate::db::model::Disk; use crate::db::schema::disk::dsl; dsl::disk .filter(dsl::time_deleted.is_null()) .select(Disk::as_select()) - .first_async::( - datastore.pool_authorized(opctx).await.unwrap(), - ) + .first_async::(datastore.pool_for_tests().await.unwrap()) .await .optional() .unwrap() @@ -726,7 
+730,7 @@ mod test { true } - #[nexus_test] + #[nexus_test(server = crate::Server)] async fn test_action_failure_can_unwind( cptestctx: &ControlPlaneTestContext, ) { @@ -738,7 +742,7 @@ mod test { let project_id = create_org_and_project(&client).await; // Build the saga DAG with the provided test parameters - let opctx = cptestctx.test_opctx(); + let opctx = test_opctx(cptestctx); let nodes_to_fail = [ "disk_id", @@ -774,7 +778,7 @@ mod test { let datastore = nexus.datastore(); // Check that no partial artifacts of disk creation exist: - assert!(no_disk_records_exist(datastore, &opctx).await); + assert!(no_disk_records_exist(datastore).await); assert!(no_region_allocations_exist(datastore, &test).await); assert!( no_regions_ensured(&cptestctx.sled_agent.sled_agent, &test) diff --git a/nexus/src/app/vpc_subnet.rs b/nexus/src/app/vpc_subnet.rs index a162377b260..144e8abf793 100644 --- a/nexus/src/app/vpc_subnet.rs +++ b/nexus/src/app/vpc_subnet.rs @@ -13,7 +13,6 @@ use crate::db::model::Name; use crate::db::model::VpcSubnet; use crate::db::queries::vpc_subnet::SubnetError; use crate::external_api::params; -use nexus_defaults as defaults; use omicron_common::api::external; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; @@ -21,6 +20,7 @@ use omicron_common::api::external::DeleteResult; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::UpdateResult; +use omicron_common::nexus_config::MIN_VPC_IPV4_SUBNET_PREFIX; use uuid::Uuid; impl super::Nexus { @@ -48,7 +48,7 @@ impl super::Nexus { "VPC Subnet IPv4 address ranges must be from a private range", )); } - if params.ipv4_block.prefix() < defaults::MIN_VPC_IPV4_SUBNET_PREFIX + if params.ipv4_block.prefix() < MIN_VPC_IPV4_SUBNET_PREFIX || params.ipv4_block.prefix() > self.tunables.max_vpc_ipv4_subnet_prefix { @@ -57,7 +57,7 @@ impl super::Nexus { "VPC Subnet IPv4 address ranges must have 
prefix ", "length between {} and {}, inclusive" ), - defaults::MIN_VPC_IPV4_SUBNET_PREFIX, + MIN_VPC_IPV4_SUBNET_PREFIX, self.tunables.max_vpc_ipv4_subnet_prefix, ))); } diff --git a/nexus/src/config.rs b/nexus/src/config.rs index d622368ef1b..dc265d819c3 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -5,585 +5,10 @@ //! Interfaces for parsing configuration files and working with a nexus server //! configuration -use anyhow::anyhow; -use dropshot::ConfigLogging; -use omicron_common::nexus_config::{ - DeploymentConfig, InvalidTunable, LoadError, -}; -use serde::Deserialize; -use serde::Serialize; -use serde_with::DeserializeFromStr; -use serde_with::SerializeDisplay; -use std::net::SocketAddr; -use std::path::{Path, PathBuf}; +// TODO: Use them directly? No need for this file -// By design, we require that all config properties be specified (i.e., we don't -// use `serde(default)`). - -#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] -pub struct AuthnConfig { - /// allowed authentication schemes for external HTTP server - pub schemes_external: Vec, -} - -#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] -pub struct ConsoleConfig { - pub static_dir: PathBuf, - /// how long the browser can cache static assets - pub cache_control_max_age_minutes: u32, - /// how long a session can be idle before expiring - pub session_idle_timeout_minutes: u32, - /// how long a session can exist before expiring - pub session_absolute_timeout_minutes: u32, -} - -#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] -pub struct UpdatesConfig { - /// Trusted root.json role for the TUF updates repository. - pub trusted_root: PathBuf, - /// Default base URL for the TUF repository. - pub default_base_url: String, -} - -/// Optional configuration for the timeseries database. 
-#[derive(Clone, Debug, Default, Deserialize, PartialEq, Serialize)] -pub struct TimeseriesDbConfig { - #[serde(default, skip_serializing_if = "Option::is_none")] - pub address: Option, -} - -// A deserializable type that does no validation on the tunable parameters. -#[derive(Clone, Debug, Deserialize, PartialEq)] -struct UnvalidatedTunables { - max_vpc_ipv4_subnet_prefix: u8, -} - -/// Tunable configuration parameters, intended for use in test environments or -/// other situations in which experimentation / tuning is valuable. -#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] -#[serde(try_from = "UnvalidatedTunables")] -pub struct Tunables { - /// The maximum prefix size supported for VPC Subnet IPv4 subnetworks. - /// - /// Note that this is the maximum _prefix_ size, which sets the minimum size - /// of the subnet. - pub max_vpc_ipv4_subnet_prefix: u8, -} - -// Convert from the unvalidated tunables, verifying each parameter as needed. -impl TryFrom for Tunables { - type Error = InvalidTunable; - - fn try_from(unvalidated: UnvalidatedTunables) -> Result { - Tunables::validate_ipv4_prefix(unvalidated.max_vpc_ipv4_subnet_prefix)?; - Ok(Tunables { - max_vpc_ipv4_subnet_prefix: unvalidated.max_vpc_ipv4_subnet_prefix, - }) - } -} - -impl Tunables { - fn validate_ipv4_prefix(prefix: u8) -> Result<(), InvalidTunable> { - let absolute_max: u8 = 32_u8 - .checked_sub( - // Always need space for the reserved Oxide addresses, including the - // broadcast address at the end of the subnet. - ((nexus_defaults::NUM_INITIAL_RESERVED_IP_ADDRESSES + 1) as f32) - .log2() // Subnet size to bit prefix. - .ceil() // Round up to a whole number of bits. 
- as u8, - ) - .expect("Invalid absolute maximum IPv4 subnet prefix"); - if prefix >= nexus_defaults::MIN_VPC_IPV4_SUBNET_PREFIX - && prefix <= absolute_max - { - Ok(()) - } else { - Err(InvalidTunable { - tunable: String::from("max_vpc_ipv4_subnet_prefix"), - message: format!( - "IPv4 subnet prefix must be in the range [0, {}], found: {}", - absolute_max, - prefix, - ), - }) - } - } -} - -/// The maximum prefix size by default. -/// -/// There are 6 Oxide reserved IP addresses, 5 at the beginning for DNS and the -/// like, and the broadcast address at the end of the subnet. This size provides -/// room for 2 ** 6 - 6 = 58 IP addresses, which seems like a reasonable size -/// for the smallest subnet that's still useful in many contexts. -pub const MAX_VPC_IPV4_SUBNET_PREFIX: u8 = 26; - -impl Default for Tunables { - fn default() -> Self { - Tunables { max_vpc_ipv4_subnet_prefix: MAX_VPC_IPV4_SUBNET_PREFIX } - } -} - -/// Configuration for a nexus server -#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] -pub struct PackageConfig { - /// Console-related tunables - pub console: ConsoleConfig, - /// Server-wide logging configuration. - pub log: ConfigLogging, - /// Authentication-related configuration - pub authn: AuthnConfig, - /// Timeseries database configuration. - #[serde(default)] - pub timeseries_db: TimeseriesDbConfig, - /// Updates-related configuration. Updates APIs return 400 Bad Request when this is - /// unconfigured. - #[serde(default)] - pub updates: Option, - /// Tunable configuration for testing and experimentation - #[serde(default)] - pub tunables: Tunables, -} - -#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] -pub struct Config { - /// Configuration parameters known at compile-time. - #[serde(flatten)] - pub pkg: PackageConfig, - - /// A variety of configuration parameters only known at deployment time. 
- pub deployment: DeploymentConfig, -} - -impl Config { - /// Load a `Config` from the given TOML file - /// - /// This config object can then be used to create a new `Nexus`. - /// The format is described in the README. - pub fn from_file>(path: P) -> Result { - let path = path.as_ref(); - let file_contents = std::fs::read_to_string(path) - .map_err(|e| (path.to_path_buf(), e))?; - let config_parsed: Self = toml::from_str(&file_contents) - .map_err(|e| (path.to_path_buf(), e))?; - Ok(config_parsed) - } -} - -/// List of supported external authn schemes -/// -/// Note that the authn subsystem doesn't know about this type. It allows -/// schemes to be called whatever they want. This is just to provide a set of -/// allowed values for configuration. -#[derive( - Clone, Copy, Debug, DeserializeFromStr, Eq, PartialEq, SerializeDisplay, -)] -pub enum SchemeName { - Spoof, - SessionCookie, - AccessToken, -} - -impl std::str::FromStr for SchemeName { - type Err = anyhow::Error; - - fn from_str(s: &str) -> Result { - match s { - "spoof" => Ok(SchemeName::Spoof), - "session_cookie" => Ok(SchemeName::SessionCookie), - "access_token" => Ok(SchemeName::AccessToken), - _ => Err(anyhow!("unsupported authn scheme: {:?}", s)), - } - } -} - -impl std::fmt::Display for SchemeName { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(match self { - SchemeName::Spoof => "spoof", - SchemeName::SessionCookie => "session_cookie", - SchemeName::AccessToken => "access_token", - }) - } -} - -#[cfg(test)] -mod test { - use super::Tunables; - use super::{ - AuthnConfig, Config, ConsoleConfig, LoadError, PackageConfig, - SchemeName, TimeseriesDbConfig, UpdatesConfig, - }; - use dropshot::ConfigDropshot; - use dropshot::ConfigLogging; - use dropshot::ConfigLoggingIfExists; - use dropshot::ConfigLoggingLevel; - use libc; - use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; - use omicron_common::nexus_config::{ - Database, DeploymentConfig, LoadErrorKind, - 
}; - use std::fs; - use std::net::{Ipv6Addr, SocketAddr}; - use std::path::Path; - use std::path::PathBuf; - - /// Generates a temporary filesystem path unique for the given label. - fn temp_path(label: &str) -> PathBuf { - let arg0str = std::env::args().next().expect("expected process arg0"); - let arg0 = Path::new(&arg0str) - .file_name() - .expect("expected arg0 filename") - .to_str() - .expect("expected arg0 filename to be valid Unicode"); - let pid = std::process::id(); - let mut pathbuf = std::env::temp_dir(); - pathbuf.push(format!("{}.{}.{}", arg0, pid, label)); - pathbuf - } - - /// Load a Config with the given string `contents`. To exercise - /// the full path, this function writes the contents to a file first, then - /// loads the config from that file, then removes the file. `label` is used - /// as a unique string for the filename and error messages. It should be - /// unique for each test. - fn read_config(label: &str, contents: &str) -> Result { - let pathbuf = temp_path(label); - let path = pathbuf.as_path(); - eprintln!("writing test config {}", path.display()); - fs::write(path, contents).expect("write to tempfile failed"); - - let result = Config::from_file(path); - fs::remove_file(path).expect("failed to remove temporary file"); - eprintln!("{:?}", result); - result - } - - // Totally bogus config files (nonexistent, bad TOML syntax) - - #[test] - fn test_config_nonexistent() { - let error = Config::from_file(Path::new("/nonexistent")) - .expect_err("expected config to fail from /nonexistent"); - let expected = std::io::Error::from_raw_os_error(libc::ENOENT); - assert_eq!(error, expected); - } - - #[test] - fn test_config_bad_toml() { - let error = - read_config("bad_toml", "foo =").expect_err("expected failure"); - if let LoadErrorKind::Parse(error) = &error.kind { - assert_eq!(error.line_col(), Some((0, 5))); - assert_eq!( - error.to_string(), - "unexpected eof encountered at line 1 column 6" - ); - } else { - panic!( - "Got an unexpected 
error, expected Parse but got {:?}", - error - ); - } - } - - // Empty config (special case of a missing required field, but worth calling - // out explicitly) - - #[test] - fn test_config_empty() { - let error = read_config("empty", "").expect_err("expected failure"); - if let LoadErrorKind::Parse(error) = &error.kind { - assert_eq!(error.line_col(), None); - assert_eq!(error.to_string(), "missing field `deployment`"); - } else { - panic!( - "Got an unexpected error, expected Parse but got {:?}", - error - ); - } - } - - // Success case. We don't need to retest semantics for either ConfigLogging - // or ConfigDropshot because those are both tested within Dropshot. If we - // add new configuration sections of our own, we will want to test those - // here (both syntax and semantics). - #[test] - fn test_valid() { - let config = read_config( - "valid", - r##" - [console] - static_dir = "tests/static" - cache_control_max_age_minutes = 10 - session_idle_timeout_minutes = 60 - session_absolute_timeout_minutes = 480 - [authn] - schemes_external = [] - [log] - mode = "file" - level = "debug" - path = "/nonexistent/path" - if_exists = "fail" - [timeseries_db] - address = "[::1]:8123" - [updates] - trusted_root = "/path/to/root.json" - default_base_url = "http://example.invalid/" - [tunables] - max_vpc_ipv4_subnet_prefix = 27 - [deployment] - id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" - rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" - [[deployment.dropshot_external]] - bind_address = "10.1.2.3:4567" - request_body_max_bytes = 1024 - [deployment.dropshot_internal] - bind_address = "10.1.2.3:4568" - request_body_max_bytes = 1024 - [deployment.subnet] - net = "::/56" - [deployment.database] - type = "from_dns" - "##, - ) - .unwrap(); - - assert_eq!( - config, - Config { - deployment: DeploymentConfig { - id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), - rack_id: "38b90dc4-c22a-65ba-f49a-f051fe01208f" - .parse() - .unwrap(), - dropshot_external: 
vec![ConfigDropshot { - bind_address: "10.1.2.3:4567" - .parse::() - .unwrap(), - ..Default::default() - },], - dropshot_internal: ConfigDropshot { - bind_address: "10.1.2.3:4568" - .parse::() - .unwrap(), - ..Default::default() - }, - subnet: Ipv6Subnet::::new(Ipv6Addr::LOCALHOST), - database: Database::FromDns, - }, - pkg: PackageConfig { - console: ConsoleConfig { - static_dir: "tests/static".parse().unwrap(), - cache_control_max_age_minutes: 10, - session_idle_timeout_minutes: 60, - session_absolute_timeout_minutes: 480 - }, - authn: AuthnConfig { schemes_external: Vec::new() }, - log: ConfigLogging::File { - level: ConfigLoggingLevel::Debug, - if_exists: ConfigLoggingIfExists::Fail, - path: "/nonexistent/path".to_string() - }, - timeseries_db: TimeseriesDbConfig { - address: Some("[::1]:8123".parse().unwrap()) - }, - updates: Some(UpdatesConfig { - trusted_root: PathBuf::from("/path/to/root.json"), - default_base_url: "http://example.invalid/".into(), - }), - tunables: Tunables { max_vpc_ipv4_subnet_prefix: 27 }, - }, - } - ); - - let config = read_config( - "valid", - r##" - [console] - static_dir = "tests/static" - cache_control_max_age_minutes = 10 - session_idle_timeout_minutes = 60 - session_absolute_timeout_minutes = 480 - [authn] - schemes_external = [ "spoof", "session_cookie" ] - [log] - mode = "file" - level = "debug" - path = "/nonexistent/path" - if_exists = "fail" - [timeseries_db] - address = "[::1]:8123" - [deployment] - id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" - rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" - [[deployment.dropshot_external]] - bind_address = "10.1.2.3:4567" - request_body_max_bytes = 1024 - [deployment.dropshot_internal] - bind_address = "10.1.2.3:4568" - request_body_max_bytes = 1024 - [deployment.subnet] - net = "::/56" - [deployment.database] - type = "from_dns" - "##, - ) - .unwrap(); - - assert_eq!( - config.pkg.authn.schemes_external, - vec![SchemeName::Spoof, SchemeName::SessionCookie], - ); - } - - #[test] - fn 
test_bad_authn_schemes() { - let error = read_config( - "bad authn.schemes_external", - r##" - [console] - static_dir = "tests/static" - cache_control_max_age_minutes = 10 - session_idle_timeout_minutes = 60 - session_absolute_timeout_minutes = 480 - [authn] - schemes_external = ["trust-me"] - [log] - mode = "file" - level = "debug" - path = "/nonexistent/path" - if_exists = "fail" - [timeseries_db] - address = "[::1]:8123" - [deployment] - id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" - rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" - [[deployment.dropshot_external]] - bind_address = "10.1.2.3:4567" - request_body_max_bytes = 1024 - [deployment.dropshot_internal] - bind_address = "10.1.2.3:4568" - request_body_max_bytes = 1024 - [deployment.subnet] - net = "::/56" - [deployment.database] - type = "from_dns" - "##, - ) - .expect_err("expected failure"); - if let LoadErrorKind::Parse(error) = &error.kind { - assert!( - error - .to_string() - .starts_with("unsupported authn scheme: \"trust-me\""), - "error = {}", - error.to_string() - ); - } else { - panic!( - "Got an unexpected error, expected Parse but got {:?}", - error - ); - } - } - - #[test] - fn test_invalid_ipv4_prefix_tunable() { - let error = read_config( - "invalid_ipv4_prefix_tunable", - r##" - [console] - static_dir = "tests/static" - cache_control_max_age_minutes = 10 - session_idle_timeout_minutes = 60 - session_absolute_timeout_minutes = 480 - [authn] - schemes_external = [] - [log] - mode = "file" - level = "debug" - path = "/nonexistent/path" - if_exists = "fail" - [timeseries_db] - address = "[::1]:8123" - [updates] - trusted_root = "/path/to/root.json" - default_base_url = "http://example.invalid/" - [tunables] - max_vpc_ipv4_subnet_prefix = 100 - [deployment] - id = "28b90dc4-c22a-65ba-f49a-f051fe01208f" - rack_id = "38b90dc4-c22a-65ba-f49a-f051fe01208f" - [[deployment.dropshot_external]] - bind_address = "10.1.2.3:4567" - request_body_max_bytes = 1024 - [deployment.dropshot_internal] - 
bind_address = "10.1.2.3:4568" - request_body_max_bytes = 1024 - [deployment.subnet] - net = "::/56" - [deployment.database] - type = "from_dns" - "##, - ) - .expect_err("Expected failure"); - if let LoadErrorKind::Parse(error) = &error.kind { - assert!(error.to_string().starts_with( - r#"invalid "max_vpc_ipv4_subnet_prefix": "IPv4 subnet prefix must"#, - )); - } else { - panic!( - "Got an unexpected error, expected Parse but got {:?}", - error - ); - } - } - - #[test] - fn test_repo_configs_are_valid() { - // The example config file should be valid. - let config_path = "examples/config.toml"; - println!("checking {:?}", config_path); - let example_config = Config::from_file(config_path) - .expect("example config file is not valid"); - - // The config file used for the tests should also be valid. The tests - // won't clear the runway anyway if this file isn't valid. But it's - // helpful to verify this here explicitly as well. - let config_path = "examples/config.toml"; - println!("checking {:?}", config_path); - let _ = Config::from_file(config_path) - .expect("test config file is not valid"); - - // The partial config file that's used to deploy Nexus must also be - // valid. However, it's missing the "deployment" section because that's - // generated at deployment time. We'll serialize this section from the - // example config file (loaded above), append it to the contents of this - // file, and verify the whole thing. - #[derive(serde::Serialize)] - struct DummyConfig { - deployment: DeploymentConfig, - } - let config_path = "../smf/nexus/config-partial.toml"; - println!( - "checking {:?} with example deployment section added", - config_path - ); - let mut contents = std::fs::read_to_string(config_path) - .expect("failed to read Nexus SMF config file"); - contents.push_str( - "\n\n\n \ - # !! 
content below added by test_repo_configs_are_valid()\n\ - \n\n\n", - ); - let example_deployment = toml::to_string_pretty(&DummyConfig { - deployment: example_config.deployment, - }) - .unwrap(); - contents.push_str(&example_deployment); - let _: Config = toml::from_str(&contents) - .expect("Nexus SMF config file is not valid"); - } -} +pub use omicron_common::nexus_config::Config; +pub use omicron_common::nexus_config::PackageConfig; +pub use omicron_common::nexus_config::SchemeName; +pub use omicron_common::nexus_config::Tunables; +pub use omicron_common::nexus_config::UpdatesConfig; diff --git a/nexus/src/db/datastore/mod.rs b/nexus/src/db/datastore/mod.rs index c4155d391c9..f636e510ddb 100644 --- a/nexus/src/db/datastore/mod.rs +++ b/nexus/src/db/datastore/mod.rs @@ -122,7 +122,7 @@ impl DataStore { self.pool.pool() } - pub async fn pool_authorized( + pub(super) async fn pool_authorized( &self, opctx: &OpContext, ) -> Result<&bb8::Pool>, Error> { @@ -130,6 +130,13 @@ impl DataStore { Ok(self.pool.pool()) } + #[cfg(test)] + pub async fn pool_for_tests( + &self, + ) -> Result<&bb8::Pool>, Error> { + Ok(self.pool.pool()) + } + /// Return the next available IPv6 address for an Oxide service running on /// the provided sled. 
pub async fn next_ipv6_address( diff --git a/nexus/src/db/queries/network_interface.rs b/nexus/src/db/queries/network_interface.rs index fdae2f5e84f..825d3068494 100644 --- a/nexus/src/db/queries/network_interface.rs +++ b/nexus/src/db/queries/network_interface.rs @@ -25,8 +25,8 @@ use diesel::QueryResult; use diesel::RunQueryDsl; use ipnetwork::IpNetwork; use ipnetwork::Ipv4Network; -use nexus_defaults::NUM_INITIAL_RESERVED_IP_ADDRESSES; use omicron_common::api::external; +use omicron_common::nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; use std::net::IpAddr; use uuid::Uuid; diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index a29c02cfb14..ef29550f7da 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -27,12 +27,13 @@ pub mod updates; // public for testing pub use app::test_interfaces::TestInterfaces; pub use app::Nexus; -pub use config::{Config, PackageConfig}; +pub use config::Config; pub use context::ServerContext; pub use crucible_agent_client; use external_api::http_entrypoints::external_api; use internal_api::http_entrypoints::internal_api; use slog::Logger; +use std::net::SocketAddr; use std::sync::Arc; #[macro_use] @@ -161,6 +162,33 @@ impl Server { } } +#[async_trait::async_trait] +impl nexus_test_interface::NexusServer for Server { + async fn start_and_populate(config: &Config, log: &Logger) -> Self { + let server = Server::start(config, log).await.unwrap(); + server.apictx.nexus.wait_for_populate().await.unwrap(); + server + } + + fn get_http_servers_external(&self) -> Vec { + self.http_servers_external + .iter() + .map(|server| server.local_addr()) + .collect() + } + + fn get_http_server_internal(&self) -> SocketAddr { + self.http_server_internal.local_addr() + } + + async fn close(mut self) { + for server in self.http_servers_external { + server.close().await.unwrap(); + } + self.http_server_internal.close().await.unwrap(); + } +} + /// Run an instance of the [Server]. 
pub async fn run_server(config: &Config) -> Result<(), String> { use slog::Drain; diff --git a/nexus/test-interface/Cargo.toml b/nexus/test-interface/Cargo.toml new file mode 100644 index 00000000000..1df5091c5ec --- /dev/null +++ b/nexus/test-interface/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "nexus-test-interface" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +async-trait = "0.1.56" +dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main" , features = [ "usdt-probes" ] } +omicron-common = { path = "../../common" } +slog = { version = "2.7" } diff --git a/nexus/test-interface/src/lib.rs b/nexus/test-interface/src/lib.rs new file mode 100644 index 00000000000..40227313f90 --- /dev/null +++ b/nexus/test-interface/src/lib.rs @@ -0,0 +1,53 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Interfaces for Nexus under test. +//! +//! By splitting these interfaces into a new crate, we can avoid a circular +//! dependency on Nexus during testing. +//! +//! Both Nexus unit tests and Integration tests want to able to share +//! utilities for launching Nexus, a multi-service setup process that exists in +//! `nexus-test-utils`. +//! +//! Without a separate test interface crate, this dependency looks like the +//! following (note: "─►" means "depends on") +//! +//! ┌────────────┐ +//! │ │ +//! ┌──▼──┐ ┌───────┴────────┐ ┌─────────────────┐ +//! │nexus├─►nexus-test-utils◄──┤integration tests│ +//! └──▲──┘ └────────────────┘ └─────────┬───────┘ +//! │ │ +//! └──────────────────────────────────┘ +//! +//! As we can see, this introduces a circular dependency between +//! `nexus-test-utils` and `nexus`. +//! +//! However, by separating out the portion of `nexus` used by `nexus-test-utils` +//! into a separate trait, we can break the circular dependency: +//! 
+//! ┌────────────────────────────────────┐ +//! │ │ +//! ┌──▼──┐ ┌────────────────┐ ┌─────────┴───────┐ +//! │nexus├───►nexus-test-utils◄──┤integration tests│ +//! └──┬──┘ └──────┬─────────┘ └─────────────────┘ +//! │ │ +//! │ ┌────────▼───────────┐ +//! └────►nexus-test-interface│ +//! └────────────────────┘ + +use async_trait::async_trait; +use omicron_common::nexus_config::Config; +use slog::Logger; +use std::net::SocketAddr; + +#[async_trait] +pub trait NexusServer { + async fn start_and_populate(config: &Config, log: &Logger) -> Self; + + fn get_http_servers_external(&self) -> Vec; + fn get_http_server_internal(&self) -> SocketAddr; + async fn close(self); +} diff --git a/nexus/test-utils-macros/Cargo.toml b/nexus/test-utils-macros/Cargo.toml index 3fceb1b8339..08609f68db2 100644 --- a/nexus/test-utils-macros/Cargo.toml +++ b/nexus/test-utils-macros/Cargo.toml @@ -7,5 +7,6 @@ edition = "2021" proc-macro = true [dependencies] +proc-macro2 = { version = "1.0" } quote = "1.0" syn = { version="1.0", features=["full", "fold", "parsing"] } diff --git a/nexus/test-utils-macros/src/lib.rs b/nexus/test-utils-macros/src/lib.rs index 451e65867a9..ac217686414 100644 --- a/nexus/test-utils-macros/src/lib.rs +++ b/nexus/test-utils-macros/src/lib.rs @@ -1,6 +1,37 @@ use proc_macro::TokenStream; use quote::quote; -use syn::{parse_macro_input, ItemFn}; +use std::collections::HashSet as Set; +use syn::parse::{Parse, ParseStream, Result}; +use syn::punctuated::Punctuated; +use syn::{parse_macro_input, ItemFn, Token}; + +#[derive(Debug, PartialEq, Eq, Hash)] +pub(crate) struct NameValue { + name: syn::Path, + _eq_token: syn::token::Eq, + value: syn::Path, +} + +impl syn::parse::Parse for NameValue { + fn parse(input: syn::parse::ParseStream) -> syn::Result { + Ok(Self { + name: input.parse()?, + _eq_token: input.parse()?, + value: input.parse()?, + }) + } +} + +struct Args { + vars: Set, +} + +impl Parse for Args { + fn parse(input: ParseStream) -> Result { + let vars = 
Punctuated::::parse_terminated(input)?; + Ok(Args { vars: vars.into_iter().collect() }) + } +} /// Attribute for wrapping a test function to handle automatically /// creating and destroying a ControlPlaneTestContext. If the wrapped test @@ -10,6 +41,8 @@ use syn::{parse_macro_input, ItemFn}; /// Example usage: /// /// ```ignore +/// type ControlPlaneTestContext = +/// nexus_test_utils::ControlPlaneTestContext; /// #[nexus_test] /// async fn test_my_test_case(cptestctx: &ControlPlaneTestContext) { /// assert!(true); @@ -20,7 +53,7 @@ use syn::{parse_macro_input, ItemFn}; /// we want the teardown to only happen when the test doesn't fail (which causes /// a panic and unwind). #[proc_macro_attribute] -pub fn nexus_test(_metadata: TokenStream, input: TokenStream) -> TokenStream { +pub fn nexus_test(attrs: TokenStream, input: TokenStream) -> TokenStream { let input_func = parse_macro_input!(input as ItemFn); let mut correct_signature = true; @@ -31,6 +64,25 @@ pub fn nexus_test(_metadata: TokenStream, input: TokenStream) -> TokenStream { correct_signature = false; } + // By default, import "omicron_nexus::Server" as the server under test. + // + // However, a caller can supply their own implementation of the server + // using: + // + // #[nexus_test(server = )] + // + // This mechanism allows Nexus unit tests to be run using the `nexus_test` + // macro without a circular dependency on nexus-test-utils. 
+ let attrs = parse_macro_input!(attrs as Args); + let which_nexus = attrs + .vars + .iter() + .find(|nv| nv.name.is_ident("server")) + .map(|nv| nv.value.clone()) + .unwrap_or_else(|| { + syn::parse_str::("::omicron_nexus::Server").unwrap() + }); + // Verify we're returning an empty tuple correct_signature &= match input_func.sig.output { syn::ReturnType::Default => true, @@ -52,7 +104,7 @@ pub fn nexus_test(_metadata: TokenStream, input: TokenStream) -> TokenStream { { #input_func - let ctx = ::nexus_test_utils::test_setup(#func_ident_string).await; + let ctx = ::nexus_test_utils::test_setup::<#which_nexus>(#func_ident_string).await; #func_ident(&ctx).await; ctx.teardown().await; } diff --git a/nexus/test-utils/Cargo.toml b/nexus/test-utils/Cargo.toml index 584cc4d7f43..e9ccaca801c 100644 --- a/nexus/test-utils/Cargo.toml +++ b/nexus/test-utils/Cargo.toml @@ -12,6 +12,7 @@ dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", headers = "0.3.8" http = "0.2.7" hyper = "0.14" +nexus-test-interface = { path = "../test-interface" } omicron-common = { path = "../../common" } omicron-nexus = { path = ".." } omicron-sled-agent = { path = "../../sled-agent" } diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 607c8fca92f..92227506bd8 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -9,6 +9,7 @@ use dropshot::test_util::LogContext; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; use dropshot::ConfigLoggingLevel; +use nexus_test_interface::NexusServer; use omicron_common::api::external::IdentityMetadata; use omicron_common::api::internal::nexus::ProducerEndpoint; use omicron_common::nexus_config; @@ -23,9 +24,6 @@ use std::path::Path; use std::time::Duration; use uuid::Uuid; -// Expose the version of Nexus used by the test utils. 
-pub use omicron_nexus as nexus; - pub mod db; pub mod http_testing; pub mod resource_helpers; @@ -35,10 +33,10 @@ pub const RACK_UUID: &str = "c19a698f-c6f9-4a17-ae30-20d711b8f7dc"; pub const OXIMETER_UUID: &str = "39e6175b-4df2-4730-b11d-cbc1e60a2e78"; pub const PRODUCER_UUID: &str = "a6458b7d-87c3-4483-be96-854d814c20de"; -pub struct ControlPlaneTestContext { +pub struct ControlPlaneTestContext { pub external_client: ClientTestContext, pub internal_client: ClientTestContext, - pub server: omicron_nexus::Server, + pub server: N, pub database: dev::db::CockroachInstance, pub clickhouse: dev::clickhouse::ClickHouseInstance, pub logctx: LogContext, @@ -47,19 +45,9 @@ pub struct ControlPlaneTestContext { pub producer: ProducerServer, } -impl ControlPlaneTestContext { - pub fn test_opctx(&self) -> omicron_nexus::context::OpContext { - omicron_nexus::context::OpContext::for_tests( - self.logctx.log.new(o!()), - self.server.apictx.nexus.datastore().clone(), - ) - } - +impl ControlPlaneTestContext { pub async fn teardown(mut self) { - for server in self.server.http_servers_external { - server.close().await.unwrap(); - } - self.server.http_server_internal.close().await.unwrap(); + self.server.close().await; self.database.cleanup().await.unwrap(); self.clickhouse.cleanup().await.unwrap(); self.sled_agent.http_server.close().await.unwrap(); @@ -69,7 +57,7 @@ impl ControlPlaneTestContext { } } -pub fn load_test_config() -> omicron_nexus::Config { +pub fn load_test_config() -> omicron_common::nexus_config::Config { // We load as much configuration as we can from the test suite configuration // file. In practice, TestContext requires that: // @@ -86,21 +74,24 @@ pub fn load_test_config() -> omicron_nexus::Config { // configuration options, we expect many of those can be usefully configured // (and reconfigured) for the test suite. 
let config_file_path = Path::new("tests/config.test.toml"); - let mut config = omicron_nexus::Config::from_file(config_file_path) - .expect("failed to load config.test.toml"); + let mut config = + omicron_common::nexus_config::Config::from_file(config_file_path) + .expect("failed to load config.test.toml"); config.deployment.id = Uuid::new_v4(); config } -pub async fn test_setup(test_name: &str) -> ControlPlaneTestContext { +pub async fn test_setup( + test_name: &str, +) -> ControlPlaneTestContext { let mut config = load_test_config(); - test_setup_with_config(test_name, &mut config).await + test_setup_with_config::(test_name, &mut config).await } -pub async fn test_setup_with_config( +pub async fn test_setup_with_config( test_name: &str, - config: &mut omicron_nexus::Config, -) -> ControlPlaneTestContext { + config: &mut omicron_common::nexus_config::Config, +) -> ControlPlaneTestContext { let logctx = LogContext::new(test_name, &config.pkg.log); let log = &logctx.log; @@ -121,21 +112,14 @@ pub async fn test_setup_with_config( .expect("Tests expect to set a port of Clickhouse") .set_port(clickhouse.port()); - let server = - omicron_nexus::Server::start(&config, &logctx.log).await.unwrap(); - server - .apictx - .nexus - .wait_for_populate() - .await - .expect("Nexus never loaded users"); + let server = N::start_and_populate(&config, &logctx.log).await; let testctx_external = ClientTestContext::new( - server.http_servers_external[0].local_addr(), + server.get_http_servers_external()[0], logctx.log.new(o!("component" => "external client test context")), ); let testctx_internal = ClientTestContext::new( - server.http_server_internal.local_addr(), + server.get_http_server_internal(), logctx.log.new(o!("component" => "internal client test context")), ); @@ -146,7 +130,7 @@ pub async fn test_setup_with_config( "component" => "omicron_sled_agent::sim::Server", "sled_id" => sa_id.to_string(), )), - server.http_server_internal.local_addr(), + 
server.get_http_server_internal(), sa_id, ) .await @@ -156,7 +140,7 @@ pub async fn test_setup_with_config( let collector_id = Uuid::parse_str(OXIMETER_UUID).unwrap(); let oximeter = start_oximeter( log.new(o!("component" => "oximeter")), - server.http_server_internal.local_addr(), + server.get_http_server_internal(), clickhouse.port(), collector_id, ) @@ -165,12 +149,10 @@ pub async fn test_setup_with_config( // Set up a test metric producer server let producer_id = Uuid::parse_str(PRODUCER_UUID).unwrap(); - let producer = start_producer_server( - server.http_server_internal.local_addr(), - producer_id, - ) - .await - .unwrap(); + let producer = + start_producer_server(server.get_http_server_internal(), producer_id) + .await + .unwrap(); register_test_producer(&producer).unwrap(); ControlPlaneTestContext { diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index 3ead59bff43..247ea316368 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -414,7 +414,7 @@ impl DiskTest { pub const DEFAULT_ZPOOL_SIZE_GIB: u32 = 10; // Creates fake physical storage, an organization, and a project. - pub async fn new(cptestctx: &ControlPlaneTestContext) -> Self { + pub async fn new(cptestctx: &ControlPlaneTestContext) -> Self { let sled_agent = cptestctx.sled_agent.sled_agent.clone(); let mut disk_test = Self { sled_agent, zpools: vec![] }; diff --git a/nexus/tests/integration_tests/authz.rs b/nexus/tests/integration_tests/authz.rs index 8654a6bae91..933a778669a 100644 --- a/nexus/tests/integration_tests/authz.rs +++ b/nexus/tests/integration_tests/authz.rs @@ -5,7 +5,6 @@ //! 
Tests for authz policy not covered in the set of unauthorized tests use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::IdentityMetadataCreateParams; @@ -21,6 +20,9 @@ use httptest::{matchers::*, responders::*, Expectation, ServerBuilder}; use uuid::Uuid; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + // Test that a user cannot read other user's SSH keys #[nexus_test] async fn test_cannot_read_others_ssh_keys(cptestctx: &ControlPlaneTestContext) { diff --git a/nexus/tests/integration_tests/basic.rs b/nexus/tests/integration_tests/basic.rs index 0f136dc42e4..5a3ca2c0024 100644 --- a/nexus/tests/integration_tests/basic.rs +++ b/nexus/tests/integration_tests/basic.rs @@ -29,9 +29,11 @@ use nexus_test_utils::http_testing::RequestBuilder; use nexus_test_utils::resource_helpers::create_organization; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::start_sled_agent; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + #[nexus_test] async fn test_basic_failures(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; diff --git a/nexus/tests/integration_tests/console_api.rs b/nexus/tests/integration_tests/console_api.rs index ad4908dcca0..30cea8525f7 100644 --- a/nexus/tests/integration_tests/console_api.rs +++ b/nexus/tests/integration_tests/console_api.rs @@ -11,9 +11,7 @@ use nexus_test_utils::http_testing::{ AuthnMode, NexusRequest, RequestBuilder, TestResponse, }; use nexus_test_utils::resource_helpers::grant_iam; -use nexus_test_utils::{ - load_test_config, test_setup_with_config, ControlPlaneTestContext, -}; +use nexus_test_utils::{load_test_config, test_setup_with_config}; use nexus_test_utils_macros::nexus_test; use 
omicron_common::api::external::IdentityMetadataCreateParams; use omicron_nexus::authn::{USER_TEST_PRIVILEGED, USER_TEST_UNPRIVILEGED}; @@ -24,6 +22,9 @@ use omicron_nexus::external_api::console_api::SpoofLoginBody; use omicron_nexus::external_api::params::OrganizationCreate; use omicron_nexus::external_api::{shared, views}; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + #[nexus_test] async fn test_sessions(cptestctx: &ControlPlaneTestContext) { let testctx = &cptestctx.external_client; @@ -303,8 +304,11 @@ async fn test_assets(cptestctx: &ControlPlaneTestContext) { async fn test_absolute_static_dir() { let mut config = load_test_config(); config.pkg.console.static_dir = current_dir().unwrap().join("tests/static"); - let cptestctx = - test_setup_with_config("test_absolute_static_dir", &mut config).await; + let cptestctx = test_setup_with_config::( + "test_absolute_static_dir", + &mut config, + ) + .await; let testctx = &cptestctx.external_client; // existing file is returned diff --git a/nexus/tests/integration_tests/datasets.rs b/nexus/tests/integration_tests/datasets.rs index 42b0d48a847..a5dcc13e06c 100644 --- a/nexus/tests/integration_tests/datasets.rs +++ b/nexus/tests/integration_tests/datasets.rs @@ -11,9 +11,12 @@ use omicron_nexus::internal_api::params::{ use std::net::{IpAddr, Ipv4Addr, SocketAddr}; use uuid::Uuid; -use nexus_test_utils::{ControlPlaneTestContext, SLED_AGENT_UUID}; +use nexus_test_utils::SLED_AGENT_UUID; use nexus_test_utils_macros::nexus_test; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + // Tests the "normal" case of dataset_put: inserting a dataset within a known // zpool. 
// diff --git a/nexus/tests/integration_tests/device_auth.rs b/nexus/tests/integration_tests/device_auth.rs index ca0e9da9ef2..a0de75f2e9d 100644 --- a/nexus/tests/integration_tests/device_auth.rs +++ b/nexus/tests/integration_tests/device_auth.rs @@ -3,7 +3,6 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_nexus::external_api::device_auth::{ DeviceAccessTokenRequest, DeviceAuthRequest, DeviceAuthVerify, @@ -16,6 +15,9 @@ use http::{header, method::Method, StatusCode}; use serde::Deserialize; use uuid::Uuid; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + #[derive(Deserialize)] struct OAuthError { error: String, diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index 954cd028a3c..22a96cde435 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -24,7 +24,6 @@ use nexus_test_utils::resource_helpers::create_organization; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils::resource_helpers::DiskTest; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Disk; @@ -41,6 +40,9 @@ use sled_agent_client::TestInterfaces as _; use std::sync::Arc; use uuid::Uuid; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + const ORG_NAME: &str = "test-org"; const PROJECT_NAME: &str = "springfield-squidport-disks"; const DISK_NAME: &str = "just-rainsticks"; diff --git a/nexus/tests/integration_tests/images.rs b/nexus/tests/integration_tests/images.rs index 22b2fd9ad44..9925731a435 100644 --- a/nexus/tests/integration_tests/images.rs +++ 
b/nexus/tests/integration_tests/images.rs @@ -12,7 +12,6 @@ use nexus_test_utils::http_testing::RequestBuilder; use nexus_test_utils::resource_helpers::create_organization; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::DiskTest; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::{ByteCount, IdentityMetadataCreateParams}; @@ -21,6 +20,9 @@ use omicron_nexus::external_api::views::GlobalImage; use httptest::{matchers::*, responders::*, Expectation, ServerBuilder}; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + #[nexus_test] async fn test_global_image_create(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 170e03cfe15..da5bf5d14bb 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -43,9 +43,11 @@ use nexus_test_utils::identity_eq; use nexus_test_utils::resource_helpers::{ create_instance, create_organization, create_project, }; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + static POOL_NAME: &str = "p0"; static ORGANIZATION_NAME: &str = "test-org"; static PROJECT_NAME: &str = "springfield-squidport"; diff --git a/nexus/tests/integration_tests/ip_pools.rs b/nexus/tests/integration_tests/ip_pools.rs index 0dfe0d4a65b..9bcd288d079 100644 --- a/nexus/tests/integration_tests/ip_pools.rs +++ b/nexus/tests/integration_tests/ip_pools.rs @@ -15,7 +15,6 @@ use nexus_test_utils::resource_helpers::create_instance; use nexus_test_utils::resource_helpers::create_organization; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::objects_list_page_authz; -use 
nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::IdentityMetadataUpdateParams; @@ -29,6 +28,9 @@ use omicron_nexus::external_api::views::IpPoolRange; use omicron_nexus::TestInterfaces; use sled_agent_client::TestInterfaces as SledTestInterfaces; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + // Basic test verifying CRUD behavior on the IP Pool itself. #[nexus_test] async fn test_ip_pool_basic_crud(cptestctx: &ControlPlaneTestContext) { diff --git a/nexus/tests/integration_tests/organizations.rs b/nexus/tests/integration_tests/organizations.rs index bd5624736e8..8cc40e3f532 100644 --- a/nexus/tests/integration_tests/organizations.rs +++ b/nexus/tests/integration_tests/organizations.rs @@ -10,9 +10,11 @@ use http::StatusCode; use nexus_test_utils::resource_helpers::{ create_organization, create_project, objects_list_page_authz, }; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + #[nexus_test] async fn test_organizations(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; diff --git a/nexus/tests/integration_tests/oximeter.rs b/nexus/tests/integration_tests/oximeter.rs index 8455277b834..ad1844f59bb 100644 --- a/nexus/tests/integration_tests/oximeter.rs +++ b/nexus/tests/integration_tests/oximeter.rs @@ -4,7 +4,6 @@ //! Integration tests for oximeter collectors and producers. 
-use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oximeter_db::DbWrite; @@ -12,6 +11,9 @@ use std::net; use std::time::Duration; use uuid::Uuid; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + #[nexus_test] async fn test_oximeter_database_records(context: &ControlPlaneTestContext) { let db = &context.database; @@ -62,8 +64,10 @@ async fn test_oximeter_database_records(context: &ControlPlaneTestContext) { #[tokio::test] async fn test_oximeter_reregistration() { - let mut context = - nexus_test_utils::test_setup("test_oximeter_reregistration").await; + let mut context = nexus_test_utils::test_setup::( + "test_oximeter_reregistration", + ) + .await; let db = &context.database; let producer_id = nexus_test_utils::PRODUCER_UUID.parse().unwrap(); let oximeter_id = nexus_test_utils::OXIMETER_UUID.parse().unwrap(); diff --git a/nexus/tests/integration_tests/projects.rs b/nexus/tests/integration_tests/projects.rs index 7b230b1f73b..400ddf6751a 100644 --- a/nexus/tests/integration_tests/projects.rs +++ b/nexus/tests/integration_tests/projects.rs @@ -7,9 +7,11 @@ use nexus_test_utils::resource_helpers::project_get; use omicron_nexus::external_api::views::Project; use nexus_test_utils::resource_helpers::{create_organization, create_project}; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + #[nexus_test] async fn test_projects(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; diff --git a/nexus/tests/integration_tests/rack.rs b/nexus/tests/integration_tests/rack.rs index 10961edc221..438ea2baa88 100644 --- a/nexus/tests/integration_tests/rack.rs +++ b/nexus/tests/integration_tests/rack.rs @@ -4,11 +4,13 @@ use nexus_test_utils::http_testing::AuthnMode; use 
nexus_test_utils::http_testing::NexusRequest; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_nexus::external_api::views::Rack; use omicron_nexus::TestInterfaces; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + #[nexus_test] async fn test_list_own_rack(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; diff --git a/nexus/tests/integration_tests/role_assignments.rs b/nexus/tests/integration_tests/role_assignments.rs index 44959f225fa..c739326726e 100644 --- a/nexus/tests/integration_tests/role_assignments.rs +++ b/nexus/tests/integration_tests/role_assignments.rs @@ -14,7 +14,6 @@ use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::resource_helpers::create_organization; use nexus_test_utils::resource_helpers::create_project; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::ObjectIdentity; use omicron_nexus::authn::USER_TEST_UNPRIVILEGED; @@ -26,6 +25,9 @@ use omicron_nexus::db::model::DatabaseString; use omicron_nexus::external_api::shared; use omicron_nexus::external_api::views; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + /// Describes the role assignment test for a particular kind of resource /// /// This trait essentially describes a test case that will be fed into diff --git a/nexus/tests/integration_tests/roles_builtin.rs b/nexus/tests/integration_tests/roles_builtin.rs index 2f6ce82827c..b8070f8ac41 100644 --- a/nexus/tests/integration_tests/roles_builtin.rs +++ b/nexus/tests/integration_tests/roles_builtin.rs @@ -9,10 +9,12 @@ use http::StatusCode; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; -use nexus_test_utils::ControlPlaneTestContext; use 
nexus_test_utils_macros::nexus_test; use omicron_nexus::external_api::views::Role; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + #[nexus_test] async fn test_roles_builtin(cptestctx: &ControlPlaneTestContext) { let testctx = &cptestctx.external_client; diff --git a/nexus/tests/integration_tests/router_routes.rs b/nexus/tests/integration_tests/router_routes.rs index 8ae05ff9cdd..5ef1a5af71a 100644 --- a/nexus/tests/integration_tests/router_routes.rs +++ b/nexus/tests/integration_tests/router_routes.rs @@ -8,7 +8,6 @@ use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::identity_eq; use nexus_test_utils::resource_helpers::objects_list_page_authz; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::{ IdentityMetadataCreateParams, IdentityMetadataUpdateParams, @@ -22,6 +21,9 @@ use nexus_test_utils::resource_helpers::{ create_organization, create_project, create_router, create_vpc, }; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + #[nexus_test] async fn test_router_routes(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; diff --git a/nexus/tests/integration_tests/saml.rs b/nexus/tests/integration_tests/saml.rs index 55746338120..ca19dbf952b 100644 --- a/nexus/tests/integration_tests/saml.rs +++ b/nexus/tests/integration_tests/saml.rs @@ -16,11 +16,13 @@ use http::method::Method; use http::StatusCode; use nexus_test_utils::resource_helpers::{create_silo, object_create}; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use httptest::{matchers::*, responders::*, Expectation, Server}; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + // Valid SAML IdP entity descriptor from https://en.wikipedia.org/wiki/SAML_metadata#Identity_provider_metadata // note: no signing keys pub const 
SAML_IDP_DESCRIPTOR: &str = diff --git a/nexus/tests/integration_tests/silos.rs b/nexus/tests/integration_tests/silos.rs index 131fe59f025..6794637c665 100644 --- a/nexus/tests/integration_tests/silos.rs +++ b/nexus/tests/integration_tests/silos.rs @@ -24,7 +24,6 @@ use nexus_test_utils::resource_helpers::{ }; use crate::integration_tests::saml::SAML_IDP_DESCRIPTOR; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_nexus::authz::{self, SiloRole}; use uuid::Uuid; @@ -35,6 +34,9 @@ use omicron_nexus::authn::{USER_TEST_PRIVILEGED, USER_TEST_UNPRIVILEGED}; use omicron_nexus::db::fixed_data::silo::SILO_ID; use omicron_nexus::db::identity::Asset; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + #[nexus_test] async fn test_silos(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index 7231d1a1c92..e8c2da55907 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -16,7 +16,6 @@ use nexus_test_utils::resource_helpers::create_organization; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::object_create; use nexus_test_utils::resource_helpers::DiskTest; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external; use omicron_common::api::external::ByteCount; @@ -36,6 +35,9 @@ use uuid::Uuid; use httptest::{matchers::*, responders::*, Expectation, ServerBuilder}; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + const ORG_NAME: &str = "test-org"; const PROJECT_NAME: &str = "springfield-squidport-disks"; diff --git a/nexus/tests/integration_tests/ssh_keys.rs b/nexus/tests/integration_tests/ssh_keys.rs index 5e03637a4d0..0f23fda4ec2 100644 --- 
a/nexus/tests/integration_tests/ssh_keys.rs +++ b/nexus/tests/integration_tests/ssh_keys.rs @@ -4,13 +4,15 @@ use http::{method::Method, StatusCode}; use nexus_test_utils::http_testing::{AuthnMode, NexusRequest}; use nexus_test_utils::resource_helpers::objects_list_page_authz; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_nexus::external_api::params::SshKeyCreate; use omicron_nexus::external_api::views::SshKey; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + // Note: we use UnprivilegedUser in this test because unlike most tests, all the // endpoints here _can_ be accessed by that user and we want to explicitly // verify that behavior. diff --git a/nexus/tests/integration_tests/subnet_allocation.rs b/nexus/tests/integration_tests/subnet_allocation.rs index 9d8643d7c67..636157262ec 100644 --- a/nexus/tests/integration_tests/subnet_allocation.rs +++ b/nexus/tests/integration_tests/subnet_allocation.rs @@ -10,7 +10,6 @@ use dropshot::HttpErrorResponseBody; use http::method::Method; use http::StatusCode; use ipnetwork::Ipv4Network; -use nexus_defaults::NUM_INITIAL_RESERVED_IP_ADDRESSES; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; @@ -18,15 +17,18 @@ use nexus_test_utils::resource_helpers::create_instance_with; use nexus_test_utils::resource_helpers::create_ip_pool; use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils::resource_helpers::{create_organization, create_project}; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::{ ByteCount, IdentityMetadataCreateParams, InstanceCpuCount, Ipv4Net, NetworkInterface, }; +use omicron_common::nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; use 
omicron_nexus::external_api::params; use std::net::Ipv4Addr; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + async fn create_instance_expect_failure( client: &ClientTestContext, url_instances: &String, diff --git a/nexus/tests/integration_tests/timeseries.rs b/nexus/tests/integration_tests/timeseries.rs index 6e02d5f7a69..0f2bd84b28b 100644 --- a/nexus/tests/integration_tests/timeseries.rs +++ b/nexus/tests/integration_tests/timeseries.rs @@ -3,13 +3,15 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use nexus_test_utils::resource_helpers::objects_list_page_authz; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oximeter_db::TimeseriesSchema; use std::convert::Infallible; use std::time::Duration; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + #[nexus_test] async fn test_timeseries_schema(context: &ControlPlaneTestContext) { let client = &context.external_client; diff --git a/nexus/tests/integration_tests/unauthorized.rs b/nexus/tests/integration_tests/unauthorized.rs index 1ff3105a5c0..44ae4383227 100644 --- a/nexus/tests/integration_tests/unauthorized.rs +++ b/nexus/tests/integration_tests/unauthorized.rs @@ -19,10 +19,12 @@ use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; use nexus_test_utils::http_testing::TestResponse; use nexus_test_utils::resource_helpers::DiskTest; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_nexus::authn::external::spoof; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + // This test hits a list Nexus API endpoints using both unauthenticated and // unauthorized requests to make sure we get the expected behavior (generally: // 401, 403, or 404). 
This is trickier than it sounds because the appropriate diff --git a/nexus/tests/integration_tests/updates.rs b/nexus/tests/integration_tests/updates.rs index c249f7adbfe..5206831d8db 100644 --- a/nexus/tests/integration_tests/updates.rs +++ b/nexus/tests/integration_tests/updates.rs @@ -74,8 +74,11 @@ async fn test_update_end_to_end() { trusted_root: tuf_repo.path().join("metadata").join("1.root.json"), default_base_url: format!("http://{}/", local_addr), }); - let cptestctx = - test_setup_with_config("test_update_end_to_end", &mut config).await; + let cptestctx = test_setup_with_config::( + "test_update_end_to_end", + &mut config, + ) + .await; let client = &cptestctx.external_client; // call /system/updates/refresh on nexus @@ -283,7 +286,9 @@ impl KeySource for KeyKeySource { // Tests that ".." paths are disallowed by dropshot. #[tokio::test] async fn test_download_with_dots_fails() { - let cptestctx = test_setup("test_download_with_dots_fails").await; + let cptestctx = + test_setup::("test_download_with_dots_fails") + .await; let client = &cptestctx.internal_client; let filename = "hey/can/you/look/../../../../up/the/directory/tree"; diff --git a/nexus/tests/integration_tests/users_builtin.rs b/nexus/tests/integration_tests/users_builtin.rs index baa1f6fcf67..654d14c0c56 100644 --- a/nexus/tests/integration_tests/users_builtin.rs +++ b/nexus/tests/integration_tests/users_builtin.rs @@ -3,12 +3,14 @@ use dropshot::ResultsPage; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_nexus::authn; use omicron_nexus::external_api::views::UserBuiltin; use std::collections::BTreeMap; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + #[nexus_test] async fn test_users_builtin(cptestctx: &ControlPlaneTestContext) { let testctx = &cptestctx.external_client; diff --git 
a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs index 1943ec3e6c8..3ba04df0953 100644 --- a/nexus/tests/integration_tests/volume_management.rs +++ b/nexus/tests/integration_tests/volume_management.rs @@ -16,7 +16,6 @@ use nexus_test_utils::resource_helpers::create_organization; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::object_create; use nexus_test_utils::resource_helpers::DiskTest; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Disk; @@ -31,6 +30,9 @@ use uuid::Uuid; use httptest::{matchers::*, responders::*, Expectation, ServerBuilder}; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + const ORG_NAME: &str = "test-org"; const PROJECT_NAME: &str = "springfield-squidport-disks"; diff --git a/nexus/tests/integration_tests/vpc_firewall.rs b/nexus/tests/integration_tests/vpc_firewall.rs index d8eade9c5c9..ebd2a0b98ce 100644 --- a/nexus/tests/integration_tests/vpc_firewall.rs +++ b/nexus/tests/integration_tests/vpc_firewall.rs @@ -8,7 +8,6 @@ use nexus_test_utils::http_testing::{AuthnMode, NexusRequest}; use nexus_test_utils::resource_helpers::{ create_organization, create_project, create_vpc, }; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::{ IdentityMetadata, L4Port, L4PortRange, VpcFirewallRule, @@ -21,6 +20,9 @@ use omicron_nexus::external_api::views::Vpc; use std::convert::TryFrom; use uuid::Uuid; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + #[nexus_test] async fn test_vpc_firewall(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; diff --git a/nexus/tests/integration_tests/vpc_routers.rs b/nexus/tests/integration_tests/vpc_routers.rs index 547aa2d8c5e..7e2014a0ef0 
100644 --- a/nexus/tests/integration_tests/vpc_routers.rs +++ b/nexus/tests/integration_tests/vpc_routers.rs @@ -13,7 +13,6 @@ use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils::resource_helpers::{ create_organization, create_project, create_vpc, }; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::IdentityMetadataUpdateParams; @@ -21,6 +20,9 @@ use omicron_nexus::external_api::params; use omicron_nexus::external_api::views::VpcRouter; use omicron_nexus::external_api::views::VpcRouterKind; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext<omicron_nexus::Server>; + #[nexus_test] async fn test_vpc_routers(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; diff --git a/nexus/tests/integration_tests/vpc_subnets.rs b/nexus/tests/integration_tests/vpc_subnets.rs index 35f427a4b76..0bf8eab37e4 100644 --- a/nexus/tests/integration_tests/vpc_subnets.rs +++ b/nexus/tests/integration_tests/vpc_subnets.rs @@ -17,7 +17,6 @@ use nexus_test_utils::resource_helpers::{ create_instance, create_ip_pool, create_organization, create_project, create_vpc, }; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::IdentityMetadataUpdateParams; @@ -25,6 +24,9 @@ use omicron_common::api::external::Ipv4Net; use omicron_common::api::external::Ipv6Net; use omicron_nexus::external_api::{params, views::VpcSubnet}; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext<omicron_nexus::Server>; + #[nexus_test] async fn test_delete_vpc_subnet_with_interfaces_fails( cptestctx: &ControlPlaneTestContext, diff --git a/nexus/tests/integration_tests/vpcs.rs b/nexus/tests/integration_tests/vpcs.rs index de75a7cb0e1..5e6ffaca928 100644 --- 
a/nexus/tests/integration_tests/vpcs.rs +++ b/nexus/tests/integration_tests/vpcs.rs @@ -14,13 +14,15 @@ use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils::resource_helpers::{ create_organization, create_project, create_vpc, create_vpc_with_error, }; -use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::IdentityMetadataUpdateParams; use omicron_common::api::external::Ipv6Net; use omicron_nexus::external_api::{params, views::Vpc}; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext<omicron_nexus::Server>; + #[nexus_test] async fn test_vpcs(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; diff --git a/nexus/tests/integration_tests/zpools.rs b/nexus/tests/integration_tests/zpools.rs index 6dc760a3df0..89c8ffada85 100644 --- a/nexus/tests/integration_tests/zpools.rs +++ b/nexus/tests/integration_tests/zpools.rs @@ -8,9 +8,12 @@ use omicron_common::api::external::ByteCount; use omicron_nexus::internal_api::params::ZpoolPutRequest; use uuid::Uuid; -use nexus_test_utils::{ControlPlaneTestContext, SLED_AGENT_UUID}; +use nexus_test_utils::SLED_AGENT_UUID; use nexus_test_utils_macros::nexus_test; +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext<omicron_nexus::Server>; + // Tests the "normal" case of zpool_put: inserting a known Zpool. // // This will typically be invoked by the Sled Agent, after performing inventory. 
From f210c6d323c62932768fad194f5bb50c238ec942 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 25 Oct 2022 23:17:56 -0400 Subject: [PATCH 07/14] cargo doc didn't like my ascii art --- nexus/test-interface/src/lib.rs | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/nexus/test-interface/src/lib.rs b/nexus/test-interface/src/lib.rs index 40227313f90..c130dde0802 100644 --- a/nexus/test-interface/src/lib.rs +++ b/nexus/test-interface/src/lib.rs @@ -12,15 +12,12 @@ //! `nexus-test-utils`. //! //! Without a separate test interface crate, this dependency looks like the -//! following (note: "─►" means "depends on") +//! following (note: "->" means "depends on") //! -//! ┌────────────┐ -//! │ │ -//! ┌──▼──┐ ┌───────┴────────┐ ┌─────────────────┐ -//! │nexus├─►nexus-test-utils◄──┤integration tests│ -//! └──▲──┘ └────────────────┘ └─────────┬───────┘ -//! │ │ -//! └──────────────────────────────────┘ +//! - nexus -> nexus-test-utils +//! - nexus-test-utils -> nexus +//! - integration tests -> nexus +//! - integration tests -> nexus-test-utils //! //! As we can see, this introduces a circular dependency between //! `nexus-test-utils` and `nexus`. @@ -28,15 +25,11 @@ //! However, by separating out the portion of `nexus` used by `nexus-test-utils` //! into a separate trait, we can break the circular dependency: //! -//! ┌────────────────────────────────────┐ -//! │ │ -//! ┌──▼──┐ ┌────────────────┐ ┌─────────┴───────┐ -//! │nexus├───►nexus-test-utils◄──┤integration tests│ -//! └──┬──┘ └──────┬─────────┘ └─────────────────┘ -//! │ │ -//! │ ┌────────▼───────────┐ -//! └────►nexus-test-interface│ -//! └────────────────────┘ +//! - nexus -> nexus-test-interface +//! - nexus -> nexus-test-utils +//! - nexus-test-utils -> nexus-test-interface +//! - integration tests -> nexus +//! 
- integration tests -> nexus-test-utils use async_trait::async_trait; use omicron_common::nexus_config::Config; From 0d1f8bf4683fbc7e6561a6b3bc335a27df707432 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 25 Oct 2022 23:22:04 -0400 Subject: [PATCH 08/14] Add prefix --- nexus/src/app/sagas/disk_create.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index ded22f407ac..4da7757d36b 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -461,14 +461,14 @@ async fn sdc_regions_ensure_undo( sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { let log = sagactx.user_data().log(); - warn!(log, "regions_ensure_undo: Deleting crucible regions"); + warn!(log, "sdc_regions_ensure_undo: Deleting crucible regions"); delete_crucible_regions( sagactx.lookup::>( "datasets_and_regions", )?, ) .await?; - info!(log, "regions_ensure_undo: Deleted crucible regions"); + info!(log, "sdc_regions_ensure_undo: Deleted crucible regions"); Ok(()) } From 5422a0f97e168e778e761f0046478eb57a71bd77 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 25 Oct 2022 23:55:05 -0400 Subject: [PATCH 09/14] Merge updates, using generic nexus-test-utils and test-only DB access --- nexus/src/app/sagas/instance_create.rs | 69 +++++++++++--------------- 1 file changed, 30 insertions(+), 39 deletions(-) diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index 4c7b9d75cc1..0a07772152a 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -1017,21 +1017,20 @@ async fn sic_instance_ensure( #[cfg(test)] mod test { - use async_bb8_diesel::{AsyncRunQueryDsl, OptionalExtension}; - use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; - use dropshot::test_util::ClientTestContext; - use nexus_test_utils::nexus::{ + use crate::{ app::saga::create_saga_dag, 
app::sagas::instance_create::Params, app::sagas::instance_create::SagaInstanceCreate, authn::saga::Serialized, context::OpContext, db::datastore::DataStore, external_api::params, }; + use async_bb8_diesel::{AsyncRunQueryDsl, OptionalExtension}; + use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; + use dropshot::test_util::ClientTestContext; use nexus_test_utils::resource_helpers::create_disk; use nexus_test_utils::resource_helpers::create_ip_pool; use nexus_test_utils::resource_helpers::create_organization; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::DiskTest; - use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::{ ByteCount, IdentityMetadataCreateParams, InstanceCpuCount, @@ -1039,6 +1038,9 @@ mod test { use omicron_sled_agent::sim::SledAgent; use uuid::Uuid; + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext<crate::Server>; + const ORG_NAME: &str = "test-org"; const PROJECT_NAME: &str = "springfield-squidport"; const DISK_NAME: &str = "my-disk"; @@ -1082,7 +1084,14 @@ mod test { } } - #[nexus_test] + pub fn test_opctx(cptestctx: &ControlPlaneTestContext) -> OpContext { + OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + cptestctx.server.apictx.nexus.datastore().clone(), + ) + } + + #[nexus_test(server = crate::Server)] async fn test_saga_basic_usage_succeeds( cptestctx: &ControlPlaneTestContext, ) { @@ -1092,7 +1101,7 @@ mod test { let project_id = create_org_project_and_disk(&client).await; // Build the saga DAG with the provided test parameters - let opctx = cptestctx.test_opctx(); + let opctx = test_opctx(&cptestctx); let params = new_test_params(&opctx, project_id); let dag = create_saga_dag::<SagaInstanceCreate>(params).unwrap(); let runnable_saga = nexus.create_runnable_saga(dag).await.unwrap(); @@ -1101,29 +1110,21 @@ mod test { nexus.run_saga(runnable_saga).await.unwrap(); } - async fn no_instance_records_exist( - datastore: 
&DataStore, - opctx: &OpContext, - ) -> bool { + async fn no_instance_records_exist(datastore: &DataStore) -> bool { use crate::db::model::Instance; use crate::db::schema::instance::dsl; dsl::instance .filter(dsl::time_deleted.is_null()) .select(Instance::as_select()) - .first_async::<Instance>( - datastore.pool_authorized(opctx).await.unwrap(), - ) + .first_async::<Instance>(datastore.pool_for_tests().await.unwrap()) .await .optional() .unwrap() .is_none() } - async fn no_network_interface_records_exist( - datastore: &DataStore, - opctx: &OpContext, - ) -> bool { + async fn no_network_interface_records_exist(datastore: &DataStore) -> bool { use crate::db::model::NetworkInterface; use crate::db::schema::network_interface::dsl; @@ -1131,7 +1132,7 @@ mod test { .filter(dsl::time_deleted.is_null()) .select(NetworkInterface::as_select()) .first_async::<NetworkInterface>( - datastore.pool_authorized(opctx).await.unwrap(), + datastore.pool_for_tests().await.unwrap(), ) .await .optional() .unwrap() .is_none() } - async fn no_external_ip_records_exist( - datastore: &DataStore, - opctx: &OpContext, - ) -> bool { + async fn no_external_ip_records_exist(datastore: &DataStore) -> bool { use crate::db::model::ExternalIp; use crate::db::schema::external_ip::dsl; @@ -1150,7 +1148,7 @@ mod test { .filter(dsl::time_deleted.is_null()) .select(ExternalIp::as_select()) .first_async::<ExternalIp>( - datastore.pool_authorized(opctx).await.unwrap(), + datastore.pool_for_tests().await.unwrap(), ) .await .optional() .unwrap() .is_none() } - async fn disk_is_detached( - datastore: &DataStore, - opctx: &OpContext, - ) -> bool { + async fn disk_is_detached(datastore: &DataStore) -> bool { use crate::db::model::Disk; use crate::db::schema::disk::dsl; @@ -1169,9 +1164,7 @@ mod test { .filter(dsl::time_deleted.is_null()) .filter(dsl::name.eq(DISK_NAME)) .select(Disk::as_select()) - .first_async::<Disk>( - datastore.pool_authorized(opctx).await.unwrap(), - ) + 
.first_async::<Disk>(datastore.pool_for_tests().await.unwrap()) .await .unwrap() .runtime_state @@ -1184,7 +1177,7 @@ mod test { && sled_agent.disk_count().await == 0 } - #[nexus_test] + #[nexus_test(server = crate::Server)] async fn test_action_failure_can_unwind( cptestctx: &ControlPlaneTestContext, ) { @@ -1196,7 +1189,7 @@ mod test { let project_id = create_org_project_and_disk(&client).await; // Build the saga DAG with the provided test parameters - let opctx = cptestctx.test_opctx(); + let opctx = test_opctx(&cptestctx); let params = new_test_params(&opctx, project_id); let dag = create_saga_dag::<SagaInstanceCreate>(params).unwrap(); @@ -1232,12 +1225,10 @@ mod test { let datastore = nexus.datastore(); // Check that no partial artifacts of instance creation exist - assert!(no_instance_records_exist(datastore, &opctx).await); - assert!( - no_network_interface_records_exist(datastore, &opctx).await - ); - assert!(no_external_ip_records_exist(datastore, &opctx).await); - assert!(disk_is_detached(datastore, &opctx).await); + assert!(no_instance_records_exist(datastore).await); + assert!(no_network_interface_records_exist(datastore).await); + assert!(no_external_ip_records_exist(datastore).await); + assert!(disk_is_detached(datastore).await); assert!( no_instances_or_disks_on_sled(&cptestctx.sled_agent.sled_agent) .await From 320a380a69e01b1fe0fa664d05046517608deaa7 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 2 Nov 2022 11:38:06 -0400 Subject: [PATCH 10/14] Use new steno interface --- Cargo.lock | 2 +- common/Cargo.toml | 2 +- nexus/Cargo.toml | 2 +- nexus/db-model/Cargo.toml | 2 +- nexus/src/app/sagas/instance_create.rs | 10 +++++----- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 73dcd90f8da..a40fec7b60d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5843,7 +5843,7 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "steno" version = "0.2.1-dev" source = 
"git+https://github.com/oxidecomputer/steno?branch=node-access#19cdbd6c26b3f0ebcb1948cd95558d5857a66097" +source = "git+https://github.com/oxidecomputer/steno?rev=6e4946b165622612453f2d96c82f9a59342325d6#6e4946b165622612453f2d96c82f9a59342325d6" dependencies = [ "anyhow", "async-trait", diff --git a/common/Cargo.toml b/common/Cargo.toml index d37aac4440e..63ed5862f76 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -25,7 +25,7 @@ serde_json = "1.0" serde_with = "2.0.1" slog = { version = "2.5", features = [ "max_level_trace", "release_max_level_debug" ] } smf = "0.2" -steno = { git = "https://github.com/oxidecomputer/steno", branch = "node-access" } +steno = { git = "https://github.com/oxidecomputer/steno", rev = "6e4946b165622612453f2d96c82f9a59342325d6" } thiserror = "1.0" tokio = { version = "1.21", features = [ "full" ] } tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] } diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 5302a66fe50..f0676b9d4df 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -58,7 +58,7 @@ serde_urlencoded = "0.7.1" serde_with = "2.0.1" sled-agent-client = { path = "../sled-agent-client" } slog-dtrace = "0.2" -steno = { git = "https://github.com/oxidecomputer/steno", branch = "node-access" } +steno = { git = "https://github.com/oxidecomputer/steno", rev = "6e4946b165622612453f2d96c82f9a59342325d6" } tempfile = "3.3" thiserror = "1.0" tokio-tungstenite = "0.17.2" diff --git a/nexus/db-model/Cargo.toml b/nexus/db-model/Cargo.toml index bff03a1a2a4..d22d6fa1287 100644 --- a/nexus/db-model/Cargo.toml +++ b/nexus/db-model/Cargo.toml @@ -25,7 +25,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" uuid = { version = "1.2.1", features = ["serde", "v4"] } -steno = { git = "https://github.com/oxidecomputer/steno", branch = "node-access" } +steno = { git = "https://github.com/oxidecomputer/steno", rev = "6e4946b165622612453f2d96c82f9a59342325d6" } db-macros = { path = 
"../db-macros" } omicron-common = { path = "../../common" } diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index 0a07772152a..6570696d7f7 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -1194,13 +1194,13 @@ mod test { let params = new_test_params(&opctx, project_id); let dag = create_saga_dag::(params).unwrap(); - for i in 0..dag.get_node_count() { + for node in dag.get_nodes() { // Create a new saga for this node. info!( log, - "Creating new saga which will fail at index {i}"; - "node_name" => dag.get_node_name(i).unwrap(), - "label" => dag.get_node_label(i).unwrap(), + "Creating new saga which will fail at index {:?}", node.index(); + "node_name" => node.name().as_ref(), + "label" => node.label(), ); let runnable_saga = @@ -1213,7 +1213,7 @@ mod test { .sec() .saga_inject_error( runnable_saga.id(), - petgraph::graph::NodeIndex::new(i), + node.index(), ) .await .unwrap(); From bd5cd0a9a6a6010911f482169307629020318ba5 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 2 Nov 2022 11:53:14 -0400 Subject: [PATCH 11/14] Use new steno API to automatically iterate over all nodes --- Cargo.lock | 5 ++--- common/Cargo.toml | 2 +- nexus/Cargo.toml | 2 +- nexus/db-model/Cargo.toml | 2 +- nexus/src/app/sagas/disk_create.rs | 27 +++++++++++---------------- 5 files changed, 16 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c0519dedbee..76c35ed1afe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5812,9 +5812,8 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "steno" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f695d04f2d9e08f3ed0c72acfa21cddc20eb76698a2ff0961a6758d3566454c2" +version = "0.2.1-dev" +source = "git+https://github.com/oxidecomputer/steno?rev=6e4946b165622612453f2d96c82f9a59342325d6#6e4946b165622612453f2d96c82f9a59342325d6" dependencies = [ 
"anyhow", "async-trait", diff --git a/common/Cargo.toml b/common/Cargo.toml index d9fffcd7954..63ed5862f76 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -25,7 +25,7 @@ serde_json = "1.0" serde_with = "2.0.1" slog = { version = "2.5", features = [ "max_level_trace", "release_max_level_debug" ] } smf = "0.2" -steno = "0.2" +steno = { git = "https://github.com/oxidecomputer/steno", rev = "6e4946b165622612453f2d96c82f9a59342325d6" } thiserror = "1.0" tokio = { version = "1.21", features = [ "full" ] } tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] } diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 378d63a791d..d8971b0297c 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -58,7 +58,7 @@ serde_urlencoded = "0.7.1" serde_with = "2.0.1" sled-agent-client = { path = "../sled-agent-client" } slog-dtrace = "0.2" -steno = "0.2" +steno = { git = "https://github.com/oxidecomputer/steno", rev = "6e4946b165622612453f2d96c82f9a59342325d6" } tempfile = "3.3" thiserror = "1.0" tokio-tungstenite = "0.17.2" diff --git a/nexus/db-model/Cargo.toml b/nexus/db-model/Cargo.toml index 6e648657c8a..d22d6fa1287 100644 --- a/nexus/db-model/Cargo.toml +++ b/nexus/db-model/Cargo.toml @@ -25,7 +25,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" uuid = { version = "1.2.1", features = ["serde", "v4"] } -steno = "0.2" +steno = { git = "https://github.com/oxidecomputer/steno", rev = "6e4946b165622612453f2d96c82f9a59342325d6" } db-macros = { path = "../db-macros" } omicron-common = { path = "../../common" } diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index 4da7757d36b..99de189cca0 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -744,30 +744,25 @@ mod test { // Build the saga DAG with the provided test parameters let opctx = test_opctx(cptestctx); - let nodes_to_fail = [ - "disk_id", - "volume_id", - "created_disk", - 
"datasets_and_regions", - "regions_ensure", - "created_volume", - "disk_runtime", - ]; + let params = new_test_params(&opctx, project_id); + let dag = create_saga_dag::(params).unwrap(); - for failing_node in &nodes_to_fail { + for node in dag.get_nodes() { // Create a new saga for this node. - info!(log, "Creating new saga which will fail at {failing_node}"); - let params = new_test_params(&opctx, project_id); - let dag = create_saga_dag::(params).unwrap(); - let node_id = dag.get_index(failing_node).unwrap(); - let runnable_saga = nexus.create_runnable_saga(dag).await.unwrap(); + info!( + log, + "Creating new saga which will fail at index {:?}", node.index(); + "node_name" => node.name().as_ref(), + "label" => node.label(), + ); + let runnable_saga = nexus.create_runnable_saga(dag.clone()).await.unwrap(); // Inject an error instead of running the node. // // This should cause the saga to unwind. nexus .sec() - .saga_inject_error(runnable_saga.id(), node_id) + .saga_inject_error(runnable_saga.id(), node.index()) .await .unwrap(); nexus From d63fed274da1145e125d46737331fbfe10e8f682 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 2 Nov 2022 11:53:38 -0400 Subject: [PATCH 12/14] fmt --- nexus/src/app/sagas/disk_create.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index 99de189cca0..f72d01d6cae 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -755,7 +755,8 @@ mod test { "node_name" => node.name().as_ref(), "label" => node.label(), ); - let runnable_saga = nexus.create_runnable_saga(dag.clone()).await.unwrap(); + let runnable_saga = + nexus.create_runnable_saga(dag.clone()).await.unwrap(); // Inject an error instead of running the node. 
// From 4fe4896caf0dab1bb45721ce964e3351ddcd3edd Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 2 Nov 2022 11:53:53 -0400 Subject: [PATCH 13/14] fmt --- nexus/src/app/sagas/instance_create.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index a24180984ba..8f6e79662a3 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -1178,10 +1178,7 @@ mod test { // This should cause the saga to unwind. nexus .sec() - .saga_inject_error( - runnable_saga.id(), - node.index(), - ) + .saga_inject_error(runnable_saga.id(), node.index()) .await .unwrap(); nexus From 2d06f71494c468185e9c3e01a4000dbada0adb55 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 2 Nov 2022 12:37:17 -0400 Subject: [PATCH 14/14] Fix steno version --- Cargo.lock | 2 +- common/Cargo.toml | 2 +- nexus/Cargo.toml | 2 +- nexus/db-model/Cargo.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 76c35ed1afe..1e2d195657a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5813,7 +5813,7 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "steno" version = "0.2.1-dev" -source = "git+https://github.com/oxidecomputer/steno?rev=6e4946b165622612453f2d96c82f9a59342325d6#6e4946b165622612453f2d96c82f9a59342325d6" +source = "git+https://github.com/oxidecomputer/steno?rev=4cebb996217453b79896b53c9c2026007f2e69e8#4cebb996217453b79896b53c9c2026007f2e69e8" dependencies = [ "anyhow", "async-trait", diff --git a/common/Cargo.toml b/common/Cargo.toml index 63ed5862f76..13a42579ca4 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -25,7 +25,7 @@ serde_json = "1.0" serde_with = "2.0.1" slog = { version = "2.5", features = [ "max_level_trace", "release_max_level_debug" ] } smf = "0.2" -steno = { git = "https://github.com/oxidecomputer/steno", rev = 
"6e4946b165622612453f2d96c82f9a59342325d6" } +steno = { git = "https://github.com/oxidecomputer/steno", rev = "4cebb996217453b79896b53c9c2026007f2e69e8" } thiserror = "1.0" tokio = { version = "1.21", features = [ "full" ] } tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] } diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index d8971b0297c..ef426c09120 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -58,7 +58,7 @@ serde_urlencoded = "0.7.1" serde_with = "2.0.1" sled-agent-client = { path = "../sled-agent-client" } slog-dtrace = "0.2" -steno = { git = "https://github.com/oxidecomputer/steno", rev = "6e4946b165622612453f2d96c82f9a59342325d6" } +steno = { git = "https://github.com/oxidecomputer/steno", rev = "4cebb996217453b79896b53c9c2026007f2e69e8" } tempfile = "3.3" thiserror = "1.0" tokio-tungstenite = "0.17.2" diff --git a/nexus/db-model/Cargo.toml b/nexus/db-model/Cargo.toml index d22d6fa1287..37353bc6e5f 100644 --- a/nexus/db-model/Cargo.toml +++ b/nexus/db-model/Cargo.toml @@ -25,7 +25,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" uuid = { version = "1.2.1", features = ["serde", "v4"] } -steno = { git = "https://github.com/oxidecomputer/steno", rev = "6e4946b165622612453f2d96c82f9a59342325d6" } +steno = { git = "https://github.com/oxidecomputer/steno", rev = "4cebb996217453b79896b53c9c2026007f2e69e8" } db-macros = { path = "../db-macros" } omicron-common = { path = "../../common" }