From 4e172d0de30011e086500c5284db5d15a5d020cf Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Tue, 4 Apr 2023 12:58:32 -0700 Subject: [PATCH] sled agent: remove unused migration-related types (#2755) Tidy up the migration-related types in sled agent's Nexus API. This is just a bit of pre-work to reduce the size of subsequent commits that change this API. In `sled-agent/src/params.rs`: - Rename migration-related types so that they indicate whether they're used with migration sources or migration targets (without having to look at their fields' names). - Remove types and variants that were only used for migration sources. - Remove `InstanceRuntimeStateRequested` and replace it with `InstanceStateRequested`. - Remove the `Migrating` variant from `InstanceStateRequested`. Sled agent does need an API that allows Nexus to set an instance's migration ID, but this doesn't (won't) actually change any Propolis state or user-visible instance state, so subsequent changes will implement this in a separate API. Clean up the call sites that are affected by this change. For now, just add `todo!()`s and comment out the Nexus call sites that use migration types that were removed here. These will be replaced in subsequent commits that create sled agent's new migration APIs. --- nexus/src/app/instance.rs | 46 ++++++++-------------- nexus/src/app/sagas/instance_create.rs | 7 +--- nexus/src/app/sagas/instance_migrate.rs | 17 +++------ openapi/sled-agent.json | 51 ++----------------------- sled-agent/src/instance.rs | 25 ++++-------- sled-agent/src/instance_manager.rs | 20 ++++------ sled-agent/src/params.rs | 23 ++--------- sled-agent/src/sim/instance.rs | 3 -- sled-agent/src/sim/sled_agent.rs | 19 +++------ sled-agent/src/sled_agent.rs | 9 +++-- 10 files changed, 54 insertions(+), 166 deletions(-) diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index cec676f583..e4d7c4b07c 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -37,8 +37,6 @@ use omicron_common::api::external::UpdateResult; use omicron_common::api::external::Vni; use omicron_common::api::internal::nexus; use ref_cast::RefCast; -use sled_agent_client::types::InstanceRuntimeStateMigrateParams; -use sled_agent_client::types::InstanceRuntimeStateRequested; use sled_agent_client::types::InstanceStateRequested; use sled_agent_client::types::SourceNatConfig; use sled_agent_client::Client as SledAgentClient; @@ -293,11 +291,14 @@ impl super::Nexus { /// Idempotently place the instance in a 'Migrating' state. pub async fn instance_start_migrate( &self, - opctx: &OpContext, - instance_id: Uuid, - migration_id: Uuid, - dst_propolis_id: Uuid, + _opctx: &OpContext, + _instance_id: Uuid, + _migration_id: Uuid, + _dst_propolis_id: Uuid, ) -> UpdateResult { + todo!("Migration endpoint not yet implemented in sled agent"); + + /* let (.., authz_instance, db_instance) = LookupPath::new(opctx, &self.db_datastore) .instance_id(instance_id) @@ -319,6 +320,7 @@ impl super::Nexus { ) .await?; self.db_datastore.instance_refetch(opctx, &authz_instance).await + */ } /// Reboot the specified instance. @@ -339,15 +341,11 @@ impl super::Nexus { // never lose track of the fact that this Instance was supposed to be // running. let (.., authz_instance, db_instance) = instance_lookup.fetch().await?; - let requested = InstanceRuntimeStateRequested { - run_state: InstanceStateRequested::Reboot, - migration_params: None, - }; self.instance_set_runtime( opctx, &authz_instance, &db_instance, - requested, + InstanceStateRequested::Reboot, ) .await?; self.db_datastore.instance_refetch(opctx, &authz_instance).await @@ -360,15 +358,11 @@ impl super::Nexus { instance_lookup: &lookup::Instance<'_>, ) -> UpdateResult { let (.., authz_instance, db_instance) = instance_lookup.fetch().await?; - let requested = InstanceRuntimeStateRequested { - run_state: InstanceStateRequested::Running, - migration_params: None, - }; self.instance_set_runtime( opctx, &authz_instance, &db_instance, - requested, + InstanceStateRequested::Running, ) .await?; self.db_datastore.instance_refetch(opctx, &authz_instance).await @@ -381,15 +375,11 @@ impl super::Nexus { instance_lookup: &lookup::Instance<'_>, ) -> UpdateResult { let (.., authz_instance, db_instance) = instance_lookup.fetch().await?; - let requested = InstanceRuntimeStateRequested { - run_state: InstanceStateRequested::Stopped, - migration_params: None, - }; self.instance_set_runtime( opctx, &authz_instance, &db_instance, - requested, + InstanceStateRequested::Stopped, ) .await?; self.db_datastore.instance_refetch(opctx, &authz_instance).await @@ -407,15 +397,13 @@ impl super::Nexus { fn check_runtime_change_allowed( &self, runtime: &nexus::InstanceRuntimeState, - requested: &InstanceRuntimeStateRequested, ) -> Result<(), Error> { // Users are allowed to request a start or stop even if the instance is // already in the desired state (or moving to it), and we will issue a // request to the SA to make the state change in these cases in case the // runtime state we saw here was stale. However, users are not allowed // to change the state of an instance that's migrating, failed or - // destroyed. But if we're already migrating, requesting a migration is - // allowed to allow for idempotency. + // destroyed. let allowed = match runtime.run_state { InstanceState::Creating => true, InstanceState::Starting => true, @@ -423,10 +411,7 @@ impl super::Nexus { InstanceState::Stopping => true, InstanceState::Stopped => true, InstanceState::Rebooting => true, - - InstanceState::Migrating => { - requested.run_state == InstanceStateRequested::Migrating - } + InstanceState::Migrating => false, InstanceState::Repairing => false, InstanceState::Failed => false, InstanceState::Destroyed => false, @@ -451,13 +436,12 @@ impl super::Nexus { opctx: &OpContext, authz_instance: &authz::Instance, db_instance: &db::model::Instance, - requested: InstanceRuntimeStateRequested, + requested: InstanceStateRequested, ) -> Result<(), Error> { opctx.authorize(authz::Action::Modify, authz_instance).await?; self.check_runtime_change_allowed( &db_instance.runtime().clone().into(), - &requested, )?; // Gather disk information and turn that into DiskRequests @@ -610,7 +594,7 @@ impl super::Nexus { &db_instance.id(), &sled_agent_client::types::InstanceEnsureBody { initial: instance_hardware, - target: requested.clone(), + target: requested, migrate: None, }, ) diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index 816b60b048..755e943423 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -27,7 +27,6 @@ use omicron_common::api::external::Name; use omicron_common::api::internal::nexus::InstanceRuntimeState; use serde::Deserialize; use serde::Serialize; -use sled_agent_client::types::InstanceRuntimeStateRequested; use sled_agent_client::types::InstanceStateRequested; use slog::warn; use std::convert::TryFrom; @@ -1241,10 +1240,6 @@ async fn sic_instance_ensure( let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; let datastore = osagactx.datastore(); - let runtime_params = InstanceRuntimeStateRequested { - run_state: InstanceStateRequested::Running, - migration_params: None, - }; // TODO-correctness TODO-security It's not correct to re-resolve the // instance name now. See oxidecomputer/omicron#1536. @@ -1297,7 +1292,7 @@ async fn sic_instance_ensure( &opctx, &authz_instance, &db_instance, - runtime_params, + InstanceStateRequested::Running, ) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index ef5c3101da..7ced552824 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -7,19 +7,10 @@ use super::{NexusActionContext, NexusSaga, ACTION_GENERATE_ID}; use crate::app::sagas::declare_saga_actions; use crate::authn; use crate::db::identity::Resource; -use crate::db::model::IpKind; use crate::external_api::params; -use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::InstanceRuntimeState; use serde::Deserialize; use serde::Serialize; -use sled_agent_client::types::InstanceEnsureBody; -use sled_agent_client::types::InstanceHardware; -use sled_agent_client::types::InstanceMigrateParams; -use sled_agent_client::types::InstanceRuntimeStateMigrateParams; -use sled_agent_client::types::InstanceRuntimeStateRequested; -use sled_agent_client::types::InstanceStateRequested; -use sled_agent_client::types::SourceNatConfig; use std::net::Ipv6Addr; use steno::ActionError; use steno::Node; @@ -137,8 +128,11 @@ async fn sim_allocate_propolis_ip( } async fn sim_instance_migrate( - sagactx: NexusActionContext, + _sagactx: NexusActionContext, ) -> Result<(), ActionError> { + todo!("Migration action not yet implemented"); + + /* let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; let opctx = crate::context::op_context_for_saga_action( @@ -243,7 +237,7 @@ async fn sim_instance_migrate( &InstanceEnsureBody { initial: instance_hardware, target, - migrate: Some(InstanceMigrateParams { + migrate: Some(InstanceMigrationTargetParams { src_propolis_addr: src_propolis_addr.to_string(), src_propolis_id, }), @@ -262,6 +256,7 @@ async fn sim_instance_migrate( .map_err(ActionError::action_failed)?; Ok(()) + */ } async fn sim_cleanup_source( diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 0382896410..a14ff2da63 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -927,7 +927,7 @@ "description": "If we're migrating this instance, the details needed to drive the migration", "allOf": [ { - "$ref": "#/components/schemas/InstanceMigrateParams" + "$ref": "#/components/schemas/InstanceMigrationTargetParams" } ] }, @@ -935,7 +935,7 @@ "description": "requested runtime state of the Instance", "allOf": [ { - "$ref": "#/components/schemas/InstanceRuntimeStateRequested" + "$ref": "#/components/schemas/InstanceStateRequested" } ] } @@ -1019,7 +1019,7 @@ "snapshot_id" ] }, - "InstanceMigrateParams": { + "InstanceMigrationTargetParams": { "type": "object", "properties": { "src_propolis_addr": { @@ -1119,44 +1119,6 @@ "time_updated" ] }, - "InstanceRuntimeStateMigrateParams": { - "description": "Instance runtime state to update for a migration.", - "type": "object", - "properties": { - "dst_propolis_id": { - "type": "string", - "format": "uuid" - }, - "migration_id": { - "type": "string", - "format": "uuid" - } - }, - "required": [ - "dst_propolis_id", - "migration_id" - ] - }, - "InstanceRuntimeStateRequested": { - "description": "Used to request an Instance state change from a sled agent\n\nRight now, it's only the run state and migration id that can be changed, though we might want to support changing properties like \"ncpus\" here.", - "type": "object", - "properties": { - "migration_params": { - "nullable": true, - "allOf": [ - { - "$ref": "#/components/schemas/InstanceRuntimeStateMigrateParams" - } - ] - }, - "run_state": { - "$ref": "#/components/schemas/InstanceStateRequested" - } - }, - "required": [ - "run_state" - ] - }, "InstanceState": { "description": "Running state of an Instance (primarily: booted or stopped)\n\nThis typically reflects whether it's starting, running, stopping, or stopped, but also includes states related to the Instance's lifecycle", "oneOf": [ @@ -1256,13 +1218,6 @@ "reboot" ] }, - { - "description": "Migrate the instance to another node.", - "type": "string", - "enum": [ - "migrating" - ] - }, { "description": "Stop the instance and delete it.", "type": "string", diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index da997a8679..b4f34c249b 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -11,8 +11,7 @@ use crate::params::NetworkInterface; use crate::params::SourceNatConfig; use crate::params::VpcFirewallRule; use crate::params::{ - InstanceHardware, InstanceMigrateParams, InstanceRuntimeStateRequested, - InstanceStateRequested, + InstanceHardware, InstanceMigrationTargetParams, InstanceStateRequested, }; use anyhow::anyhow; use futures::lock::{Mutex, MutexGuard}; @@ -303,7 +302,7 @@ impl InstanceInner { instance: Instance, instance_ticket: InstanceTicket, setup: PropolisSetup, - migrate: Option, + migrate: Option, ) -> Result<(), Error> { let PropolisSetup { client, running_zone, port_tickets } = setup; @@ -424,11 +423,11 @@ mockall::mock! { pub async fn start( &self, instance_ticket: InstanceTicket, - migrate: Option, + migrate: Option, ) -> Result<(), Error>; pub async fn transition( &self, - target: InstanceRuntimeStateRequested, + target: InstanceStateRequested, ) -> Result; pub async fn issue_snapshot_request( &self, @@ -703,7 +702,7 @@ impl Instance { pub async fn start( &self, instance_ticket: InstanceTicket, - migrate: Option, + migrate: Option, ) -> Result<(), Error> { let mut inner = self.inner.lock().await; @@ -793,11 +792,11 @@ impl Instance { /// This method may panic if it has been invoked before [`Instance::start`]. pub async fn transition( &self, - target: InstanceRuntimeStateRequested, + target: InstanceStateRequested, ) -> Result { let mut inner = self.inner.lock().await; - let (propolis_state, next_published) = match target.run_state { + let (propolis_state, next_published) = match target { InstanceStateRequested::Running => { (propolis_client::api::InstanceStateRequested::Run, None) } @@ -810,9 +809,6 @@ impl Instance { propolis_client::api::InstanceStateRequested::Reboot, Some(InstanceState::Rebooting), ), - InstanceStateRequested::Migrating => { - unimplemented!("Live migration not yet implemented"); - } }; inner.propolis_state_put(propolis_state).await?; @@ -951,11 +947,6 @@ mod test { // Trying to transition before the instance has been initialized will // result in a panic. - inst.transition(InstanceRuntimeStateRequested { - run_state: InstanceStateRequested::Running, - migration_params: None, - }) - .await - .unwrap(); + inst.transition(InstanceStateRequested::Running).await.unwrap(); } } diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index fc285845eb..a74389a834 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -7,7 +7,7 @@ use crate::nexus::LazyNexusClient; use crate::params::VpcFirewallRule; use crate::params::{ - InstanceHardware, InstanceMigrateParams, InstanceRuntimeStateRequested, + InstanceHardware, InstanceMigrationTargetParams, InstanceStateRequested, }; use illumos_utils::dladm::Etherstub; use illumos_utils::link::VnicAllocator; @@ -93,8 +93,8 @@ impl InstanceManager { &self, instance_id: Uuid, initial_hardware: InstanceHardware, - target: InstanceRuntimeStateRequested, - migrate: Option, + target: InstanceStateRequested, + migrate: Option, ) -> Result { info!( &self.inner.log, @@ -363,10 +363,7 @@ mod test { .ensure( test_uuid(), new_initial_instance(), - InstanceRuntimeStateRequested { - run_state: InstanceStateRequested::Running, - migration_params: None, - }, + InstanceStateRequested::Running, None, ) .await @@ -453,15 +450,12 @@ mod test { let id = test_uuid(); let rt = new_initial_instance(); - let target = InstanceRuntimeStateRequested { - run_state: InstanceStateRequested::Running, - migration_params: None, - }; + let target = InstanceStateRequested::Running; // Creates instance, start + transition. - im.ensure(id, rt.clone(), target.clone(), None).await.unwrap(); + im.ensure(id, rt.clone(), target, None).await.unwrap(); // Transition only. - im.ensure(id, rt.clone(), target.clone(), None).await.unwrap(); + im.ensure(id, rt.clone(), target, None).await.unwrap(); // Transition only. im.ensure(id, rt, target, None).await.unwrap(); diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index e92e247eba..437d496db7 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -72,13 +72,13 @@ pub struct InstanceEnsureBody { /// has never seen this Instance before). pub initial: InstanceHardware, /// requested runtime state of the Instance - pub target: InstanceRuntimeStateRequested, + pub target: InstanceStateRequested, /// If we're migrating this instance, the details needed to drive the migration - pub migrate: Option, + pub migrate: Option, } #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct InstanceMigrateParams { +pub struct InstanceMigrationTargetParams { pub src_propolis_id: Uuid, pub src_propolis_addr: SocketAddr, } @@ -107,8 +107,6 @@ pub enum InstanceStateRequested { /// Issue a reset command to the instance, such that it should /// stop and then immediately become running. Reboot, - /// Migrate the instance to another node. - Migrating, /// Stop the instance and delete it. Destroyed, } @@ -125,7 +123,6 @@ impl InstanceStateRequested { InstanceStateRequested::Running => "running", InstanceStateRequested::Stopped => "stopped", InstanceStateRequested::Reboot => "reboot", - InstanceStateRequested::Migrating => "migrating", InstanceStateRequested::Destroyed => "destroyed", } } @@ -136,7 +133,6 @@ impl InstanceStateRequested { InstanceStateRequested::Running => false, InstanceStateRequested::Stopped => true, InstanceStateRequested::Reboot => false, - InstanceStateRequested::Migrating => false, InstanceStateRequested::Destroyed => true, } } @@ -144,22 +140,11 @@ impl InstanceStateRequested { /// Instance runtime state to update for a migration. #[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct InstanceRuntimeStateMigrateParams { +pub struct InstanceMigrationSourceParams { pub migration_id: Uuid, pub dst_propolis_id: Uuid, } -/// Used to request an Instance state change from a sled agent -/// -/// Right now, it's only the run state and migration id that can -/// be changed, though we might want to support changing properties -/// like "ncpus" here. -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct InstanceRuntimeStateRequested { - pub run_state: InstanceStateRequested, - pub migration_params: Option, -} - #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] pub enum DiskType { U2, diff --git a/sled-agent/src/sim/instance.rs b/sled-agent/src/sim/instance.rs index d27aa04d70..aa4de3e973 100644 --- a/sled-agent/src/sim/instance.rs +++ b/sled-agent/src/sim/instance.rs @@ -183,9 +183,6 @@ impl Simulatable for SimInstance { ))) } }, - InstanceStateRequested::Migrating => { - unimplemented!("Migration not implemented yet"); - } InstanceStateRequested::Destroyed => { self.state .observe_transition(&PropolisInstanceState::Destroyed); diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index d57ecc8d25..4be57fa5c9 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -6,8 +6,7 @@ use crate::nexus::NexusClient; use crate::params::{ - DiskStateRequested, InstanceHardware, InstanceRuntimeStateRequested, - InstanceStateRequested, + DiskStateRequested, InstanceHardware, InstanceStateRequested, }; use crate::updates::UpdateManager; use futures::lock::Mutex; @@ -210,7 +209,7 @@ impl SledAgent { self: &Arc, instance_id: Uuid, mut initial_hardware: InstanceHardware, - target: InstanceRuntimeStateRequested, + target: InstanceStateRequested, ) -> Result { // respond with a fake 500 level failure if asked to ensure an instance // with more than 16 CPUs. @@ -230,7 +229,7 @@ impl SledAgent { time_updated: chrono::Utc::now(), }; - let target = match target.run_state { + let target = match target { InstanceStateRequested::Running | InstanceStateRequested::Reboot => { DiskStateRequested::Attached(instance_id) @@ -239,7 +238,6 @@ impl SledAgent { | InstanceStateRequested::Destroyed => { DiskStateRequested::Detached } - _ => panic!("state {} not covered!", target.run_state), }; let id = match disk.volume_construction_request { @@ -292,7 +290,7 @@ impl SledAgent { )?; } - let body = match target.run_state { + let body = match target { InstanceStateRequested::Running => { propolis_client::types::InstanceStateRequested::Run } @@ -303,9 +301,6 @@ impl SledAgent { InstanceStateRequested::Reboot => { propolis_client::types::InstanceStateRequested::Reboot } - InstanceStateRequested::Migrating => { - propolis_client::types::InstanceStateRequested::MigrateStart - } }; client.instance_state_put().body(body).send().await.map_err( |e| Error::internal_error(&format!("propolis-client: {}", e)), @@ -314,11 +309,7 @@ impl SledAgent { let instance_run_time_state = self .instances - .sim_ensure( - &instance_id, - initial_hardware.runtime, - target.run_state, - ) + .sim_ensure(&instance_id, initial_hardware.runtime, target) .await?; for disk_request in &initial_hardware.disks { diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index a33d362a55..17eddcd290 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -10,8 +10,9 @@ use crate::instance_manager::InstanceManager; use crate::nexus::{LazyNexusClient, NexusRequestQueue}; use crate::params::VpcFirewallRule; use crate::params::{ - DatasetKind, DiskStateRequested, InstanceHardware, InstanceMigrateParams, - InstanceRuntimeStateRequested, ServiceEnsureBody, TimeSync, Zpool, + DatasetKind, DiskStateRequested, InstanceHardware, + InstanceMigrationTargetParams, InstanceStateRequested, ServiceEnsureBody, + TimeSync, Zpool, }; use crate::services::{self, ServiceManager}; use crate::storage_manager::StorageManager; @@ -519,8 +520,8 @@ impl SledAgent { &self, instance_id: Uuid, initial: InstanceHardware, - target: InstanceRuntimeStateRequested, - migrate: Option, + target: InstanceStateRequested, + migrate: Option, ) -> Result { self.inner .instances