Skip to content

Commit

Permalink
sled agent: remove unused migration-related types (#2755)
Browse files Browse the repository at this point in the history
Tidy up the migration-related types in sled agent's Nexus API. This is
just a bit of pre-work to reduce the size of subsequent commits that
change this API.

In `sled-agent/src/params.rs`:

- Rename migration-related types so that they indicate whether they're
used with migration sources or migration targets (without having to look
at their fields' names).
- Remove types and variants that were only used for migration sources.
- Remove `InstanceRuntimeStateRequested` and replace it with
`InstanceStateRequested`.
  - Remove the `Migrating` variant from `InstanceStateRequested`.

Sled agent does need an API that allows Nexus to set an instance's
migration ID, but this doesn't (won't) actually change any Propolis
state or user-visible instance state, so subsequent changes will
implement this in a separate API.

Clean up the call sites that are affected by this change. For now, just
add `todo!()`s and comment out the Nexus call sites that use migration
types that were removed here. These will be replaced in subsequent
commits that create sled agent's new migration APIs.
  • Loading branch information
gjcolombo committed Apr 4, 2023
1 parent e031c29 commit 4e172d0
Show file tree
Hide file tree
Showing 10 changed files with 54 additions and 166 deletions.
46 changes: 15 additions & 31 deletions nexus/src/app/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ use omicron_common::api::external::UpdateResult;
use omicron_common::api::external::Vni;
use omicron_common::api::internal::nexus;
use ref_cast::RefCast;
use sled_agent_client::types::InstanceRuntimeStateMigrateParams;
use sled_agent_client::types::InstanceRuntimeStateRequested;
use sled_agent_client::types::InstanceStateRequested;
use sled_agent_client::types::SourceNatConfig;
use sled_agent_client::Client as SledAgentClient;
Expand Down Expand Up @@ -293,11 +291,14 @@ impl super::Nexus {
/// Idempotently place the instance in a 'Migrating' state.
pub async fn instance_start_migrate(
&self,
opctx: &OpContext,
instance_id: Uuid,
migration_id: Uuid,
dst_propolis_id: Uuid,
_opctx: &OpContext,
_instance_id: Uuid,
_migration_id: Uuid,
_dst_propolis_id: Uuid,
) -> UpdateResult<db::model::Instance> {
todo!("Migration endpoint not yet implemented in sled agent");

/*
let (.., authz_instance, db_instance) =
LookupPath::new(opctx, &self.db_datastore)
.instance_id(instance_id)
Expand All @@ -319,6 +320,7 @@ impl super::Nexus {
)
.await?;
self.db_datastore.instance_refetch(opctx, &authz_instance).await
*/
}

/// Reboot the specified instance.
Expand All @@ -339,15 +341,11 @@ impl super::Nexus {
// never lose track of the fact that this Instance was supposed to be
// running.
let (.., authz_instance, db_instance) = instance_lookup.fetch().await?;
let requested = InstanceRuntimeStateRequested {
run_state: InstanceStateRequested::Reboot,
migration_params: None,
};
self.instance_set_runtime(
opctx,
&authz_instance,
&db_instance,
requested,
InstanceStateRequested::Reboot,
)
.await?;
self.db_datastore.instance_refetch(opctx, &authz_instance).await
Expand All @@ -360,15 +358,11 @@ impl super::Nexus {
instance_lookup: &lookup::Instance<'_>,
) -> UpdateResult<db::model::Instance> {
let (.., authz_instance, db_instance) = instance_lookup.fetch().await?;
let requested = InstanceRuntimeStateRequested {
run_state: InstanceStateRequested::Running,
migration_params: None,
};
self.instance_set_runtime(
opctx,
&authz_instance,
&db_instance,
requested,
InstanceStateRequested::Running,
)
.await?;
self.db_datastore.instance_refetch(opctx, &authz_instance).await
Expand All @@ -381,15 +375,11 @@ impl super::Nexus {
instance_lookup: &lookup::Instance<'_>,
) -> UpdateResult<db::model::Instance> {
let (.., authz_instance, db_instance) = instance_lookup.fetch().await?;
let requested = InstanceRuntimeStateRequested {
run_state: InstanceStateRequested::Stopped,
migration_params: None,
};
self.instance_set_runtime(
opctx,
&authz_instance,
&db_instance,
requested,
InstanceStateRequested::Stopped,
)
.await?;
self.db_datastore.instance_refetch(opctx, &authz_instance).await
Expand All @@ -407,26 +397,21 @@ impl super::Nexus {
fn check_runtime_change_allowed(
&self,
runtime: &nexus::InstanceRuntimeState,
requested: &InstanceRuntimeStateRequested,
) -> Result<(), Error> {
// Users are allowed to request a start or stop even if the instance is
// already in the desired state (or moving to it), and we will issue a
// request to the SA to make the state change in these cases in case the
// runtime state we saw here was stale. However, users are not allowed
// to change the state of an instance that's migrating, failed or
// destroyed. But if we're already migrating, requesting a migration is
// allowed to allow for idempotency.
// destroyed.
let allowed = match runtime.run_state {
InstanceState::Creating => true,
InstanceState::Starting => true,
InstanceState::Running => true,
InstanceState::Stopping => true,
InstanceState::Stopped => true,
InstanceState::Rebooting => true,

InstanceState::Migrating => {
requested.run_state == InstanceStateRequested::Migrating
}
InstanceState::Migrating => false,
InstanceState::Repairing => false,
InstanceState::Failed => false,
InstanceState::Destroyed => false,
Expand All @@ -451,13 +436,12 @@ impl super::Nexus {
opctx: &OpContext,
authz_instance: &authz::Instance,
db_instance: &db::model::Instance,
requested: InstanceRuntimeStateRequested,
requested: InstanceStateRequested,
) -> Result<(), Error> {
opctx.authorize(authz::Action::Modify, authz_instance).await?;

self.check_runtime_change_allowed(
&db_instance.runtime().clone().into(),
&requested,
)?;

// Gather disk information and turn that into DiskRequests
Expand Down Expand Up @@ -610,7 +594,7 @@ impl super::Nexus {
&db_instance.id(),
&sled_agent_client::types::InstanceEnsureBody {
initial: instance_hardware,
target: requested.clone(),
target: requested,
migrate: None,
},
)
Expand Down
7 changes: 1 addition & 6 deletions nexus/src/app/sagas/instance_create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ use omicron_common::api::external::Name;
use omicron_common::api::internal::nexus::InstanceRuntimeState;
use serde::Deserialize;
use serde::Serialize;
use sled_agent_client::types::InstanceRuntimeStateRequested;
use sled_agent_client::types::InstanceStateRequested;
use slog::warn;
use std::convert::TryFrom;
Expand Down Expand Up @@ -1241,10 +1240,6 @@ async fn sic_instance_ensure(
let osagactx = sagactx.user_data();
let params = sagactx.saga_params::<Params>()?;
let datastore = osagactx.datastore();
let runtime_params = InstanceRuntimeStateRequested {
run_state: InstanceStateRequested::Running,
migration_params: None,
};

// TODO-correctness TODO-security It's not correct to re-resolve the
// instance name now. See oxidecomputer/omicron#1536.
Expand Down Expand Up @@ -1297,7 +1292,7 @@ async fn sic_instance_ensure(
&opctx,
&authz_instance,
&db_instance,
runtime_params,
InstanceStateRequested::Running,
)
.await
.map_err(ActionError::action_failed)?;
Expand Down
17 changes: 6 additions & 11 deletions nexus/src/app/sagas/instance_migrate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,10 @@ use super::{NexusActionContext, NexusSaga, ACTION_GENERATE_ID};
use crate::app::sagas::declare_saga_actions;
use crate::authn;
use crate::db::identity::Resource;
use crate::db::model::IpKind;
use crate::external_api::params;
use omicron_common::api::external::Error;
use omicron_common::api::internal::nexus::InstanceRuntimeState;
use serde::Deserialize;
use serde::Serialize;
use sled_agent_client::types::InstanceEnsureBody;
use sled_agent_client::types::InstanceHardware;
use sled_agent_client::types::InstanceMigrateParams;
use sled_agent_client::types::InstanceRuntimeStateMigrateParams;
use sled_agent_client::types::InstanceRuntimeStateRequested;
use sled_agent_client::types::InstanceStateRequested;
use sled_agent_client::types::SourceNatConfig;
use std::net::Ipv6Addr;
use steno::ActionError;
use steno::Node;
Expand Down Expand Up @@ -137,8 +128,11 @@ async fn sim_allocate_propolis_ip(
}

async fn sim_instance_migrate(
sagactx: NexusActionContext,
_sagactx: NexusActionContext,
) -> Result<(), ActionError> {
todo!("Migration action not yet implemented");

/*
let osagactx = sagactx.user_data();
let params = sagactx.saga_params::<Params>()?;
let opctx = crate::context::op_context_for_saga_action(
Expand Down Expand Up @@ -243,7 +237,7 @@ async fn sim_instance_migrate(
&InstanceEnsureBody {
initial: instance_hardware,
target,
migrate: Some(InstanceMigrateParams {
migrate: Some(InstanceMigrationTargetParams {
src_propolis_addr: src_propolis_addr.to_string(),
src_propolis_id,
}),
Expand All @@ -262,6 +256,7 @@ async fn sim_instance_migrate(
.map_err(ActionError::action_failed)?;
Ok(())
*/
}

async fn sim_cleanup_source(
Expand Down
51 changes: 3 additions & 48 deletions openapi/sled-agent.json
Original file line number Diff line number Diff line change
Expand Up @@ -927,15 +927,15 @@
"description": "If we're migrating this instance, the details needed to drive the migration",
"allOf": [
{
"$ref": "#/components/schemas/InstanceMigrateParams"
"$ref": "#/components/schemas/InstanceMigrationTargetParams"
}
]
},
"target": {
"description": "requested runtime state of the Instance",
"allOf": [
{
"$ref": "#/components/schemas/InstanceRuntimeStateRequested"
"$ref": "#/components/schemas/InstanceStateRequested"
}
]
}
Expand Down Expand Up @@ -1019,7 +1019,7 @@
"snapshot_id"
]
},
"InstanceMigrateParams": {
"InstanceMigrationTargetParams": {
"type": "object",
"properties": {
"src_propolis_addr": {
Expand Down Expand Up @@ -1119,44 +1119,6 @@
"time_updated"
]
},
"InstanceRuntimeStateMigrateParams": {
"description": "Instance runtime state to update for a migration.",
"type": "object",
"properties": {
"dst_propolis_id": {
"type": "string",
"format": "uuid"
},
"migration_id": {
"type": "string",
"format": "uuid"
}
},
"required": [
"dst_propolis_id",
"migration_id"
]
},
"InstanceRuntimeStateRequested": {
"description": "Used to request an Instance state change from a sled agent\n\nRight now, it's only the run state and migration id that can be changed, though we might want to support changing properties like \"ncpus\" here.",
"type": "object",
"properties": {
"migration_params": {
"nullable": true,
"allOf": [
{
"$ref": "#/components/schemas/InstanceRuntimeStateMigrateParams"
}
]
},
"run_state": {
"$ref": "#/components/schemas/InstanceStateRequested"
}
},
"required": [
"run_state"
]
},
"InstanceState": {
"description": "Running state of an Instance (primarily: booted or stopped)\n\nThis typically reflects whether it's starting, running, stopping, or stopped, but also includes states related to the Instance's lifecycle",
"oneOf": [
Expand Down Expand Up @@ -1256,13 +1218,6 @@
"reboot"
]
},
{
"description": "Migrate the instance to another node.",
"type": "string",
"enum": [
"migrating"
]
},
{
"description": "Stop the instance and delete it.",
"type": "string",
Expand Down
25 changes: 8 additions & 17 deletions sled-agent/src/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ use crate::params::NetworkInterface;
use crate::params::SourceNatConfig;
use crate::params::VpcFirewallRule;
use crate::params::{
InstanceHardware, InstanceMigrateParams, InstanceRuntimeStateRequested,
InstanceStateRequested,
InstanceHardware, InstanceMigrationTargetParams, InstanceStateRequested,
};
use anyhow::anyhow;
use futures::lock::{Mutex, MutexGuard};
Expand Down Expand Up @@ -303,7 +302,7 @@ impl InstanceInner {
instance: Instance,
instance_ticket: InstanceTicket,
setup: PropolisSetup,
migrate: Option<InstanceMigrateParams>,
migrate: Option<InstanceMigrationTargetParams>,
) -> Result<(), Error> {
let PropolisSetup { client, running_zone, port_tickets } = setup;

Expand Down Expand Up @@ -424,11 +423,11 @@ mockall::mock! {
pub async fn start(
&self,
instance_ticket: InstanceTicket,
migrate: Option<InstanceMigrateParams>,
migrate: Option<InstanceMigrationTargetParams>,
) -> Result<(), Error>;
pub async fn transition(
&self,
target: InstanceRuntimeStateRequested,
target: InstanceStateRequested,
) -> Result<InstanceRuntimeState, Error>;
pub async fn issue_snapshot_request(
&self,
Expand Down Expand Up @@ -703,7 +702,7 @@ impl Instance {
pub async fn start(
&self,
instance_ticket: InstanceTicket,
migrate: Option<InstanceMigrateParams>,
migrate: Option<InstanceMigrationTargetParams>,
) -> Result<(), Error> {
let mut inner = self.inner.lock().await;

Expand Down Expand Up @@ -793,11 +792,11 @@ impl Instance {
/// This method may panic if it has been invoked before [`Instance::start`].
pub async fn transition(
&self,
target: InstanceRuntimeStateRequested,
target: InstanceStateRequested,
) -> Result<InstanceRuntimeState, Error> {
let mut inner = self.inner.lock().await;

let (propolis_state, next_published) = match target.run_state {
let (propolis_state, next_published) = match target {
InstanceStateRequested::Running => {
(propolis_client::api::InstanceStateRequested::Run, None)
}
Expand All @@ -810,9 +809,6 @@ impl Instance {
propolis_client::api::InstanceStateRequested::Reboot,
Some(InstanceState::Rebooting),
),
InstanceStateRequested::Migrating => {
unimplemented!("Live migration not yet implemented");
}
};

inner.propolis_state_put(propolis_state).await?;
Expand Down Expand Up @@ -951,11 +947,6 @@ mod test {

// Trying to transition before the instance has been initialized will
// result in a panic.
inst.transition(InstanceRuntimeStateRequested {
run_state: InstanceStateRequested::Running,
migration_params: None,
})
.await
.unwrap();
inst.transition(InstanceStateRequested::Running).await.unwrap();
}
}

0 comments on commit 4e172d0

Please sign in to comment.