diff --git a/gateway-messages/src/lib.rs b/gateway-messages/src/lib.rs index add2778829e..389d8f95dad 100644 --- a/gateway-messages/src/lib.rs +++ b/gateway-messages/src/lib.rs @@ -63,7 +63,8 @@ pub enum RequestKind { offset: u64, }, SerialConsoleDetach, - UpdatePrepare(UpdatePrepare), + SpUpdatePrepare(SpUpdatePrepare), + ComponentUpdatePrepare(ComponentUpdatePrepare), /// `UpdateChunk` always includes trailing raw data. UpdateChunk(UpdateChunk), UpdateStatus(SpComponent), @@ -102,6 +103,25 @@ pub enum UpdateStatus { /// Returned when the SP is still preparing to apply the update with the /// given ID (e.g., erasing a target flash slot). Preparing(UpdatePreparationStatus), + /// Special status only applicable to SP updates: the SP has finished + /// scanning its auxiliary flash slots, and we now know whether we need to + /// send the aux flash image. + /// + /// This state is only applicable to (a) the `SP_ITSELF` component when (b) + /// the update preparation message sent by MGS indicates an aux flash image + /// is present. + SpUpdateAuxFlashChckScan { + id: UpdateId, + /// If true, MGS will not send the aux flash image and will only send + /// the SP image. + found_match: bool, + /// Total size of the update to be applied. + /// + /// This is not directly relevant to this state, but is used by MGS to + /// convert this state (which only it knows about) into an `InProgress` + /// state to return to its callers. + total_size: u32, + }, /// Returned when an update is currently in progress. InProgress(UpdateInProgressStatus), /// Returned when an update has completed. @@ -118,6 +138,13 @@ pub enum UpdateStatus { /// update starts (or the status is reset some other way, such as an SP /// reboot). Aborted(UpdateId), + /// Returned when an update has failed on the SP. + /// + /// The SP has no concept of time, so we cannot indicate how recently this + /// failure happened. 
The SP will continue to return this status until a new + /// update starts (or the status is reset some other way, such as an SP + /// reboot). + Failed { id: UpdateId, code: u32 }, } /// See RFD 81. @@ -166,7 +193,8 @@ pub enum ResponseKind { BulkIgnitionState(BulkIgnitionState), IgnitionCommandAck, SpState(SpState), - UpdatePrepareAck, + SpUpdatePrepareAck, + ComponentUpdatePrepareAck, UpdateChunkAck, UpdateStatus(UpdateStatus), UpdateAbortAck, @@ -217,6 +245,8 @@ pub enum ResponseError { /// Cannot attach to the serial console because another MGS instance is /// already attached. SerialConsoleAlreadyAttached, + /// An update cannot be started while another component is being updated. + OtherComponentUpdateInProgress(SpComponent), /// An update has not been prepared yet. UpdateNotPrepared, /// An update-related message arrived at the SP, but its update ID does not @@ -265,6 +295,9 @@ impl fmt::Display for ResponseError { ResponseError::SerialConsoleAlreadyAttached => { write!(f, "serial console already attached") } + ResponseError::OtherComponentUpdateInProgress(component) => { + write!(f, "another component is being updated {component:?}") + } ResponseError::UpdateNotPrepared => { write!(f, "SP has not received update prepare request") } @@ -354,7 +387,22 @@ impl From for uuid::Uuid { #[derive( Debug, Clone, Copy, PartialEq, Eq, SerializedSize, Serialize, Deserialize, )] -pub struct UpdatePrepare { +pub struct SpUpdatePrepare { + pub id: UpdateId, + /// If this update includes an aux flash image, this size will be nonzero. + pub aux_flash_size: u32, + /// If this update includes an aux flash image, this check value is used by + /// the SP to determine whether it already has this aux flash image in one + /// of its slots. + pub aux_flash_chck: [u8; 32], + /// Size of the SP image in bytes. 
+ pub sp_image_size: u32, +} + +#[derive( + Debug, Clone, Copy, PartialEq, Eq, SerializedSize, Serialize, Deserialize, +)] +pub struct ComponentUpdatePrepare { pub component: SpComponent, pub id: UpdateId, /// The number of available slots depends on `component`; passing an invalid @@ -462,6 +510,9 @@ impl SpComponent { /// The SP itself. pub const SP_ITSELF: Self = Self { id: *b"sp\0\0\0\0\0\0\0\0\0\0\0\0\0\0" }; + /// The SP's auxiliary flash. + pub const SP_AUX_FLASH: Self = Self { id: *b"sp-aux-flash\0\0\0\0" }; + /// The `sp3` host CPU. pub const SP3_HOST_CPU: Self = Self { id: *b"sp3-host-cpu\0\0\0\0" }; diff --git a/gateway-messages/src/sp_impl.rs b/gateway-messages/src/sp_impl.rs index a6e4617dde0..d7886d75702 100644 --- a/gateway-messages/src/sp_impl.rs +++ b/gateway-messages/src/sp_impl.rs @@ -6,6 +6,7 @@ use crate::version; use crate::BulkIgnitionState; +use crate::ComponentUpdatePrepare; use crate::DiscoverResponse; use crate::IgnitionCommand; use crate::IgnitionState; @@ -19,9 +20,9 @@ use crate::SpMessage; use crate::SpMessageKind; use crate::SpPort; use crate::SpState; +use crate::SpUpdatePrepare; use crate::UpdateChunk; use crate::UpdateId; -use crate::UpdatePrepare; use crate::UpdateStatus; use core::convert::Infallible; use core::mem; @@ -70,11 +71,18 @@ pub trait SpHandler { port: SpPort, ) -> Result; - fn update_prepare( + fn sp_update_prepare( &mut self, sender: SocketAddrV6, port: SpPort, - update: UpdatePrepare, + update: SpUpdatePrepare, + ) -> Result<(), ResponseError>; + + fn component_update_prepare( + &mut self, + sender: SocketAddrV6, + port: SpPort, + update: ComponentUpdatePrepare, ) -> Result<(), ResponseError>; fn update_chunk( @@ -242,9 +250,12 @@ pub fn handle_message( RequestKind::SpState => { handler.sp_state(sender, port).map(ResponseKind::SpState) } - RequestKind::UpdatePrepare(update) => handler - .update_prepare(sender, port, update) - .map(|()| ResponseKind::UpdatePrepareAck), + RequestKind::SpUpdatePrepare(update) => 
handler + .sp_update_prepare(sender, port, update) + .map(|()| ResponseKind::SpUpdatePrepareAck), + RequestKind::ComponentUpdatePrepare(update) => handler + .component_update_prepare(sender, port, update) + .map(|()| ResponseKind::ComponentUpdatePrepareAck), RequestKind::UpdateChunk(chunk) => handler .update_chunk(sender, port, chunk, trailing_data) .map(|()| ResponseKind::UpdateChunkAck), diff --git a/gateway-sp-comms/src/communicator.rs b/gateway-sp-comms/src/communicator.rs index 026053b0582..b3759260cb3 100644 --- a/gateway-sp-comms/src/communicator.rs +++ b/gateway-sp-comms/src/communicator.rs @@ -354,7 +354,10 @@ pub(crate) trait ResponseKindExt { fn expect_serial_console_detach_ack(self) -> Result<(), BadResponseType>; - fn expect_update_prepare_ack(self) -> Result<(), BadResponseType>; + fn expect_sp_update_prepare_ack(self) -> Result<(), BadResponseType>; + + fn expect_component_update_prepare_ack(self) + -> Result<(), BadResponseType>; fn expect_update_status(self) -> Result; @@ -392,8 +395,11 @@ impl ResponseKindExt for ResponseKind { ResponseKind::SerialConsoleDetachAck => { response_kind_names::SERIAL_CONSOLE_DETACH_ACK } - ResponseKind::UpdatePrepareAck => { - response_kind_names::UPDATE_PREPARE_ACK + ResponseKind::SpUpdatePrepareAck => { + response_kind_names::SP_UPDATE_PREPARE_ACK + } + ResponseKind::ComponentUpdatePrepareAck => { + response_kind_names::COMPONENT_UPDATE_PREPARE_ACK } ResponseKind::UpdateStatus(_) => response_kind_names::UPDATE_STATUS, ResponseKind::UpdateAbortAck => { @@ -496,11 +502,23 @@ impl ResponseKindExt for ResponseKind { } } - fn expect_update_prepare_ack(self) -> Result<(), BadResponseType> { + fn expect_sp_update_prepare_ack(self) -> Result<(), BadResponseType> { + match self { + ResponseKind::SpUpdatePrepareAck => Ok(()), + other => Err(BadResponseType { + expected: response_kind_names::SP_UPDATE_PREPARE_ACK, + got: other.name(), + }), + } + } + + fn expect_component_update_prepare_ack( + self, + ) -> Result<(), 
BadResponseType> { match self { - ResponseKind::UpdatePrepareAck => Ok(()), + ResponseKind::ComponentUpdatePrepareAck => Ok(()), other => Err(BadResponseType { - expected: response_kind_names::UPDATE_PREPARE_ACK, + expected: response_kind_names::COMPONENT_UPDATE_PREPARE_ACK, got: other.name(), }), } @@ -579,7 +597,9 @@ mod response_kind_names { "serial_console_write_ack"; pub(super) const SERIAL_CONSOLE_DETACH_ACK: &str = "serial_console_detach_ack"; - pub(super) const UPDATE_PREPARE_ACK: &str = "update_prepare_ack"; + pub(super) const SP_UPDATE_PREPARE_ACK: &str = "sp_update_prepare_ack"; + pub(super) const COMPONENT_UPDATE_PREPARE_ACK: &str = + "component_update_prepare_ack"; pub(super) const UPDATE_STATUS: &str = "update_status"; pub(super) const UPDATE_ABORT_ACK: &str = "update_abort_ack"; pub(super) const UPDATE_CHUNK_ACK: &str = "update_chunk_ack"; diff --git a/gateway-sp-comms/src/single_sp.rs b/gateway-sp-comms/src/single_sp.rs index d6e172dd24f..dd9ac4c41a8 100644 --- a/gateway-sp-comms/src/single_sp.rs +++ b/gateway-sp-comms/src/single_sp.rs @@ -14,6 +14,7 @@ use crate::hubris_archive::HubrisArchive; use gateway_messages::sp_impl; use gateway_messages::version; use gateway_messages::BulkIgnitionState; +use gateway_messages::ComponentUpdatePrepare; use gateway_messages::IgnitionCommand; use gateway_messages::IgnitionState; use gateway_messages::PowerState; @@ -28,7 +29,6 @@ use gateway_messages::SpPort; use gateway_messages::SpState; use gateway_messages::UpdateChunk; use gateway_messages::UpdateId; -use gateway_messages::UpdatePrepare; use gateway_messages::UpdateStatus; use omicron_common::backoff; use omicron_common::backoff::Backoff; @@ -278,7 +278,7 @@ impl SingleSp { slot: u16, total_size: u32, ) -> Result<()> { - self.rpc(RequestKind::UpdatePrepare(UpdatePrepare { + self.rpc(RequestKind::ComponentUpdatePrepare(ComponentUpdatePrepare { component, id, slot, @@ -286,7 +286,7 @@ impl SingleSp { })) .await .and_then(|(_peer, response)| { - 
response.expect_update_prepare_ack().map_err(Into::into) + response.expect_component_update_prepare_ack().map_err(Into::into) }) } @@ -457,6 +457,10 @@ async fn poll_until_update_prep_complete( continue; } } + UpdateStatus::SpUpdateAuxFlashChckScan { .. } => { + return Err("SP returned unexpected status (aux flash scan?!)" + .to_string()); + } UpdateStatus::InProgress(sub_status) => { if sub_status.id == id { return Ok(()); @@ -465,6 +469,16 @@ async fn poll_until_update_prep_complete( UpdateStatus::None | UpdateStatus::Complete(_) | UpdateStatus::Aborted(_) => (), + UpdateStatus::Failed { id: failed_id, code } => { + if id == failed_id { + return Err(format!("updated failed (SP code {code})")); + } else { + let failed_id = Uuid::from(failed_id); + return Err(format!( + "different SP update failed ({failed_id})" + )); + } + } } return Err(format!("update preparation failed; status = {status:?}")); diff --git a/gateway/faux-mgs/src/main.rs b/gateway/faux-mgs/src/main.rs index bfd01126746..5ef9a5be484 100644 --- a/gateway/faux-mgs/src/main.rs +++ b/gateway/faux-mgs/src/main.rs @@ -280,6 +280,18 @@ async fn main() -> Result<()> { ); } } + UpdateStatus::SpUpdateAuxFlashChckScan { + id, + found_match, + .. 
+ } => { + let id = Uuid::from(id); + info!( + log, "aux flash scan complete"; + "id" => %id, + "found_match" => found_match, + ); + } UpdateStatus::InProgress(sub_status) => { let id = Uuid::from(sub_status.id); info!( @@ -297,6 +309,10 @@ async fn main() -> Result<()> { let id = Uuid::from(id); info!(log, "update aborted"; "id" => %id); } + UpdateStatus::Failed { id, code } => { + let id = Uuid::from(id); + info!(log, "update failed"; "id" => %id, "code" => code); + } UpdateStatus::None => { info!(log, "no update status available"); } @@ -372,6 +388,20 @@ async fn update( info!(log, "update preparing (no progress available)"); } } + UpdateStatus::SpUpdateAuxFlashChckScan { + id, + found_match, + total_size, + } => { + if id != sp_update_id { + bail!("different update in progress ({:?})", id); + } + info!( + log, "aux flash scan complete"; + "found_match" => found_match, + "total_size" => total_size, + ); + } UpdateStatus::InProgress(sub_status) => { if sub_status.id != sp_update_id { bail!("different update in progress ({:?})", sub_status.id); @@ -394,6 +424,12 @@ async fn update( } bail!("update aborted"); } + UpdateStatus::Failed { id, code } => { + if id != sp_update_id { + bail!("different update failed ({id:?}, code {code})"); + } + bail!("update failed (code {code})"); + } } tokio::time::sleep(Duration::from_secs(1)).await; } diff --git a/gateway/src/http_entrypoints.rs b/gateway/src/http_entrypoints.rs index c60b05aba60..762156505b7 100644 --- a/gateway/src/http_entrypoints.rs +++ b/gateway/src/http_entrypoints.rs @@ -133,6 +133,8 @@ enum SpUpdateStatus { Complete { id: Uuid }, /// The SP has aborted an in-progress update. Aborted { id: Uuid }, + /// The update process failed. + Failed { id: Uuid, code: u32 }, } /// Progress of an SP preparing to update. 
diff --git a/gateway/src/http_entrypoints/conversions.rs b/gateway/src/http_entrypoints/conversions.rs index 9a603dee8a7..5cccf74914c 100644 --- a/gateway/src/http_entrypoints/conversions.rs +++ b/gateway/src/http_entrypoints/conversions.rs @@ -36,6 +36,13 @@ impl From for SpUpdateStatus { id: status.id.into(), progress: status.progress.map(Into::into), }, + UpdateStatus::SpUpdateAuxFlashChckScan { + id, total_size, .. + } => Self::InProgress { + id: id.into(), + bytes_received: 0, + total_bytes: total_size, + }, UpdateStatus::InProgress(status) => Self::InProgress { id: status.id.into(), bytes_received: status.bytes_received, @@ -43,6 +50,9 @@ impl From for SpUpdateStatus { }, UpdateStatus::Complete(id) => Self::Complete { id: id.into() }, UpdateStatus::Aborted(id) => Self::Aborted { id: id.into() }, + UpdateStatus::Failed { id, code } => { + Self::Failed { id: id.into(), code } + } } } } diff --git a/openapi/gateway.json b/openapi/gateway.json index 02a6564406d..543cfe11e99 100644 --- a/openapi/gateway.json +++ b/openapi/gateway.json @@ -1307,6 +1307,32 @@ "id", "state" ] + }, + { + "description": "The update process failed.", + "type": "object", + "properties": { + "code": { + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "id": { + "type": "string", + "format": "uuid" + }, + "state": { + "type": "string", + "enum": [ + "failed" + ] + } + }, + "required": [ + "code", + "id", + "state" + ] } ] }, diff --git a/sp-sim/src/gimlet.rs b/sp-sim/src/gimlet.rs index 1da6c422000..16bd3b94a4e 100644 --- a/sp-sim/src/gimlet.rs +++ b/sp-sim/src/gimlet.rs @@ -619,11 +619,27 @@ impl SpHandler for Handler { Ok(state) } - fn update_prepare( + fn sp_update_prepare( &mut self, sender: SocketAddrV6, port: SpPort, - update: gateway_messages::UpdatePrepare, + update: gateway_messages::SpUpdatePrepare, + ) -> Result<(), ResponseError> { + warn!( + &self.log, + "received update prepare request; not supported by simulated gimlet"; + "sender" => %sender, + "port" => 
?port, + "update" => ?update, + ); + Err(ResponseError::RequestUnsupportedForSp) + } + + fn component_update_prepare( + &mut self, + sender: SocketAddrV6, + port: SpPort, + update: gateway_messages::ComponentUpdatePrepare, ) -> Result<(), ResponseError> { warn!( &self.log, diff --git a/sp-sim/src/sidecar.rs b/sp-sim/src/sidecar.rs index cd35d0c5834..3f29690f6c9 100644 --- a/sp-sim/src/sidecar.rs +++ b/sp-sim/src/sidecar.rs @@ -448,11 +448,27 @@ impl SpHandler for Handler { Ok(state) } - fn update_prepare( + fn sp_update_prepare( &mut self, sender: SocketAddrV6, port: SpPort, - update: gateway_messages::UpdatePrepare, + update: gateway_messages::SpUpdatePrepare, + ) -> Result<(), ResponseError> { + warn!( + &self.log, + "received update prepare request; not supported by simulated sidecar"; + "sender" => %sender, + "port" => ?port, + "update" => ?update, + ); + Err(ResponseError::RequestUnsupportedForSp) + } + + fn component_update_prepare( + &mut self, + sender: SocketAddrV6, + port: SpPort, + update: gateway_messages::ComponentUpdatePrepare, ) -> Result<(), ResponseError> { warn!( &self.log,