Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions gateway-sp-comms/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ once_cell = "1.15.0"
serde = { version = "1.0", features = ["derive"] }
serde_with = "2.0.1"
thiserror = "1.0.36"
tlvc = {git = "https://github.com/oxidecomputer/tlvc.git"}
tokio-stream = "0.1.10"
usdt = "0.3.1"
uuid = "1.1.0"
Expand Down
4 changes: 4 additions & 0 deletions gateway-sp-comms/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ pub enum UpdateError {
SpUpdateFileNotFound { path: String, err: zip::result::ZipError },
#[error("failed to decompress `{path}` within SP update: {err}")]
SpUpdateDecompressionFailed { path: String, err: io::Error },
#[error("error reading aux flash image: {0:?}")]
TlvcError(tlvc::TlvcReadError),
#[error("corrupt aux flash image: {0}")]
CorruptTlvc(String),
#[error("failed to send update message to SP: {0}")]
Communication(#[from] SpCommunicationError),
}
Expand Down
4 changes: 4 additions & 0 deletions gateway-sp-comms/src/hubris_archive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ impl HubrisArchive {
self.extract_by_name("img/final.bin")
}

/// Extract the aux flash image stored at `img/auxi.tlvc` within this archive.
///
/// # Errors
///
/// Returns whatever [`UpdateError`] `extract_by_name` reports for that path
/// (for example, when the entry is not present in the archive).
pub(crate) fn aux_image(&mut self) -> Result<Vec<u8>, UpdateError> {
    // Location of the aux flash image inside the archive.
    const AUX_IMAGE_PATH: &str = "img/auxi.tlvc";

    self.extract_by_name(AUX_IMAGE_PATH)
}

fn extract_by_name(&mut self, name: &str) -> Result<Vec<u8>, UpdateError> {
let mut f = self.archive.by_name(name).map_err(|err| {
UpdateError::SpUpdateFileNotFound { path: name.to_string(), err }
Expand Down
253 changes: 29 additions & 224 deletions gateway-sp-comms/src/single_sp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,9 @@ use crate::communicator::ResponseKindExt;
use crate::error::BadResponseType;
use crate::error::SpCommunicationError;
use crate::error::UpdateError;
use crate::hubris_archive::HubrisArchive;
use gateway_messages::sp_impl;
use gateway_messages::version;
use gateway_messages::BulkIgnitionState;
use gateway_messages::ComponentUpdatePrepare;
use gateway_messages::IgnitionCommand;
use gateway_messages::IgnitionState;
use gateway_messages::PowerState;
Expand All @@ -27,18 +25,14 @@ use gateway_messages::SpMessage;
use gateway_messages::SpMessageKind;
use gateway_messages::SpPort;
use gateway_messages::SpState;
use gateway_messages::UpdateChunk;
use gateway_messages::UpdateId;
use gateway_messages::UpdateStatus;
use omicron_common::backoff;
use omicron_common::backoff::Backoff;
use slog::debug;
use slog::error;
use slog::info;
use slog::trace;
use slog::warn;
use slog::Logger;
use std::convert::TryInto;
use std::io::Cursor;
use std::io::Seek;
use std::io::SeekFrom;
Expand All @@ -55,6 +49,12 @@ use tokio::time;
use tokio::time::timeout;
use uuid::Uuid;

mod update;

use self::update::start_component_update;
use self::update::start_sp_update;
use self::update::update_status;

/// IPv6 multicast address `ff15::1de:0`.
///
/// NOTE(review): judging by the name, this is the group used for SP
/// discovery; its users are not visible in this part of the file — confirm.
pub const DISCOVERY_MULTICAST_ADDR: Ipv6Addr =
Ipv6Addr::new(0xff15, 0, 0, 0, 0, 0, 0x1de, 0);

Expand Down Expand Up @@ -183,111 +183,30 @@ impl SingleSp {
return Err(UpdateError::ImageEmpty);
}

// If we're updating the SP, we expect `image` to be a hubris archive;
// extract the SP image from it.
//
// TODO 1: We will need to pull other data out of the archive (aux flash
// images).
// TODO 2: Are we sticking with hubris archives as the delivery format?
let image = if component == SpComponent::SP_ITSELF {
let mut archive = HubrisArchive::new(image)?;
archive.final_bin()?
} else {
image
};

let total_size = image
.len()
.try_into()
.map_err(|_err| UpdateError::ImageTooLarge)?;

info!(
self.log, "starting update";
"component" => component.as_str(),
"id" => %update_id,
"total_size" => total_size,
);
let id = update_id.into();
self.update_prepare(component, id, slot, total_size).await?;

let log = self.log.clone();
let inner = self.cmds_tx.clone();
tokio::spawn(async move {
// Wait until the SP has finished preparing for this update.
match poll_until_update_prep_complete(&inner, component, id, &log)
.await
{
Ok(()) => {
info!(
log, "update preparation complete";
"update_id" => %update_id,
);
}
Err(message) => {
error!(
log, "update preparation failed";
"err" => message,
"update_id" => %update_id,
);
return;
}
// SP updates are special (`image` is a hubris archive and may include
// an aux flash image in addition to the SP image).
if component == SpComponent::SP_ITSELF {
if slot != 0 {
// We know the SP only has one possible slot, so fail fast if
// the caller requested a slot other than 0.
return Err(UpdateError::Communication(
SpCommunicationError::SpError(
ResponseError::InvalidSlotForComponent,
),
));
}

// Deliver the update in chunks.
let mut image = Cursor::new(image);
let mut offset = 0;
while !CursorExt::is_empty(&image) {
let prior_pos = image.position();
debug!(
log, "sending update chunk";
"id" => %update_id,
"offset" => offset,
);

image = match update_chunk(&inner, component, id, offset, image)
.await
{
Ok(image) => image,
Err(err) => {
error!(
log, "update failed";
"id" => %update_id,
"err" => %err,
);
return;
}
};

// Update our offset according to how far our cursor advanced.
offset += (image.position() - prior_pos) as u32;
}
info!(log, "update complete"; "id" => %update_id);
});

Ok(())
}

/// Instruct the SP to begin the update process.
///
/// This should be followed by a series of `update_chunk()` calls totalling
/// `total_size` bytes of data.
async fn update_prepare(
&self,
component: SpComponent,
id: UpdateId,
slot: u16,
total_size: u32,
) -> Result<()> {
self.rpc(RequestKind::ComponentUpdatePrepare(ComponentUpdatePrepare {
component,
id,
slot,
total_size,
}))
.await
.and_then(|(_peer, response)| {
response.expect_component_update_prepare_ack().map_err(Into::into)
})
start_sp_update(&self.cmds_tx, update_id, image, &self.log).await
} else {
start_component_update(
&self.cmds_tx,
component,
update_id,
slot,
image,
&self.log,
)
.await
}
}

/// Get the status of any update being applied to the given component.
Expand Down Expand Up @@ -414,120 +333,6 @@ impl SingleSp {
}
}

/// Poll an SP until it indicates that preparation for the update identified by
/// `id` has completed.
///
/// The SP's update status for `component` is queried repeatedly:
///
/// * `Preparing` with a matching `id`: sleep for a fixed interval, then poll
///   again.
/// * `InProgress` with a matching `id`: preparation is done; return `Ok(())`.
/// * Anything else is treated as a failure.
///
/// # Errors
///
/// Returns a human-readable message (intended for logging) if the status
/// request itself fails, if the SP reports that this update or a different
/// update failed, or if the SP reports any status other than the two described
/// above (including `Preparing` / `InProgress` for a different update ID).
async fn poll_until_update_prep_complete(
    inner_tx: &mpsc::Sender<InnerCommand>,
    component: SpComponent,
    id: UpdateId,
    log: &Logger,
) -> Result<(), String> {
    // The choice of interval is relatively arbitrary; we expect update
    // preparation to generally fall in one of two cases:
    //
    // 1. No prep is necessary, and the update can happen immediately
    //    (we'll never sleep)
    // 2. Prep is relatively slow (e.g., erasing a flash part)
    //
    // We choose a few seconds assuming this polling interval is
    // primarily hit when the SP is doing something slow.
    const POLL_UPDATE_STATUS_INTERVAL: Duration = Duration::from_secs(2);

    loop {
        // Get update status from the SP or give up.
        let status = update_status(inner_tx, component)
            .await
            .map_err(|err| format!("could not get status from SP: {err}"))?;

        match status {
            // Still preparing our update: wait a bit, then poll again.
            UpdateStatus::Preparing(sub_status) if sub_status.id == id => {
                debug!(
                    log,
                    "SP still preparing; sleeping for {:?}",
                    POLL_UPDATE_STATUS_INTERVAL
                );
                tokio::time::sleep(POLL_UPDATE_STATUS_INTERVAL).await;
            }
            // Our update has moved past preparation; the caller may proceed.
            UpdateStatus::InProgress(sub_status) if sub_status.id == id => {
                return Ok(());
            }
            UpdateStatus::SpUpdateAuxFlashChckScan { .. } => {
                return Err("SP returned unexpected status (aux flash scan?!)"
                    .to_string());
            }
            UpdateStatus::Failed { id: failed_id, code } => {
                return Err(if id == failed_id {
                    format!("update failed (SP code {code})")
                } else {
                    let failed_id = Uuid::from(failed_id);
                    format!("different SP update failed ({failed_id})")
                });
            }
            // Every remaining status -- including `Preparing` / `InProgress`
            // for some other update ID -- means our preparation did not
            // succeed. Variants are listed explicitly so that adding a new
            // status forces this match to be revisited.
            UpdateStatus::Preparing(_)
            | UpdateStatus::InProgress(_)
            | UpdateStatus::None
            | UpdateStatus::Complete(_)
            | UpdateStatus::Aborted(_) => {
                return Err(format!(
                    "update preparation failed; status = {status:?}"
                ));
            }
        }
    }
}

/// Ask the SP for the status of any update being applied to `component`.
///
/// Sends a `RequestKind::UpdateStatus` request over `inner_tx` and interprets
/// the reply as an update status.
///
/// # Errors
///
/// Fails if the RPC itself fails or if the SP's response is not an update
/// status response.
async fn update_status(
    inner_tx: &mpsc::Sender<InnerCommand>,
    component: SpComponent,
) -> Result<UpdateStatus> {
    let request = RequestKind::UpdateStatus(component);
    let reply = rpc(inner_tx, request, None).await;

    // The peer address is not needed here; only the response matters.
    let (_peer, response) = reply.result?;
    response.expect_update_status().map_err(Into::into)
}

/// Send one chunk of an update to the SP.
///
/// Must be preceded by a call to `update_prepare()` (and may be preceded by
/// earlier chunks of this update).
///
/// There is no explicit "update finished" message: the SP detects completion
/// implicitly from the amount of data received, compared against the
/// `total_size` given when the update starts.
///
/// On success, returns the cursor handed back by `rpc_with_trailing_data()`.
///
/// # Errors
///
/// Fails if the RPC fails or if the SP's response is not an update chunk
/// acknowledgement.
async fn update_chunk(
    inner_tx: &mpsc::Sender<InnerCommand>,
    component: SpComponent,
    id: UpdateId,
    offset: u32,
    data: Cursor<Vec<u8>>,
) -> Result<Cursor<Vec<u8>>> {
    let request =
        RequestKind::UpdateChunk(UpdateChunk { component, id, offset });
    let (outcome, remaining) =
        rpc_with_trailing_data(inner_tx, request, data).await;

    // The peer address is not needed here; only the response matters.
    let (_peer, response) = outcome?;
    match response.expect_update_chunk_ack() {
        Ok(_) => Ok(remaining),
        Err(err) => Err(err.into()),
    }
}

async fn rpc_with_trailing_data(
inner_tx: &mpsc::Sender<InnerCommand>,
kind: RequestKind,
Expand Down
Loading